1 /*- 2 * Copyright (C) 1994, David Greenman 3 * Copyright (c) 1990, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the University of Utah, and William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 38 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ 39 */ 40 41 /* 42 * x86_64 Trap and System call handling 43 */ 44 45 #include "use_isa.h" 46 47 #include "opt_ddb.h" 48 #include "opt_ktrace.h" 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/proc.h> 53 #include <sys/pioctl.h> 54 #include <sys/kernel.h> 55 #include <sys/resourcevar.h> 56 #include <sys/signalvar.h> 57 #include <sys/signal2.h> 58 #include <sys/syscall.h> 59 #include <sys/sysctl.h> 60 #include <sys/sysent.h> 61 #include <sys/uio.h> 62 #include <sys/vmmeter.h> 63 #include <sys/malloc.h> 64 #ifdef KTRACE 65 #include <sys/ktrace.h> 66 #endif 67 #include <sys/ktr.h> 68 #include <sys/vkernel.h> 69 #include <sys/sysproto.h> 70 #include <sys/sysunion.h> 71 #include <sys/vmspace.h> 72 73 #include <vm/vm.h> 74 #include <vm/vm_param.h> 75 #include <sys/lock.h> 76 #include <vm/pmap.h> 77 #include <vm/vm_kern.h> 78 #include <vm/vm_map.h> 79 #include <vm/vm_page.h> 80 #include <vm/vm_extern.h> 81 82 #include <machine/cpu.h> 83 #include <machine/md_var.h> 84 #include <machine/pcb.h> 85 #include <machine/smp.h> 86 #include <machine/tss.h> 87 #include <machine/globaldata.h> 88 89 #include <ddb/ddb.h> 90 91 #include <sys/msgport2.h> 92 #include <sys/thread2.h> 93 #include <sys/mplock2.h> 94 95 int (*pmath_emulate) (struct trapframe *); 96 97 static int trap_pfault (struct trapframe 
*, int, vm_offset_t); 98 static void trap_fatal (struct trapframe *, int, vm_offset_t); 99 void dblfault_handler (void); 100 extern int vmm_enabled; 101 102 static struct krate segfltrate = { 1 }; 103 104 #if 0 105 extern inthand_t IDTVEC(syscall); 106 #endif 107 108 #define MAX_TRAP_MSG 30 109 static char *trap_msg[] = { 110 "", /* 0 unused */ 111 "privileged instruction fault", /* 1 T_PRIVINFLT */ 112 "", /* 2 unused */ 113 "breakpoint instruction fault", /* 3 T_BPTFLT */ 114 "", /* 4 unused */ 115 "", /* 5 unused */ 116 "arithmetic trap", /* 6 T_ARITHTRAP */ 117 "system forced exception", /* 7 T_ASTFLT */ 118 "", /* 8 unused */ 119 "general protection fault", /* 9 T_PROTFLT */ 120 "trace trap", /* 10 T_TRCTRAP */ 121 "", /* 11 unused */ 122 "page fault", /* 12 T_PAGEFLT */ 123 "", /* 13 unused */ 124 "alignment fault", /* 14 T_ALIGNFLT */ 125 "", /* 15 unused */ 126 "", /* 16 unused */ 127 "", /* 17 unused */ 128 "integer divide fault", /* 18 T_DIVIDE */ 129 "non-maskable interrupt trap", /* 19 T_NMI */ 130 "overflow trap", /* 20 T_OFLOW */ 131 "FPU bounds check fault", /* 21 T_BOUND */ 132 "FPU device not available", /* 22 T_DNA */ 133 "double fault", /* 23 T_DOUBLEFLT */ 134 "FPU operand fetch fault", /* 24 T_FPOPFLT */ 135 "invalid TSS fault", /* 25 T_TSSFLT */ 136 "segment not present fault", /* 26 T_SEGNPFLT */ 137 "stack fault", /* 27 T_STKFLT */ 138 "machine check trap", /* 28 T_MCHK */ 139 "SIMD floating-point exception", /* 29 T_XMMFLT */ 140 "reserved (unknown) fault", /* 30 T_RESERVED */ 141 }; 142 143 #ifdef DDB 144 static int ddb_on_nmi = 1; 145 SYSCTL_INT(_machdep, OID_AUTO, ddb_on_nmi, CTLFLAG_RW, 146 &ddb_on_nmi, 0, "Go to DDB on NMI"); 147 #endif 148 static int panic_on_nmi = 1; 149 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW, 150 &panic_on_nmi, 0, "Panic on NMI"); 151 static int fast_release; 152 SYSCTL_INT(_machdep, OID_AUTO, fast_release, CTLFLAG_RW, 153 &fast_release, 0, "Passive Release was optimal"); 154 static int 
slow_release;
SYSCTL_INT(_machdep, OID_AUTO, slow_release, CTLFLAG_RW,
	&slow_release, 0, "Passive Release was nonoptimal");

/*
 * Passively intercepts the thread switch function to increase
 * the thread priority from a user priority to a kernel priority, reducing
 * syscall and trap overhead for the case where no switch occurs.
 *
 * Synchronizes td_ucred with p_ucred.  This is used by system calls,
 * signal handling, faults, AST traps, and anything else that enters the
 * kernel from userland and provides the kernel with a stable read-only
 * copy of the process ucred.
 */
static __inline void
userenter(struct thread *curtd, struct proc *curp)
{
	struct ucred *ocred;
	struct ucred *ncred;

	/*
	 * Arm the passive release hook; userexit() later undoes this
	 * via lwkt_passive_recover().
	 */
	curtd->td_release = lwkt_passive_release;

	/*
	 * Refresh the thread's cached ucred if the process ucred has
	 * changed, dropping the reference on the old one.
	 */
	if (curtd->td_ucred != curp->p_ucred) {
		ncred = crhold(curp->p_ucred);
		ocred = curtd->td_ucred;
		curtd->td_ucred = ncred;
		if (ocred)
			crfree(ocred);
	}
}

/*
 * Handle signals, profiling, and other AST's and/or tasks that
 * must be completed before we can return to or try to return to userland.
 *
 * Each check below that changes state jumps back to 'recheck' so that
 * all conditions are re-evaluated before we finally return; any of the
 * handlers may block or generate new pending work.
 *
 * Note that td_sticks is a 64 bit quantity, but there's no point doing 64
 * arithmetic on the delta calculation so the absolute tick values are
 * truncated to an integer.
 */
static void
userret(struct lwp *lp, struct trapframe *frame, int sticks)
{
	struct proc *p = lp->lwp_proc;
	int sig;
	int ptok;

	/*
	 * Charge system time if profiling.  Note: times are in microseconds.
	 * This may do a copyout and block, so do it first even though it
	 * means some system time will be charged as user time.
	 */
	if (p->p_flags & P_PROFIL) {
		addupc_task(p, frame->tf_rip,
			    (u_int)((int)lp->lwp_thread->td_sticks - sticks));
	}

recheck:
	/*
	 * Specific on-return-to-usermode checks (LWP_MP_WEXIT,
	 * LWP_MP_VNLRU, etc).
	 */
	if (lp->lwp_mpflags & LWP_MP_URETMASK)
		lwpuserret(lp);

	/*
	 * Block here if we are in a stopped state.
	 */
	if (STOPLWP(p, lp)) {
		lwkt_gettoken(&p->p_token);
		tstop();
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * Post any pending upcalls.  If running a virtual kernel be sure
	 * to restore the virtual kernel's vmspace before posting the upcall.
	 */
	if (p->p_flags & (P_SIGVTALRM | P_SIGPROF)) {
		lwkt_gettoken(&p->p_token);
		if (p->p_flags & P_SIGVTALRM) {
			p->p_flags &= ~P_SIGVTALRM;
			ksignal(p, SIGVTALRM);
		}
		if (p->p_flags & P_SIGPROF) {
			p->p_flags &= ~P_SIGPROF;
			ksignal(p, SIGPROF);
		}
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * Post any pending signals
	 *
	 * WARNING!  postsig() can exit and not return.
	 */
	if ((sig = CURSIG_LCK_TRACE(lp, &ptok)) != 0) {
		postsig(sig, ptok);
		goto recheck;
	}

	/*
	 * block here if we are swapped out, but still process signals
	 * (such as SIGKILL).  proc0 (the swapin scheduler) is already
	 * aware of our situation, we do not have to wake it up.
	 */
	if (p->p_flags & P_SWAPPEDOUT) {
		lwkt_gettoken(&p->p_token);
		get_mplock();
		p->p_flags |= P_SWAPWAIT;
		swapin_request();
		if (p->p_flags & P_SWAPWAIT)
			tsleep(p, PCATCH, "SWOUT", 0);
		p->p_flags &= ~P_SWAPWAIT;
		rel_mplock();
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * In a multi-threaded program it is possible for a thread to change
	 * signal state during a system call which temporarily changes the
	 * signal mask.  In this case postsig() might not be run and we
	 * have to restore the mask ourselves.
	 */
	if (lp->lwp_flags & LWP_OLDMASK) {
		lp->lwp_flags &= ~LWP_OLDMASK;
		lp->lwp_sigmask = lp->lwp_oldsigmask;
		goto recheck;
	}
}

/*
 * Cleanup from userenter and any passive release that might have occurred.
 * We must reclaim the current-process designation before we can return
 * to usermode.  We also handle both LWKT and USER reschedule requests.
 */
static __inline void
userexit(struct lwp *lp)
{
	struct thread *td = lp->lwp_thread;
	/* globaldata_t gd = td->td_gd; */

	/*
	 * Handle stop requests at kernel priority.  Any requests queued
	 * after this loop will generate another AST.
	 */
	while (STOPLWP(lp->lwp_proc, lp)) {
		lwkt_gettoken(&lp->lwp_proc->p_token);
		tstop();
		lwkt_reltoken(&lp->lwp_proc->p_token);
	}

	/*
	 * Reduce our priority in preparation for a return to userland.  If
	 * our passive release function was still in place, our priority was
	 * never raised and does not need to be reduced.
	 */
	lwkt_passive_recover(td);

	/*
	 * Become the current user scheduled process if we aren't already,
	 * and deal with reschedule requests and other factors.
	 */
	lp->lwp_proc->p_usched->acquire_curproc(lp);
	/* WARNING: we may have migrated cpu's */
	/* gd = td->td_gd; */
}

/*
 * KTR tracing of kernel entry/exit events (traps, syscalls, fork returns).
 * Default the event mask to KTR_ALL if the build did not specify one.
 */
#if !defined(KTR_KERNENTRY)
#define	KTR_KERNENTRY	KTR_ALL
#endif
KTR_INFO_MASTER(kernentry);
KTR_INFO(KTR_KERNENTRY, kernentry, trap, 0,
	 "TRAP(pid %hd, tid %hd, trapno %ld, eva %lu)",
	 pid_t pid, lwpid_t tid, register_t trapno, vm_offset_t eva);
KTR_INFO(KTR_KERNENTRY, kernentry, trap_ret, 0, "TRAP_RET(pid %hd, tid %hd)",
	 pid_t pid, lwpid_t tid);
KTR_INFO(KTR_KERNENTRY, kernentry, syscall, 0, "SYSC(pid %hd, tid %hd, nr %ld)",
	 pid_t pid, lwpid_t tid, register_t trapno);
KTR_INFO(KTR_KERNENTRY, kernentry, syscall_ret, 0, "SYSRET(pid %hd, tid %hd, err %d)",
	 pid_t pid, lwpid_t tid, int err);
KTR_INFO(KTR_KERNENTRY, kernentry, fork_ret, 0, "FORKRET(pid %hd, tid %hd)",
	 pid_t pid, lwpid_t tid);

/*
 * Exception, fault, and trap interface to the kernel.
 * This common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed.
 *
 * This function is also called from doreti in an interlock to handle ASTs.
 * For example:  hardwareint->INTROUTINE->(set ast)->doreti->trap
 *
 * NOTE!  We have to retrieve the fault address prior to obtaining the
 * MP lock because get_mplock() may switch out.  YYY cr2 really ought
 * to be retrieved by the assembly code, not here.
 *
 * XXX gd_trap_nesting_level currently prevents lwkt_switch() from panicing
 * if an attempt is made to switch from a fast interrupt or IPI.  This is
 * necessary to properly take fatal kernel traps on SMP machines if
 * get_mplock() has to block.
 */

void
user_trap(struct trapframe *frame)
{
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;
	struct lwp *lp = td->td_lwp;
	struct proc *p;
	int sticks = 0;
	int i = 0, ucode = 0, type, code;
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
	lwkt_tokref_t curstop = td->td_toks_stop;
#endif
	vm_offset_t eva;

	p = td->td_proc;

	/* Page faults deliver the fault address in tf_addr */
	if (frame->tf_trapno == T_PAGEFLT)
		eva = frame->tf_addr;
	else
		eva = 0;
#if 0
	kprintf("USER_TRAP AT %08lx xflags %ld trapno %ld eva %08lx\n",
		frame->tf_rip, frame->tf_xflags, frame->tf_trapno, eva);
#endif

	/*
	 * Everything coming from user mode runs through user_trap,
	 * including system calls.
	 */
	if (frame->tf_trapno == T_FAST_SYSCALL) {
		syscall2(frame);
		return;
	}

	KTR_LOG(kernentry_trap, lp->lwp_proc->p_pid, lp->lwp_tid,
		frame->tf_trapno, eva);

#ifdef DDB
	/* If the debugger is already active any further trap is fatal */
	if (db_active) {
		eva = (frame->tf_trapno == T_PAGEFLT ? rcr2() : 0);
		++gd->gd_trap_nesting_level;
		trap_fatal(frame, TRUE, eva);
		--gd->gd_trap_nesting_level;
		goto out2;
	}
#endif

	type = frame->tf_trapno;
	code = frame->tf_err;

	userenter(td, p);

	sticks = (int)td->td_sticks;
	lp->lwp_md.md_regs = frame;

	/*
	 * Dispatch on the trap type.  'i' is the signal to deliver (0 for
	 * none) and 'ucode' the signal code passed to trapsignal() below.
	 */
	switch (type) {
	case T_PRIVINFLT: /* privileged instruction fault */
		i = SIGILL;
		ucode = ILL_PRVOPC;
		break;

	case T_BPTFLT:		/* bpt instruction fault */
	case T_TRCTRAP:		/* trace trap */
		frame->tf_rflags &= ~PSL_T;
		i = SIGTRAP;
		ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
		break;

	case T_ARITHTRAP:	/* arithmetic trap */
		ucode = code;
		i = SIGFPE;
		break;

	case T_ASTFLT:		/* Allow process switch */
		mycpu->gd_cnt.v_soft++;
		if (mycpu->gd_reqflags & RQF_AST_OWEUPC) {
			atomic_clear_int(&mycpu->gd_reqflags, RQF_AST_OWEUPC);
			addupc_task(p, p->p_prof.pr_addr, p->p_prof.pr_ticks);
		}
		goto out;

		/*
		 * The following two traps can happen in
		 * vm86 mode, and, if so, we want to handle
		 * them specially.
		 */
	case T_PROTFLT:		/* general protection fault */
	case T_STKFLT:		/* stack fault */
#if 0
		if (frame->tf_eflags & PSL_VM) {
			i = vm86_emulate((struct vm86frame *)frame);
			if (i == 0)
				goto out;
			break;
		}
#endif
		/* FALL THROUGH */

	case T_SEGNPFLT:	/* segment not present fault */
	case T_TSSFLT:		/* invalid TSS fault */
	case T_DOUBLEFLT:	/* double fault */
	default:
		i = SIGBUS;
		ucode = code + BUS_SEGM_FAULT ;
		break;

	case T_PAGEFLT:		/* page fault */
		/* 0 = resolved, -1 = fatal (already reported) */
		i = trap_pfault(frame, TRUE, eva);
		if (i == -1 || i == 0)
			goto out;


		if (i == SIGSEGV)
			ucode = SEGV_MAPERR;
		else {
			i = SIGSEGV;
			ucode = SEGV_ACCERR;
		}
		break;

	case T_DIVIDE:		/* integer divide fault */
		ucode = FPE_INTDIV;
		i = SIGFPE;
		break;

#if NISA > 0
	case T_NMI:
		/* machine/parity/power fail/"kitchen sink" faults */
		if (isa_nmi(code) == 0) {
#ifdef DDB
			/*
			 * NMI can be hooked up to a pushbutton
			 * for debugging.
			 */
			if (ddb_on_nmi) {
				kprintf ("NMI ... going to debugger\n");
				kdb_trap(type, 0, frame);
			}
#endif /* DDB */
			goto out2;
		} else if (panic_on_nmi)
			panic("NMI indicates hardware failure");
		break;
#endif /* NISA > 0 */

	case T_OFLOW:		/* integer overflow fault */
		ucode = FPE_INTOVF;
		i = SIGFPE;
		break;

	case T_BOUND:		/* bounds check fault */
		ucode = FPE_FLTSUB;
		i = SIGFPE;
		break;

	case T_DNA:
		/*
		 * Virtual kernel intercept - pass the DNA exception
		 * to the (emulated) virtual kernel if it asked to handle
		 * it.  This occurs when the virtual kernel is holding
		 * onto the FP context for a different emulated
		 * process then the one currently running.
		 *
		 * We must still call npxdna() since we may have
		 * saved FP state that the (emulated) virtual kernel
		 * needs to hand over to a different emulated process.
		 */
		if (lp->lwp_vkernel && lp->lwp_vkernel->ve &&
		    (td->td_pcb->pcb_flags & FP_VIRTFP)
		) {
			npxdna(frame);
			break;
		}

		/*
		 * The kernel may have switched out the FP unit's
		 * state, causing the user process to take a fault
		 * when it tries to use the FP unit.  Restore the
		 * state here
		 */
		if (npxdna(frame))
			goto out;
		if (!pmath_emulate) {
			i = SIGFPE;
			ucode = FPE_FPU_NP_TRAP;
			break;
		}
		i = (*pmath_emulate)(frame);
		if (i == 0) {
			if (!(frame->tf_rflags & PSL_T))
				goto out2;
			frame->tf_rflags &= ~PSL_T;
			i = SIGTRAP;
		}
		/* else ucode = emulator_only_knows() XXX */
		break;

	case T_FPOPFLT:		/* FPU operand fetch fault */
		ucode = T_FPOPFLT;
		i = SIGILL;
		break;

	case T_XMMFLT:		/* SIMD floating-point exception */
		ucode = 0; /* XXX */
		i = SIGFPE;
		break;
	}

	/*
	 * Virtual kernel intercept - if the fault is directly related to a
	 * VM context managed by a virtual kernel then let the virtual kernel
	 * handle it.
	 */
	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
		vkernel_trap(lp, frame);
		goto out;
	}

	/*
	 * Translate fault for emulators (e.g. Linux)
	 */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	trapsignal(lp, i, ucode);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", (u_long)eva);
		uprintf("\n");
	}
#endif

out:
	userret(lp, frame, sticks);
	userexit(lp);
out2:	;
	KTR_LOG(kernentry_trap_ret, lp->lwp_proc->p_pid, lp->lwp_tid);
#ifdef INVARIANTS
	/* NOTE(review): message prints td_pri, not td_critcount — confirm intent */
	KASSERT(crit_count == td->td_critcount,
		("trap: critical section count mismatch! %d/%d",
		crit_count, td->td_pri));
	KASSERT(curstop == td->td_toks_stop,
		("trap: extra tokens held after trap! %ld/%ld",
		curstop - &td->td_toks_base,
		td->td_toks_stop - &td->td_toks_base));
#endif
}

/*
 * Handle traps taken while running in kernel mode.  Most cases either
 * recover via the pcb_onfault hook, silently ignore the trap, or are
 * fatal via trap_fatal().
 */
void
kern_trap(struct trapframe *frame)
{
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;
	struct lwp *lp;
	struct proc *p;
	int i = 0, ucode = 0, type, code;
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
	lwkt_tokref_t curstop = td->td_toks_stop;
#endif
	vm_offset_t eva;

	lp = td->td_lwp;
	p = td->td_proc;

	if (frame->tf_trapno == T_PAGEFLT)
		eva = frame->tf_addr;
	else
		eva = 0;

#ifdef DDB
	if (db_active) {
		++gd->gd_trap_nesting_level;
		trap_fatal(frame, FALSE, eva);
		--gd->gd_trap_nesting_level;
		goto out2;
	}
#endif

	type = frame->tf_trapno;
	code = frame->tf_err;

#if 0
kernel_trap:
#endif
	/* kernel trap */

	switch (type) {
	case T_PAGEFLT:			/* page fault */
		trap_pfault(frame, FALSE, eva);
		goto out2;

	case T_DNA:
		/*
		 * The kernel may be using npx for copying or other
		 * purposes.
		 */
		panic("kernel NPX should not happen");
		/* NOTE(review): unreachable after panic(); kept from original */
		if (npxdna(frame))
			goto out2;
		break;

	case T_PROTFLT:		/* general protection fault */
	case T_SEGNPFLT:	/* segment not present fault */
		/*
		 * Invalid segment selectors and out of bounds
		 * %eip's and %esp's can be set up in user mode.
		 * This causes a fault in kernel mode when the
		 * kernel tries to return to user mode.  We want
		 * to get this fault so that we can fix the
		 * problem here and not have to check all the
		 * selectors and pointers when the user changes
		 * them.
		 */
		if (mycpu->gd_intr_nesting_level == 0) {
			if (td->td_pcb->pcb_onfault) {
				frame->tf_rip =
				    (register_t)td->td_pcb->pcb_onfault;
				goto out2;
			}
		}
		break;

	case T_TSSFLT:
		/*
		 * PSL_NT can be set in user mode and isn't cleared
		 * automatically when the kernel is entered.  This
		 * causes a TSS fault when the kernel attempts to
		 * `iret' because the TSS link is uninitialized.  We
		 * want to get this fault so that we can fix the
		 * problem here and not every time the kernel is
		 * entered.
		 */
		if (frame->tf_rflags & PSL_NT) {
			frame->tf_rflags &= ~PSL_NT;
			goto out2;
		}
		break;

	case T_TRCTRAP:	 /* trace trap */
#if 0
		if (frame->tf_eip == (int)IDTVEC(syscall)) {
			/*
			 * We've just entered system mode via the
			 * syscall lcall.  Continue single stepping
			 * silently until the syscall handler has
			 * saved the flags.
			 */
			goto out2;
		}
		if (frame->tf_eip == (int)IDTVEC(syscall) + 1) {
			/*
			 * The syscall handler has now saved the
			 * flags.  Stop single stepping it.
			 */
			frame->tf_eflags &= ~PSL_T;
			goto out2;
		}
#endif
#if 0
		/*
		 * Ignore debug register trace traps due to
		 * accesses in the user's address space, which
		 * can happen under several conditions such as
		 * if a user sets a watchpoint on a buffer and
		 * then passes that buffer to a system call.
		 * We still want to get TRCTRAPS for addresses
		 * in kernel space because that is useful when
		 * debugging the kernel.
		 */
		if (user_dbreg_trap()) {
			/*
			 * Reset breakpoint bits because the
			 * processor doesn't
			 */
			load_dr6(rdr6() & 0xfffffff0);
			goto out2;
		}
#endif
		/*
		 * Fall through (TRCTRAP kernel mode, kernel address)
		 */
	case T_BPTFLT:
		/*
		 * If DDB is enabled, let it handle the debugger trap.
		 * Otherwise, debugger traps "can't happen".
		 */
#ifdef DDB
		if (kdb_trap (type, 0, frame))
			goto out2;
#endif
		break;
	case T_DIVIDE:
		trap_fatal(frame, FALSE, eva);
		goto out2;
	case T_NMI:
		trap_fatal(frame, FALSE, eva);
		goto out2;
	case T_SYSCALL80:
	case T_FAST_SYSCALL:
		/*
		 * Ignore this trap generated from a spurious SIGTRAP.
		 *
		 * single stepping in / syscalls leads to spurious / SIGTRAP
		 * so ignore
		 *
		 * Haiku (c) 2007 Simon 'corecode' Schubert
		 */
		goto out2;
	}

	/*
	 * Translate fault for emulators (e.g. Linux)
	 */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	trapsignal(lp, i, ucode);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", (u_long)eva);
		uprintf("\n");
	}
#endif

out2:
	;
#ifdef INVARIANTS
	KASSERT(crit_count == td->td_critcount,
		("trap: critical section count mismatch! %d/%d",
		crit_count, td->td_pri));
	KASSERT(curstop == td->td_toks_stop,
		("trap: extra tokens held after trap! %ld/%ld",
		curstop - &td->td_toks_base,
		td->td_toks_stop - &td->td_toks_base));
#endif
}

/*
 * Resolve a page fault via vm_fault().
 *
 * Returns 0 if the fault was resolved (or recovered via pcb_onfault),
 * -1 for a fatal kernel-mode fault (trap_fatal() already called), or
 * the signal (SIGBUS/SIGSEGV) to deliver for an unresolved user fault.
 */
int
trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	vm_prot_t ftype;
	thread_t td = curthread;
	struct lwp *lp = td->td_lwp;
	int fault_flags;

	va = trunc_page(eva);
	if (usermode == FALSE) {
		/*
		 * This is a fault on kernel virtual memory.
		 */
		map = &kernel_map;
	} else {
		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		if (lp != NULL)
			vm = lp->lwp_vmspace;

		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;
	}

	/* Derive the required protection from the hardware error code */
	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_READ | VM_PROT_WRITE;
	else if (frame->tf_err & PGEX_I)
		ftype = VM_PROT_EXECUTE;
	else
		ftype = VM_PROT_READ;

	if (map != &kernel_map) {
		/*
		 * Keep swapout from messing with us during this
		 * critical time.
		 */
		PHOLD(lp->lwp_proc);

#if 0
		/*
		 * Grow the stack if necessary
		 */
		/* grow_stack returns false only if va falls into
		 * a growable stack region and the stack growth
		 * fails.  It returns true if va was not within
		 * a growable stack region, or if the stack
		 * growth succeeded.
		 */
		if (!grow_stack (map, va)) {
			rv = KERN_FAILURE;
			PRELE(lp->lwp_proc);
			goto nogo;
		}
#endif

		fault_flags = 0;
		if (usermode)
			fault_flags |= VM_FAULT_BURST | VM_FAULT_USERMODE;
		if (ftype & VM_PROT_WRITE)
			fault_flags |= VM_FAULT_DIRTY;
		else
			fault_flags |= VM_FAULT_NORMAL;
		rv = vm_fault(map, va, ftype, fault_flags);

		PRELE(lp->lwp_proc);
	} else {
		/*
		 * Don't have to worry about process locking or stacks in the
		 * kernel.
		 */
		rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		/*
		 * Kernel-mode faults may be recoverable via the pcb_onfault
		 * hook (e.g. copyin/copyout); otherwise they are fatal.
		 */
		if (td->td_gd->gd_intr_nesting_level == 0 &&
		    td->td_pcb->pcb_onfault) {
			frame->tf_rip = (register_t)td->td_pcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame, usermode, eva);
		return (-1);
	}

	/*
	 * NOTE: on x86_64 we have a tf_addr field in the trapframe, no
	 * kludge is needed to pass the fault address to signal handlers.
	 */
	struct proc *p = td->td_proc;
	krateprintf(&segfltrate,
		    "seg-fault accessing address %p "
		    "rip=%p pid=%d p_comm=%s\n",
		    (void *)va,
		    (void *)frame->tf_rip, p->p_pid, p->p_comm);
	/* Debugger("seg-fault"); */

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}

/*
 * Print a detailed report for an unrecoverable trap and panic (or enter
 * the debugger when DDB is configured and takes the trap).
 */
static void
trap_fatal(struct trapframe *frame, int usermode, vm_offset_t eva)
{
	int code, type, ss;
	long rsp;

	code = frame->tf_xflags;
	type = frame->tf_trapno;

	if (type <= MAX_TRAP_MSG) {
		kprintf("\n\nFatal trap %d: %s while in %s mode\n",
			type, trap_msg[type],
			(usermode ? "user" : "kernel"));
	}
	/* two separate prints in case of a trap on an unmapped page */
	kprintf("cpuid = %d\n", mycpu->gd_cpuid);
	if (type == T_PAGEFLT) {
		kprintf("fault virtual address = %p\n", (void *)eva);
		kprintf("fault code = %s %s, %s\n",
			usermode ? "user" : "supervisor",
			code & PGEX_W ? "write" : "read",
			code & PGEX_P ? "protection violation" : "page not present");
	}
	kprintf("instruction pointer = 0x%lx:0x%lx\n",
		frame->tf_cs & 0xffff, frame->tf_rip);
	if (usermode) {
		ss = frame->tf_ss & 0xffff;
		rsp = frame->tf_rsp;
	} else {
		/* Kernel mode: %ss/%rsp are not pushed; approximate them */
		ss = GSEL(GDATA_SEL, SEL_KPL);
		rsp = (long)&frame->tf_rsp;
	}
	kprintf("stack pointer = 0x%x:0x%lx\n", ss, rsp);
	kprintf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp);
	kprintf("processor eflags = ");
	if (frame->tf_rflags & PSL_T)
		kprintf("trace trap, ");
	if (frame->tf_rflags & PSL_I)
		kprintf("interrupt enabled, ");
	if (frame->tf_rflags & PSL_NT)
		kprintf("nested task, ");
	if (frame->tf_rflags & PSL_RF)
		kprintf("resume, ");
#if 0
	if (frame->tf_eflags & PSL_VM)
		kprintf("vm86, ");
#endif
	kprintf("IOPL = %jd\n", (intmax_t)((frame->tf_rflags & PSL_IOPL) >> 12));
	kprintf("current process = ");
	if (curproc) {
		kprintf("%lu (%s)\n",
		    (u_long)curproc->p_pid, curproc->p_comm ?
		    curproc->p_comm : "");
	} else {
		kprintf("Idle\n");
	}
	kprintf("current thread = pri %d ", curthread->td_pri);
	if (curthread->td_critcount)
		kprintf("(CRIT)");
	kprintf("\n");
/**
 *  XXX FIXME:
 *	we probably SHOULD have stopped the other CPUs before now!
 *	another CPU COULD have been touching cpl at this moment...
 */
	kprintf(" <- SMP: XXX");
	kprintf("\n");

#ifdef KDB
	if (kdb_trap(&psl))
		return;
#endif
#ifdef DDB
	if ((debugger_on_panic || db_active) && kdb_trap(type, code, frame))
		return;
#endif
	kprintf("trap number = %d\n", type);
	if (type <= MAX_TRAP_MSG)
		panic("%s", trap_msg[type]);
	else
		panic("unknown/reserved trap");
}

/*
 * Double fault handler. Called when a fault occurs while writing
 * a frame for a trap/exception onto the stack. This usually occurs
 * when the stack overflows (such is the case with infinite recursion,
 * for example).
 *
 * XXX Note that the current PTD gets replaced by IdlePTD when the
 * task switch occurs. This means that the stack that was active at
 * the time of the double fault is not available at <kstack> unless
 * the machine was idle when the double fault occurred.  The downside
 * of this is that "trace <ebp>" in ddb won't work.
 */
void
dblfault_handler(void)
{
#if JG
	struct mdglobaldata *gd = mdcpu;
#endif

	kprintf("\nFatal double fault:\n");
#if JG
	kprintf("rip = 0x%lx\n", gd->gd_common_tss.tss_rip);
	kprintf("rsp = 0x%lx\n", gd->gd_common_tss.tss_rsp);
	kprintf("rbp = 0x%lx\n", gd->gd_common_tss.tss_rbp);
#endif
	/* two separate prints in case of a trap on an unmapped page */
	kprintf("cpuid = %d\n", mycpu->gd_cpuid);
	panic("double fault");
}

/*
 * syscall2 -	MP aware system call request C handler
 *
 * A system call is essentially treated as a trap except that the
 * MP lock is not held on entry or return.  We are responsible for
 * obtaining the MP lock if necessary and for handling ASTs
 * (e.g. a task switch) prior to return.
 *
 * In general, only simple access and manipulation of curproc and
 * the current stack is allowed without having to hold MP lock.
1038 * 1039 * MPSAFE - note that large sections of this routine are run without 1040 * the MP lock. 1041 */ 1042 void 1043 syscall2(struct trapframe *frame) 1044 { 1045 struct thread *td = curthread; 1046 struct proc *p = td->td_proc; 1047 struct lwp *lp = td->td_lwp; 1048 caddr_t params; 1049 struct sysent *callp; 1050 register_t orig_tf_rflags; 1051 int sticks; 1052 int error; 1053 int narg; 1054 #ifdef INVARIANTS 1055 int crit_count = td->td_critcount; 1056 lwkt_tokref_t curstop = td->td_toks_stop; 1057 #endif 1058 register_t *argp; 1059 u_int code; 1060 int reg, regcnt; 1061 union sysunion args; 1062 register_t *argsdst; 1063 1064 mycpu->gd_cnt.v_syscall++; 1065 1066 KTR_LOG(kernentry_syscall, lp->lwp_proc->p_pid, lp->lwp_tid, 1067 frame->tf_rax); 1068 1069 userenter(td, p); /* lazy raise our priority */ 1070 1071 reg = 0; 1072 regcnt = 6; 1073 /* 1074 * Misc 1075 */ 1076 sticks = (int)td->td_sticks; 1077 orig_tf_rflags = frame->tf_rflags; 1078 1079 /* 1080 * Virtual kernel intercept - if a VM context managed by a virtual 1081 * kernel issues a system call the virtual kernel handles it, not us. 1082 * Restore the virtual kernel context and return from its system 1083 * call. The current frame is copied out to the virtual kernel. 
1084 */ 1085 if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { 1086 vkernel_trap(lp, frame); 1087 error = EJUSTRETURN; 1088 goto out; 1089 } 1090 1091 /* 1092 * Get the system call parameters and account for time 1093 */ 1094 lp->lwp_md.md_regs = frame; 1095 params = (caddr_t)frame->tf_rsp + sizeof(register_t); 1096 code = frame->tf_rax; 1097 1098 if (p->p_sysent->sv_prepsyscall) { 1099 (*p->p_sysent->sv_prepsyscall)( 1100 frame, (int *)(&args.nosys.sysmsg + 1), 1101 &code, ¶ms); 1102 } else { 1103 if (code == SYS_syscall || code == SYS___syscall) { 1104 code = frame->tf_rdi; 1105 reg++; 1106 regcnt--; 1107 } 1108 } 1109 1110 if (p->p_sysent->sv_mask) 1111 code &= p->p_sysent->sv_mask; 1112 1113 if (code >= p->p_sysent->sv_size) 1114 callp = &p->p_sysent->sv_table[0]; 1115 else 1116 callp = &p->p_sysent->sv_table[code]; 1117 1118 narg = callp->sy_narg & SYF_ARGMASK; 1119 1120 /* 1121 * On x86_64 we get up to six arguments in registers. The rest are 1122 * on the stack. The first six members of 'struct trapframe' happen 1123 * to be the registers used to pass arguments, in exactly the right 1124 * order. 1125 */ 1126 argp = &frame->tf_rdi; 1127 argp += reg; 1128 argsdst = (register_t *)(&args.nosys.sysmsg + 1); 1129 /* 1130 * JG can we overflow the space pointed to by 'argsdst' 1131 * either with 'bcopy' or with 'copyin'? 
1132 */ 1133 bcopy(argp, argsdst, sizeof(register_t) * regcnt); 1134 /* 1135 * copyin is MP aware, but the tracing code is not 1136 */ 1137 if (narg > regcnt) { 1138 KASSERT(params != NULL, ("copyin args with no params!")); 1139 error = copyin(params, &argsdst[regcnt], 1140 (narg - regcnt) * sizeof(register_t)); 1141 if (error) { 1142 #ifdef KTRACE 1143 if (KTRPOINT(td, KTR_SYSCALL)) { 1144 ktrsyscall(lp, code, narg, 1145 (void *)(&args.nosys.sysmsg + 1)); 1146 } 1147 #endif 1148 goto bad; 1149 } 1150 } 1151 1152 #ifdef KTRACE 1153 if (KTRPOINT(td, KTR_SYSCALL)) { 1154 ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1)); 1155 } 1156 #endif 1157 1158 /* 1159 * Default return value is 0 (will be copied to %rax). Double-value 1160 * returns use %rax and %rdx. %rdx is left unchanged for system 1161 * calls which return only one result. 1162 */ 1163 args.sysmsg_fds[0] = 0; 1164 args.sysmsg_fds[1] = frame->tf_rdx; 1165 1166 /* 1167 * The syscall might manipulate the trap frame. If it does it 1168 * will probably return EJUSTRETURN. 1169 */ 1170 args.sysmsg_frame = frame; 1171 1172 STOPEVENT(p, S_SCE, narg); /* MP aware */ 1173 1174 /* 1175 * NOTE: All system calls run MPSAFE now. The system call itself 1176 * is responsible for getting the MP lock. 1177 */ 1178 error = (*callp->sy_call)(&args); 1179 1180 #if 0 1181 kprintf("system call %d returned %d\n", code, error); 1182 #endif 1183 1184 out: 1185 /* 1186 * MP SAFE (we may or may not have the MP lock at this point) 1187 */ 1188 switch (error) { 1189 case 0: 1190 /* 1191 * Reinitialize proc pointer `p' as it may be different 1192 * if this is a child returning from fork syscall. 1193 */ 1194 p = curproc; 1195 lp = curthread->td_lwp; 1196 frame->tf_rax = args.sysmsg_fds[0]; 1197 frame->tf_rdx = args.sysmsg_fds[1]; 1198 frame->tf_rflags &= ~PSL_C; 1199 break; 1200 case ERESTART: 1201 /* 1202 * Reconstruct pc, we know that 'syscall' is 2 bytes. 
1203 * We have to do a full context restore so that %r10 1204 * (which was holding the value of %rcx) is restored for 1205 * the next iteration. 1206 */ 1207 frame->tf_rip -= frame->tf_err; 1208 frame->tf_r10 = frame->tf_rcx; 1209 break; 1210 case EJUSTRETURN: 1211 break; 1212 case EASYNC: 1213 panic("Unexpected EASYNC return value (for now)"); 1214 default: 1215 bad: 1216 if (p->p_sysent->sv_errsize) { 1217 if (error >= p->p_sysent->sv_errsize) 1218 error = -1; /* XXX */ 1219 else 1220 error = p->p_sysent->sv_errtbl[error]; 1221 } 1222 frame->tf_rax = error; 1223 frame->tf_rflags |= PSL_C; 1224 break; 1225 } 1226 1227 /* 1228 * Traced syscall. trapsignal() is not MP aware. 1229 */ 1230 if (orig_tf_rflags & PSL_T) { 1231 frame->tf_rflags &= ~PSL_T; 1232 trapsignal(lp, SIGTRAP, 0); 1233 } 1234 1235 /* 1236 * Handle reschedule and other end-of-syscall issues 1237 */ 1238 userret(lp, frame, sticks); 1239 1240 #ifdef KTRACE 1241 if (KTRPOINT(td, KTR_SYSRET)) { 1242 ktrsysret(lp, code, error, args.sysmsg_result); 1243 } 1244 #endif 1245 1246 /* 1247 * This works because errno is findable through the 1248 * register set. If we ever support an emulation where this 1249 * is not the case, this code will need to be revisited. 1250 */ 1251 STOPEVENT(p, S_SCX, code); 1252 1253 userexit(lp); 1254 KTR_LOG(kernentry_syscall_ret, lp->lwp_proc->p_pid, lp->lwp_tid, error); 1255 #ifdef INVARIANTS 1256 KASSERT(&td->td_toks_base == td->td_toks_stop, 1257 ("syscall: critical section count mismatch! %d/%d", 1258 crit_count, td->td_pri)); 1259 KASSERT(curstop == td->td_toks_stop, 1260 ("syscall: extra tokens held after trap! 
%ld", 1261 td->td_toks_stop - &td->td_toks_base)); 1262 #endif 1263 } 1264 1265 /* 1266 * NOTE: mplock not held at any point 1267 */ 1268 void 1269 fork_return(struct lwp *lp, struct trapframe *frame) 1270 { 1271 frame->tf_rax = 0; /* Child returns zero */ 1272 frame->tf_rflags &= ~PSL_C; /* success */ 1273 frame->tf_rdx = 1; 1274 1275 generic_lwp_return(lp, frame); 1276 KTR_LOG(kernentry_fork_ret, lp->lwp_proc->p_pid, lp->lwp_tid); 1277 } 1278 1279 /* 1280 * Simplified back end of syscall(), used when returning from fork() 1281 * directly into user mode. 1282 * 1283 * This code will return back into the fork trampoline code which then 1284 * runs doreti. 1285 * 1286 * NOTE: The mplock is not held at any point. 1287 */ 1288 void 1289 generic_lwp_return(struct lwp *lp, struct trapframe *frame) 1290 { 1291 struct proc *p = lp->lwp_proc; 1292 1293 /* 1294 * Check for exit-race. If one lwp exits the process concurrent with 1295 * another lwp creating a new thread, the two operations may cross 1296 * each other resulting in the newly-created lwp not receiving a 1297 * KILL signal. 1298 */ 1299 if (p->p_flags & P_WEXIT) { 1300 lwpsignal(p, lp, SIGKILL); 1301 } 1302 1303 /* 1304 * Newly forked processes are given a kernel priority. We have to 1305 * adjust the priority to a normal user priority and fake entry 1306 * into the kernel (call userenter()) to install a passive release 1307 * function just in case userret() decides to stop the process. This 1308 * can occur when ^Z races a fork. If we do not install the passive 1309 * release function the current process designation will not be 1310 * released when the thread goes to sleep. 
1311 */ 1312 lwkt_setpri_self(TDPRI_USER_NORM); 1313 userenter(lp->lwp_thread, p); 1314 userret(lp, frame, 0); 1315 #ifdef KTRACE 1316 if (KTRPOINT(lp->lwp_thread, KTR_SYSRET)) 1317 ktrsysret(lp, SYS_fork, 0, 0); 1318 #endif 1319 lp->lwp_flags |= LWP_PASSIVE_ACQ; 1320 userexit(lp); 1321 lp->lwp_flags &= ~LWP_PASSIVE_ACQ; 1322 } 1323 1324 /* 1325 * doreti has turned into this. The frame is directly on the stack. We 1326 * pull everything else we need (fpu and tls context) from the current 1327 * thread. 1328 * 1329 * Note on fpu interactions: In a virtual kernel, the fpu context for 1330 * an emulated user mode process is not shared with the virtual kernel's 1331 * fpu context, so we only have to 'stack' fpu contexts within the virtual 1332 * kernel itself, and not even then since the signal() contexts that we care 1333 * about save and restore the FPU state (I think anyhow). 1334 * 1335 * vmspace_ctl() returns an error only if it had problems instaling the 1336 * context we supplied or problems copying data to/from our VM space. 1337 */ 1338 void 1339 go_user(struct intrframe *frame) 1340 { 1341 struct trapframe *tf = (void *)&frame->if_rdi; 1342 globaldata_t gd; 1343 int r; 1344 void *id; 1345 1346 /* 1347 * Interrupts may be disabled on entry, make sure all signals 1348 * can be received before beginning our loop. 1349 */ 1350 sigsetmask(0); 1351 1352 /* 1353 * Switch to the current simulated user process, then call 1354 * user_trap() when we break out of it (usually due to a signal). 1355 */ 1356 for (;;) { 1357 #if 1 1358 /* 1359 * Always make the FPU state correct. This should generally 1360 * be faster because the cost of taking a #NM fault through 1361 * the vkernel to the real kernel is astronomical. 
1362 */ 1363 crit_enter(); 1364 tf->tf_xflags &= ~PGEX_FPFAULT; 1365 if (mdcpu->gd_npxthread != curthread) { 1366 if (mdcpu->gd_npxthread) 1367 npxsave(mdcpu->gd_npxthread->td_savefpu); 1368 npxdna(tf); 1369 } 1370 #else 1371 /* 1372 * Tell the real kernel whether it is ok to use the FP 1373 * unit or not, allowing us to take a T_DNA exception 1374 * if the context tries to use the FP. 1375 */ 1376 if (mdcpu->gd_npxthread == curthread) { 1377 tf->tf_xflags &= ~PGEX_FPFAULT; 1378 } else { 1379 tf->tf_xflags |= PGEX_FPFAULT; 1380 } 1381 #endif 1382 1383 /* 1384 * Run emulated user process context. This call interlocks 1385 * with new mailbox signals. 1386 * 1387 * Set PGEX_U unconditionally, indicating a user frame (the 1388 * bit is normally set only by T_PAGEFLT). 1389 */ 1390 if (vmm_enabled) 1391 id = (void *)vtophys(curproc->p_vmspace->vm_pmap.pm_pml4); 1392 else 1393 id = &curproc->p_vmspace->vm_pmap; 1394 1395 /* 1396 * The GDF_VIRTUSER hack helps statclock() figure out who 1397 * the tick belongs to. 1398 */ 1399 gd = mycpu; 1400 gd->gd_flags |= GDF_VIRTUSER; 1401 r = vmspace_ctl(id, VMSPACE_CTL_RUN, tf, 1402 &curthread->td_savevext); 1403 gd->gd_flags &= ~GDF_VIRTUSER; 1404 1405 frame->if_xflags |= PGEX_U; 1406 1407 /* 1408 * Immediately save the user FPU state. The vkernel is a 1409 * user program and libraries like libc will use the FP 1410 * unit. 
1411 */ 1412 if (mdcpu->gd_npxthread == curthread) { 1413 npxsave(mdcpu->gd_npxthread->td_savefpu); 1414 } 1415 crit_exit(); 1416 #if 0 1417 kprintf("GO USER %d trap %ld EVA %08lx RIP %08lx RSP %08lx XFLAGS %02lx/%02lx\n", 1418 r, tf->tf_trapno, tf->tf_addr, tf->tf_rip, tf->tf_rsp, 1419 tf->tf_xflags, frame->if_xflags); 1420 #endif 1421 if (r < 0) { 1422 if (errno != EINTR) 1423 panic("vmspace_ctl failed error %d", errno); 1424 } else { 1425 if (tf->tf_trapno) { 1426 user_trap(tf); 1427 } 1428 } 1429 if (mycpu->gd_reqflags & RQF_AST_MASK) { 1430 tf->tf_trapno = T_ASTFLT; 1431 user_trap(tf); 1432 } 1433 tf->tf_trapno = 0; 1434 } 1435 } 1436 1437 /* 1438 * If PGEX_FPFAULT is set then set FP_VIRTFP in the PCB to force a T_DNA 1439 * fault (which is then passed back to the virtual kernel) if an attempt is 1440 * made to use the FP unit. 1441 * 1442 * XXX this is a fairly big hack. 1443 */ 1444 void 1445 set_vkernel_fp(struct trapframe *frame) 1446 { 1447 struct thread *td = curthread; 1448 1449 if (frame->tf_xflags & PGEX_FPFAULT) { 1450 td->td_pcb->pcb_flags |= FP_VIRTFP; 1451 if (mdcpu->gd_npxthread == td) 1452 npxexit(); 1453 } else { 1454 td->td_pcb->pcb_flags &= ~FP_VIRTFP; 1455 } 1456 } 1457 1458 /* 1459 * Called from vkernel_trap() to fixup the vkernel's syscall 1460 * frame for vmspace_ctl() return. 1461 */ 1462 void 1463 cpu_vkernel_trap(struct trapframe *frame, int error) 1464 { 1465 frame->tf_rax = error; 1466 if (error) 1467 frame->tf_rflags |= PSL_C; 1468 else 1469 frame->tf_rflags &= ~PSL_C; 1470 } 1471