1 /*- 2 * Copyright (C) 1994, David Greenman 3 * Copyright (c) 1990, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the University of Utah, and William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 38 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ 39 */ 40 41 /* 42 * x86_64 Trap and System call handling 43 */ 44 45 #include "use_isa.h" 46 47 #include "opt_ddb.h" 48 #include "opt_ktrace.h" 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/proc.h> 53 #include <sys/pioctl.h> 54 #include <sys/kernel.h> 55 #include <sys/resourcevar.h> 56 #include <sys/signalvar.h> 57 #include <sys/signal2.h> 58 #include <sys/syscall.h> 59 #include <sys/sysctl.h> 60 #include <sys/sysent.h> 61 #include <sys/vmmeter.h> 62 #include <sys/malloc.h> 63 #ifdef KTRACE 64 #include <sys/ktrace.h> 65 #endif 66 #include <sys/ktr.h> 67 #include <sys/vkernel.h> 68 #include <sys/sysproto.h> 69 #include <sys/sysunion.h> 70 #include <sys/vmspace.h> 71 72 #include <vm/vm.h> 73 #include <vm/vm_param.h> 74 #include <sys/lock.h> 75 #include <vm/pmap.h> 76 #include <vm/vm_kern.h> 77 #include <vm/vm_map.h> 78 #include <vm/vm_page.h> 79 #include <vm/vm_extern.h> 80 81 #include <machine/cpu.h> 82 #include <machine/md_var.h> 83 #include <machine/pcb.h> 84 #include <machine/smp.h> 85 #include <machine/tss.h> 86 #include <machine/globaldata.h> 87 88 #include <ddb/ddb.h> 89 90 #include <sys/msgport2.h> 91 #include <sys/thread2.h> 92 #include <sys/mplock2.h> 93 94 int (*pmath_emulate) (struct trapframe *); 95 96 static int trap_pfault (struct trapframe *, int, vm_offset_t); 
97 static void trap_fatal (struct trapframe *, int, vm_offset_t); 98 void dblfault_handler (void); 99 extern int vmm_enabled; 100 101 static struct krate segfltrate = { 1 }; 102 103 #if 0 104 extern inthand_t IDTVEC(syscall); 105 #endif 106 107 #define MAX_TRAP_MSG 30 108 static char *trap_msg[] = { 109 "", /* 0 unused */ 110 "privileged instruction fault", /* 1 T_PRIVINFLT */ 111 "", /* 2 unused */ 112 "breakpoint instruction fault", /* 3 T_BPTFLT */ 113 "", /* 4 unused */ 114 "", /* 5 unused */ 115 "arithmetic trap", /* 6 T_ARITHTRAP */ 116 "system forced exception", /* 7 T_ASTFLT */ 117 "", /* 8 unused */ 118 "general protection fault", /* 9 T_PROTFLT */ 119 "trace trap", /* 10 T_TRCTRAP */ 120 "", /* 11 unused */ 121 "page fault", /* 12 T_PAGEFLT */ 122 "", /* 13 unused */ 123 "alignment fault", /* 14 T_ALIGNFLT */ 124 "", /* 15 unused */ 125 "", /* 16 unused */ 126 "", /* 17 unused */ 127 "integer divide fault", /* 18 T_DIVIDE */ 128 "non-maskable interrupt trap", /* 19 T_NMI */ 129 "overflow trap", /* 20 T_OFLOW */ 130 "FPU bounds check fault", /* 21 T_BOUND */ 131 "FPU device not available", /* 22 T_DNA */ 132 "double fault", /* 23 T_DOUBLEFLT */ 133 "FPU operand fetch fault", /* 24 T_FPOPFLT */ 134 "invalid TSS fault", /* 25 T_TSSFLT */ 135 "segment not present fault", /* 26 T_SEGNPFLT */ 136 "stack fault", /* 27 T_STKFLT */ 137 "machine check trap", /* 28 T_MCHK */ 138 "SIMD floating-point exception", /* 29 T_XMMFLT */ 139 "reserved (unknown) fault", /* 30 T_RESERVED */ 140 }; 141 142 #ifdef DDB 143 static int ddb_on_nmi = 1; 144 SYSCTL_INT(_machdep, OID_AUTO, ddb_on_nmi, CTLFLAG_RW, 145 &ddb_on_nmi, 0, "Go to DDB on NMI"); 146 #endif 147 static int panic_on_nmi = 1; 148 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW, 149 &panic_on_nmi, 0, "Panic on NMI"); 150 151 /* 152 * Passively intercepts the thread switch function to increase 153 * the thread priority from a user priority to a kernel priority, reducing 154 * syscall and trap overhead for the 
case where no switch occurs. 155 * 156 * Synchronizes td_ucred with p_ucred. This is used by system calls, 157 * signal handling, faults, AST traps, and anything else that enters the 158 * kernel from userland and provides the kernel with a stable read-only 159 * copy of the process ucred. 160 */ 161 static __inline void 162 userenter(struct thread *curtd, struct proc *curp) 163 { 164 struct ucred *ocred; 165 struct ucred *ncred; 166 167 curtd->td_release = lwkt_passive_release; 168 169 if (curtd->td_ucred != curp->p_ucred) { 170 ncred = crhold(curp->p_ucred); 171 ocred = curtd->td_ucred; 172 curtd->td_ucred = ncred; 173 if (ocred) 174 crfree(ocred); 175 } 176 } 177 178 /* 179 * Handle signals, profiling, and other AST's and/or tasks that 180 * must be completed before we can return to or try to return to userland. 181 * 182 * Note that td_sticks is a 64 bit quantity, but there's no point doing 64 183 * arithmatic on the delta calculation so the absolute tick values are 184 * truncated to an integer. 185 */ 186 static void 187 userret(struct lwp *lp, struct trapframe *frame, int sticks) 188 { 189 struct proc *p = lp->lwp_proc; 190 int sig; 191 int ptok; 192 193 /* 194 * Charge system time if profiling. Note: times are in microseconds. 195 * This may do a copyout and block, so do it first even though it 196 * means some system time will be charged as user time. 197 */ 198 if (p->p_flags & P_PROFIL) { 199 addupc_task(p, frame->tf_rip, 200 (u_int)((int)lp->lwp_thread->td_sticks - sticks)); 201 } 202 203 recheck: 204 /* 205 * Specific on-return-to-usermode checks (LWP_MP_WEXIT, 206 * LWP_MP_VNLRU, etc). 207 */ 208 if (lp->lwp_mpflags & LWP_MP_URETMASK) 209 lwpuserret(lp); 210 211 /* 212 * Block here if we are in a stopped state. 213 */ 214 if (STOPLWP(p, lp)) { 215 lwkt_gettoken(&p->p_token); 216 tstop(); 217 lwkt_reltoken(&p->p_token); 218 goto recheck; 219 } 220 221 /* 222 * Post any pending upcalls. 
If running a virtual kernel be sure 223 * to restore the virtual kernel's vmspace before posting the upcall. 224 */ 225 if (p->p_flags & (P_SIGVTALRM | P_SIGPROF)) { 226 lwkt_gettoken(&p->p_token); 227 if (p->p_flags & P_SIGVTALRM) { 228 p->p_flags &= ~P_SIGVTALRM; 229 ksignal(p, SIGVTALRM); 230 } 231 if (p->p_flags & P_SIGPROF) { 232 p->p_flags &= ~P_SIGPROF; 233 ksignal(p, SIGPROF); 234 } 235 lwkt_reltoken(&p->p_token); 236 goto recheck; 237 } 238 239 /* 240 * Post any pending signals 241 * 242 * WARNING! postsig() can exit and not return. 243 */ 244 if ((sig = CURSIG_LCK_TRACE(lp, &ptok)) != 0) { 245 postsig(sig, ptok); 246 goto recheck; 247 } 248 249 /* 250 * block here if we are swapped out, but still process signals 251 * (such as SIGKILL). proc0 (the swapin scheduler) is already 252 * aware of our situation, we do not have to wake it up. 253 */ 254 if (p->p_flags & P_SWAPPEDOUT) { 255 lwkt_gettoken(&p->p_token); 256 get_mplock(); 257 p->p_flags |= P_SWAPWAIT; 258 swapin_request(); 259 if (p->p_flags & P_SWAPWAIT) 260 tsleep(p, PCATCH, "SWOUT", 0); 261 p->p_flags &= ~P_SWAPWAIT; 262 rel_mplock(); 263 lwkt_reltoken(&p->p_token); 264 goto recheck; 265 } 266 267 /* 268 * In a multi-threaded program it is possible for a thread to change 269 * signal state during a system call which temporarily changes the 270 * signal mask. In this case postsig() might not be run and we 271 * have to restore the mask ourselves. 272 */ 273 if (lp->lwp_flags & LWP_OLDMASK) { 274 lp->lwp_flags &= ~LWP_OLDMASK; 275 lp->lwp_sigmask = lp->lwp_oldsigmask; 276 goto recheck; 277 } 278 } 279 280 /* 281 * Cleanup from userenter and any passive release that might have occured. 282 * We must reclaim the current-process designation before we can return 283 * to usermode. We also handle both LWKT and USER reschedule requests. 
284 */ 285 static __inline void 286 userexit(struct lwp *lp) 287 { 288 struct thread *td = lp->lwp_thread; 289 /* globaldata_t gd = td->td_gd; */ 290 291 /* 292 * Handle stop requests at kernel priority. Any requests queued 293 * after this loop will generate another AST. 294 */ 295 while (STOPLWP(lp->lwp_proc, lp)) { 296 lwkt_gettoken(&lp->lwp_proc->p_token); 297 tstop(); 298 lwkt_reltoken(&lp->lwp_proc->p_token); 299 } 300 301 /* 302 * Reduce our priority in preparation for a return to userland. If 303 * our passive release function was still in place, our priority was 304 * never raised and does not need to be reduced. 305 */ 306 lwkt_passive_recover(td); 307 308 /* 309 * Become the current user scheduled process if we aren't already, 310 * and deal with reschedule requests and other factors. 311 */ 312 lp->lwp_proc->p_usched->acquire_curproc(lp); 313 /* WARNING: we may have migrated cpu's */ 314 /* gd = td->td_gd; */ 315 } 316 317 #if !defined(KTR_KERNENTRY) 318 #define KTR_KERNENTRY KTR_ALL 319 #endif 320 KTR_INFO_MASTER(kernentry); 321 KTR_INFO(KTR_KERNENTRY, kernentry, trap, 0, 322 "TRAP(pid %hd, tid %hd, trapno %ld, eva %lu)", 323 pid_t pid, lwpid_t tid, register_t trapno, vm_offset_t eva); 324 KTR_INFO(KTR_KERNENTRY, kernentry, trap_ret, 0, "TRAP_RET(pid %hd, tid %hd)", 325 pid_t pid, lwpid_t tid); 326 KTR_INFO(KTR_KERNENTRY, kernentry, syscall, 0, "SYSC(pid %hd, tid %hd, nr %ld)", 327 pid_t pid, lwpid_t tid, register_t trapno); 328 KTR_INFO(KTR_KERNENTRY, kernentry, syscall_ret, 0, "SYSRET(pid %hd, tid %hd, err %d)", 329 pid_t pid, lwpid_t tid, int err); 330 KTR_INFO(KTR_KERNENTRY, kernentry, fork_ret, 0, "FORKRET(pid %hd, tid %hd)", 331 pid_t pid, lwpid_t tid); 332 333 /* 334 * Exception, fault, and trap interface to the kernel. 335 * This common code is called from assembly language IDT gate entry 336 * routines that prepare a suitable stack frame, and restore this 337 * frame after the exception has been processed. 
338 * 339 * This function is also called from doreti in an interlock to handle ASTs. 340 * For example: hardwareint->INTROUTINE->(set ast)->doreti->trap 341 * 342 * NOTE! We have to retrieve the fault address prior to obtaining the 343 * MP lock because get_mplock() may switch out. YYY cr2 really ought 344 * to be retrieved by the assembly code, not here. 345 * 346 * XXX gd_trap_nesting_level currently prevents lwkt_switch() from panicing 347 * if an attempt is made to switch from a fast interrupt or IPI. This is 348 * necessary to properly take fatal kernel traps on SMP machines if 349 * get_mplock() has to block. 350 */ 351 352 void 353 user_trap(struct trapframe *frame) 354 { 355 struct globaldata *gd = mycpu; 356 struct thread *td = gd->gd_curthread; 357 struct lwp *lp = td->td_lwp; 358 struct proc *p; 359 int sticks = 0; 360 int i = 0, ucode = 0, type, code; 361 #ifdef INVARIANTS 362 int crit_count = td->td_critcount; 363 lwkt_tokref_t curstop = td->td_toks_stop; 364 #endif 365 vm_offset_t eva; 366 367 p = td->td_proc; 368 369 if (frame->tf_trapno == T_PAGEFLT) 370 eva = frame->tf_addr; 371 else 372 eva = 0; 373 #if 0 374 kprintf("USER_TRAP AT %08lx xflags %ld trapno %ld eva %08lx\n", 375 frame->tf_rip, frame->tf_xflags, frame->tf_trapno, eva); 376 #endif 377 378 /* 379 * Everything coming from user mode runs through user_trap, 380 * including system calls. 381 */ 382 if (frame->tf_trapno == T_FAST_SYSCALL) { 383 syscall2(frame); 384 return; 385 } 386 387 KTR_LOG(kernentry_trap, lp->lwp_proc->p_pid, lp->lwp_tid, 388 frame->tf_trapno, eva); 389 390 #ifdef DDB 391 if (db_active) { 392 eva = (frame->tf_trapno == T_PAGEFLT ? 
rcr2() : 0); 393 ++gd->gd_trap_nesting_level; 394 trap_fatal(frame, TRUE, eva); 395 --gd->gd_trap_nesting_level; 396 goto out2; 397 } 398 #endif 399 400 type = frame->tf_trapno; 401 code = frame->tf_err; 402 403 userenter(td, p); 404 405 sticks = (int)td->td_sticks; 406 lp->lwp_md.md_regs = frame; 407 408 switch (type) { 409 case T_PRIVINFLT: /* privileged instruction fault */ 410 i = SIGILL; 411 ucode = ILL_PRVOPC; 412 break; 413 414 case T_BPTFLT: /* bpt instruction fault */ 415 case T_TRCTRAP: /* trace trap */ 416 frame->tf_rflags &= ~PSL_T; 417 i = SIGTRAP; 418 ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT); 419 break; 420 421 case T_ARITHTRAP: /* arithmetic trap */ 422 ucode = code; 423 i = SIGFPE; 424 break; 425 426 case T_ASTFLT: /* Allow process switch */ 427 mycpu->gd_cnt.v_soft++; 428 if (mycpu->gd_reqflags & RQF_AST_OWEUPC) { 429 atomic_clear_int(&mycpu->gd_reqflags, RQF_AST_OWEUPC); 430 addupc_task(p, p->p_prof.pr_addr, p->p_prof.pr_ticks); 431 } 432 goto out; 433 434 /* 435 * The following two traps can happen in 436 * vm86 mode, and, if so, we want to handle 437 * them specially. 
438 */ 439 case T_PROTFLT: /* general protection fault */ 440 case T_STKFLT: /* stack fault */ 441 #if 0 442 if (frame->tf_eflags & PSL_VM) { 443 i = vm86_emulate((struct vm86frame *)frame); 444 if (i == 0) 445 goto out; 446 break; 447 } 448 #endif 449 /* FALL THROUGH */ 450 451 case T_SEGNPFLT: /* segment not present fault */ 452 case T_TSSFLT: /* invalid TSS fault */ 453 case T_DOUBLEFLT: /* double fault */ 454 default: 455 i = SIGBUS; 456 ucode = code + BUS_SEGM_FAULT ; 457 break; 458 459 case T_PAGEFLT: /* page fault */ 460 i = trap_pfault(frame, TRUE, eva); 461 if (i == -1 || i == 0) 462 goto out; 463 464 465 if (i == SIGSEGV) 466 ucode = SEGV_MAPERR; 467 else { 468 i = SIGSEGV; 469 ucode = SEGV_ACCERR; 470 } 471 break; 472 473 case T_DIVIDE: /* integer divide fault */ 474 ucode = FPE_INTDIV; 475 i = SIGFPE; 476 break; 477 478 #if NISA > 0 479 case T_NMI: 480 /* machine/parity/power fail/"kitchen sink" faults */ 481 if (isa_nmi(code) == 0) { 482 #ifdef DDB 483 /* 484 * NMI can be hooked up to a pushbutton 485 * for debugging. 486 */ 487 if (ddb_on_nmi) { 488 kprintf ("NMI ... going to debugger\n"); 489 kdb_trap(type, 0, frame); 490 } 491 #endif /* DDB */ 492 goto out2; 493 } else if (panic_on_nmi) 494 panic("NMI indicates hardware failure"); 495 break; 496 #endif /* NISA > 0 */ 497 498 case T_OFLOW: /* integer overflow fault */ 499 ucode = FPE_INTOVF; 500 i = SIGFPE; 501 break; 502 503 case T_BOUND: /* bounds check fault */ 504 ucode = FPE_FLTSUB; 505 i = SIGFPE; 506 break; 507 508 case T_DNA: 509 /* 510 * Virtual kernel intercept - pass the DNA exception 511 * to the (emulated) virtual kernel if it asked to handle 512 * it. This occurs when the virtual kernel is holding 513 * onto the FP context for a different emulated 514 * process then the one currently running. 515 * 516 * We must still call npxdna() since we may have 517 * saved FP state that the (emulated) virtual kernel 518 * needs to hand over to a different emulated process. 
519 */ 520 if (lp->lwp_vkernel && lp->lwp_vkernel->ve && 521 (td->td_pcb->pcb_flags & FP_VIRTFP) 522 ) { 523 npxdna(frame); 524 break; 525 } 526 527 /* 528 * The kernel may have switched out the FP unit's 529 * state, causing the user process to take a fault 530 * when it tries to use the FP unit. Restore the 531 * state here 532 */ 533 if (npxdna(frame)) { 534 gd->gd_cnt.v_trap++; 535 goto out; 536 } 537 if (!pmath_emulate) { 538 i = SIGFPE; 539 ucode = FPE_FPU_NP_TRAP; 540 break; 541 } 542 i = (*pmath_emulate)(frame); 543 if (i == 0) { 544 if (!(frame->tf_rflags & PSL_T)) 545 goto out2; 546 frame->tf_rflags &= ~PSL_T; 547 i = SIGTRAP; 548 } 549 /* else ucode = emulator_only_knows() XXX */ 550 break; 551 552 case T_FPOPFLT: /* FPU operand fetch fault */ 553 ucode = T_FPOPFLT; 554 i = SIGILL; 555 break; 556 557 case T_XMMFLT: /* SIMD floating-point exception */ 558 ucode = 0; /* XXX */ 559 i = SIGFPE; 560 break; 561 } 562 563 /* 564 * Virtual kernel intercept - if the fault is directly related to a 565 * VM context managed by a virtual kernel then let the virtual kernel 566 * handle it. 567 */ 568 if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { 569 vkernel_trap(lp, frame); 570 goto out; 571 } 572 573 /* 574 * Translate fault for emulators (e.g. Linux) 575 */ 576 if (*p->p_sysent->sv_transtrap) 577 i = (*p->p_sysent->sv_transtrap)(i, type); 578 579 trapsignal(lp, i, ucode); 580 581 #ifdef DEBUG 582 if (type <= MAX_TRAP_MSG) { 583 uprintf("fatal process exception: %s", 584 trap_msg[type]); 585 if ((type == T_PAGEFLT) || (type == T_PROTFLT)) 586 uprintf(", fault VA = 0x%lx", (u_long)eva); 587 uprintf("\n"); 588 } 589 #endif 590 591 out: 592 userret(lp, frame, sticks); 593 userexit(lp); 594 out2: ; 595 KTR_LOG(kernentry_trap_ret, lp->lwp_proc->p_pid, lp->lwp_tid); 596 #ifdef INVARIANTS 597 KASSERT(crit_count == td->td_critcount, 598 ("trap: critical section count mismatch! 
%d/%d", 599 crit_count, td->td_pri)); 600 KASSERT(curstop == td->td_toks_stop, 601 ("trap: extra tokens held after trap! %ld/%ld", 602 curstop - &td->td_toks_base, 603 td->td_toks_stop - &td->td_toks_base)); 604 #endif 605 } 606 607 void 608 kern_trap(struct trapframe *frame) 609 { 610 struct globaldata *gd = mycpu; 611 struct thread *td = gd->gd_curthread; 612 struct lwp *lp; 613 struct proc *p; 614 int i = 0, ucode = 0, type, code; 615 #ifdef INVARIANTS 616 int crit_count = td->td_critcount; 617 lwkt_tokref_t curstop = td->td_toks_stop; 618 #endif 619 vm_offset_t eva; 620 621 lp = td->td_lwp; 622 p = td->td_proc; 623 624 if (frame->tf_trapno == T_PAGEFLT) 625 eva = frame->tf_addr; 626 else 627 eva = 0; 628 629 #ifdef DDB 630 if (db_active) { 631 ++gd->gd_trap_nesting_level; 632 trap_fatal(frame, FALSE, eva); 633 --gd->gd_trap_nesting_level; 634 goto out2; 635 } 636 #endif 637 638 type = frame->tf_trapno; 639 code = frame->tf_err; 640 641 #if 0 642 kernel_trap: 643 #endif 644 /* kernel trap */ 645 646 switch (type) { 647 case T_PAGEFLT: /* page fault */ 648 trap_pfault(frame, FALSE, eva); 649 goto out2; 650 651 case T_DNA: 652 /* 653 * The kernel may be using npx for copying or other 654 * purposes. 655 */ 656 panic("kernel NPX should not happen"); 657 if (npxdna(frame)) 658 goto out2; 659 break; 660 661 case T_PROTFLT: /* general protection fault */ 662 case T_SEGNPFLT: /* segment not present fault */ 663 /* 664 * Invalid segment selectors and out of bounds 665 * %eip's and %esp's can be set up in user mode. 666 * This causes a fault in kernel mode when the 667 * kernel tries to return to user mode. We want 668 * to get this fault so that we can fix the 669 * problem here and not have to check all the 670 * selectors and pointers when the user changes 671 * them. 
672 */ 673 if (mycpu->gd_intr_nesting_level == 0) { 674 if (td->td_pcb->pcb_onfault) { 675 frame->tf_rip = 676 (register_t)td->td_pcb->pcb_onfault; 677 goto out2; 678 } 679 } 680 break; 681 682 case T_TSSFLT: 683 /* 684 * PSL_NT can be set in user mode and isn't cleared 685 * automatically when the kernel is entered. This 686 * causes a TSS fault when the kernel attempts to 687 * `iret' because the TSS link is uninitialized. We 688 * want to get this fault so that we can fix the 689 * problem here and not every time the kernel is 690 * entered. 691 */ 692 if (frame->tf_rflags & PSL_NT) { 693 frame->tf_rflags &= ~PSL_NT; 694 goto out2; 695 } 696 break; 697 698 case T_TRCTRAP: /* trace trap */ 699 #if 0 700 if (frame->tf_eip == (int)IDTVEC(syscall)) { 701 /* 702 * We've just entered system mode via the 703 * syscall lcall. Continue single stepping 704 * silently until the syscall handler has 705 * saved the flags. 706 */ 707 goto out2; 708 } 709 if (frame->tf_eip == (int)IDTVEC(syscall) + 1) { 710 /* 711 * The syscall handler has now saved the 712 * flags. Stop single stepping it. 713 */ 714 frame->tf_eflags &= ~PSL_T; 715 goto out2; 716 } 717 #endif 718 #if 0 719 /* 720 * Ignore debug register trace traps due to 721 * accesses in the user's address space, which 722 * can happen under several conditions such as 723 * if a user sets a watchpoint on a buffer and 724 * then passes that buffer to a system call. 725 * We still want to get TRCTRAPS for addresses 726 * in kernel space because that is useful when 727 * debugging the kernel. 728 */ 729 if (user_dbreg_trap()) { 730 /* 731 * Reset breakpoint bits because the 732 * processor doesn't 733 */ 734 load_dr6(rdr6() & 0xfffffff0); 735 goto out2; 736 } 737 #endif 738 /* 739 * Fall through (TRCTRAP kernel mode, kernel address) 740 */ 741 case T_BPTFLT: 742 /* 743 * If DDB is enabled, let it handle the debugger trap. 744 * Otherwise, debugger traps "can't happen". 
745 */ 746 #ifdef DDB 747 if (kdb_trap (type, 0, frame)) 748 goto out2; 749 #endif 750 break; 751 case T_DIVIDE: 752 trap_fatal(frame, FALSE, eva); 753 goto out2; 754 case T_NMI: 755 trap_fatal(frame, FALSE, eva); 756 goto out2; 757 case T_SYSCALL80: 758 case T_FAST_SYSCALL: 759 /* 760 * Ignore this trap generated from a spurious SIGTRAP. 761 * 762 * single stepping in / syscalls leads to spurious / SIGTRAP 763 * so ignore 764 * 765 * Haiku (c) 2007 Simon 'corecode' Schubert 766 */ 767 goto out2; 768 } 769 770 /* 771 * Translate fault for emulators (e.g. Linux) 772 */ 773 if (*p->p_sysent->sv_transtrap) 774 i = (*p->p_sysent->sv_transtrap)(i, type); 775 776 gd->gd_cnt.v_trap++; 777 trapsignal(lp, i, ucode); 778 779 #ifdef DEBUG 780 if (type <= MAX_TRAP_MSG) { 781 uprintf("fatal process exception: %s", 782 trap_msg[type]); 783 if ((type == T_PAGEFLT) || (type == T_PROTFLT)) 784 uprintf(", fault VA = 0x%lx", (u_long)eva); 785 uprintf("\n"); 786 } 787 #endif 788 789 out2: 790 ; 791 #ifdef INVARIANTS 792 KASSERT(crit_count == td->td_critcount, 793 ("trap: critical section count mismatch! %d/%d", 794 crit_count, td->td_pri)); 795 KASSERT(curstop == td->td_toks_stop, 796 ("trap: extra tokens held after trap! %ld/%ld", 797 curstop - &td->td_toks_base, 798 td->td_toks_stop - &td->td_toks_base)); 799 #endif 800 } 801 802 int 803 trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) 804 { 805 vm_offset_t va; 806 struct vmspace *vm = NULL; 807 vm_map_t map = 0; 808 int rv = 0; 809 vm_prot_t ftype; 810 thread_t td = curthread; 811 struct lwp *lp = td->td_lwp; 812 int fault_flags; 813 814 va = trunc_page(eva); 815 if (usermode == FALSE) { 816 /* 817 * This is a fault on kernel virtual memory. 818 */ 819 map = &kernel_map; 820 } else { 821 /* 822 * This is a fault on non-kernel virtual memory. 823 * vm is initialized above to NULL. If curproc is NULL 824 * or curproc->p_vmspace is NULL the fault is fatal. 
825 */ 826 if (lp != NULL) 827 vm = lp->lwp_vmspace; 828 829 if (vm == NULL) 830 goto nogo; 831 832 map = &vm->vm_map; 833 } 834 835 if (frame->tf_err & PGEX_W) 836 ftype = VM_PROT_READ | VM_PROT_WRITE; 837 else if (frame->tf_err & PGEX_I) 838 ftype = VM_PROT_EXECUTE; 839 else 840 ftype = VM_PROT_READ; 841 842 if (map != &kernel_map) { 843 /* 844 * Keep swapout from messing with us during this 845 * critical time. 846 */ 847 PHOLD(lp->lwp_proc); 848 849 #if 0 850 /* 851 * Grow the stack if necessary 852 */ 853 /* grow_stack returns false only if va falls into 854 * a growable stack region and the stack growth 855 * fails. It returns true if va was not within 856 * a growable stack region, or if the stack 857 * growth succeeded. 858 */ 859 if (!grow_stack (map, va)) { 860 rv = KERN_FAILURE; 861 PRELE(lp->lwp_proc); 862 goto nogo; 863 } 864 #endif 865 866 fault_flags = 0; 867 if (usermode) 868 fault_flags |= VM_FAULT_BURST | VM_FAULT_USERMODE; 869 if (ftype & VM_PROT_WRITE) 870 fault_flags |= VM_FAULT_DIRTY; 871 else 872 fault_flags |= VM_FAULT_NORMAL; 873 rv = vm_fault(map, va, ftype, fault_flags); 874 875 PRELE(lp->lwp_proc); 876 } else { 877 /* 878 * Don't have to worry about process locking or stacks in the kernel. 879 */ 880 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); 881 } 882 883 if (rv == KERN_SUCCESS) 884 return (0); 885 nogo: 886 if (!usermode) { 887 if (td->td_gd->gd_intr_nesting_level == 0 && 888 td->td_pcb->pcb_onfault) { 889 frame->tf_rip = (register_t)td->td_pcb->pcb_onfault; 890 return (0); 891 } 892 trap_fatal(frame, usermode, eva); 893 return (-1); 894 } 895 896 /* 897 * NOTE: on x86_64 we have a tf_addr field in the trapframe, no 898 * kludge is needed to pass the fault address to signal handlers. 
899 */ 900 struct proc *p = td->td_proc; 901 krateprintf(&segfltrate, 902 "seg-fault accessing address %p " 903 "rip=%p pid=%d p_comm=%s\n", 904 (void *)va, 905 (void *)frame->tf_rip, p->p_pid, p->p_comm); 906 /* Debugger("seg-fault"); */ 907 908 return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); 909 } 910 911 static void 912 trap_fatal(struct trapframe *frame, int usermode, vm_offset_t eva) 913 { 914 int code, type, ss; 915 long rsp; 916 917 code = frame->tf_xflags; 918 type = frame->tf_trapno; 919 920 if (type <= MAX_TRAP_MSG) { 921 kprintf("\n\nFatal trap %d: %s while in %s mode\n", 922 type, trap_msg[type], 923 (usermode ? "user" : "kernel")); 924 } 925 /* two separate prints in case of a trap on an unmapped page */ 926 kprintf("cpuid = %d\n", mycpu->gd_cpuid); 927 if (type == T_PAGEFLT) { 928 kprintf("fault virtual address = %p\n", (void *)eva); 929 kprintf("fault code = %s %s, %s\n", 930 usermode ? "user" : "supervisor", 931 code & PGEX_W ? "write" : "read", 932 code & PGEX_P ? 
"protection violation" : "page not present"); 933 } 934 kprintf("instruction pointer = 0x%lx:0x%lx\n", 935 frame->tf_cs & 0xffff, frame->tf_rip); 936 if (usermode) { 937 ss = frame->tf_ss & 0xffff; 938 rsp = frame->tf_rsp; 939 } else { 940 ss = GSEL(GDATA_SEL, SEL_KPL); 941 rsp = (long)&frame->tf_rsp; 942 } 943 kprintf("stack pointer = 0x%x:0x%lx\n", ss, rsp); 944 kprintf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp); 945 kprintf("processor eflags = "); 946 if (frame->tf_rflags & PSL_T) 947 kprintf("trace trap, "); 948 if (frame->tf_rflags & PSL_I) 949 kprintf("interrupt enabled, "); 950 if (frame->tf_rflags & PSL_NT) 951 kprintf("nested task, "); 952 if (frame->tf_rflags & PSL_RF) 953 kprintf("resume, "); 954 #if 0 955 if (frame->tf_eflags & PSL_VM) 956 kprintf("vm86, "); 957 #endif 958 kprintf("IOPL = %jd\n", (intmax_t)((frame->tf_rflags & PSL_IOPL) >> 12)); 959 kprintf("current process = "); 960 if (curproc) { 961 kprintf("%lu (%s)\n", 962 (u_long)curproc->p_pid, curproc->p_comm ? 963 curproc->p_comm : ""); 964 } else { 965 kprintf("Idle\n"); 966 } 967 kprintf("current thread = pri %d ", curthread->td_pri); 968 if (curthread->td_critcount) 969 kprintf("(CRIT)"); 970 kprintf("\n"); 971 /** 972 * XXX FIXME: 973 * we probably SHOULD have stopped the other CPUs before now! 974 * another CPU COULD have been touching cpl at this moment... 975 */ 976 kprintf(" <- SMP: XXX"); 977 kprintf("\n"); 978 979 #ifdef KDB 980 if (kdb_trap(&psl)) 981 return; 982 #endif 983 #ifdef DDB 984 if ((debugger_on_panic || db_active) && kdb_trap(type, code, frame)) 985 return; 986 #endif 987 kprintf("trap number = %d\n", type); 988 if (type <= MAX_TRAP_MSG) 989 panic("%s", trap_msg[type]); 990 else 991 panic("unknown/reserved trap"); 992 } 993 994 /* 995 * Double fault handler. Called when a fault occurs while writing 996 * a frame for a trap/exception onto the stack. This usually occurs 997 * when the stack overflows (such is the case with infinite recursion, 998 * for example). 
999 * 1000 * XXX Note that the current PTD gets replaced by IdlePTD when the 1001 * task switch occurs. This means that the stack that was active at 1002 * the time of the double fault is not available at <kstack> unless 1003 * the machine was idle when the double fault occurred. The downside 1004 * of this is that "trace <ebp>" in ddb won't work. 1005 */ 1006 void 1007 dblfault_handler(void) 1008 { 1009 #if 0 /* JG */ 1010 struct mdglobaldata *gd = mdcpu; 1011 #endif 1012 1013 kprintf("\nFatal double fault:\n"); 1014 #if 0 /* JG */ 1015 kprintf("rip = 0x%lx\n", gd->gd_common_tss.tss_rip); 1016 kprintf("rsp = 0x%lx\n", gd->gd_common_tss.tss_rsp); 1017 kprintf("rbp = 0x%lx\n", gd->gd_common_tss.tss_rbp); 1018 #endif 1019 /* two separate prints in case of a trap on an unmapped page */ 1020 kprintf("cpuid = %d\n", mycpu->gd_cpuid); 1021 panic("double fault"); 1022 } 1023 1024 /* 1025 * syscall2 - MP aware system call request C handler 1026 * 1027 * A system call is essentially treated as a trap except that the 1028 * MP lock is not held on entry or return. We are responsible for 1029 * obtaining the MP lock if necessary and for handling ASTs 1030 * (e.g. a task switch) prior to return. 1031 * 1032 * In general, only simple access and manipulation of curproc and 1033 * the current stack is allowed without having to hold MP lock. 1034 * 1035 * MPSAFE - note that large sections of this routine are run without 1036 * the MP lock. 
 */
/*
 * Low-level system call handler called from the syscall trampoline with
 * the user trapframe on the stack.  Decodes the syscall number from %rax,
 * gathers up to six register arguments (plus any stack overflow args),
 * dispatches through the process's sysent table, and fixes up the frame
 * for return to user mode (%rax/%rdx results, PSL_C error flag, ERESTART
 * pc rewind).
 */
void
syscall2(struct trapframe *frame)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	caddr_t params;
	struct sysent *callp;
	register_t orig_tf_rflags;
	int sticks;
	int error;
	int narg;
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
	lwkt_tokref_t curstop = td->td_toks_stop;
#endif
	register_t *argp;
	u_int code;
	int reg, regcnt;
	union sysunion args;
	register_t *argsdst;

	mycpu->gd_cnt.v_syscall++;

	KTR_LOG(kernentry_syscall, lp->lwp_proc->p_pid, lp->lwp_tid,
		frame->tf_rax);

	userenter(td, p);	/* lazy raise our priority */

	/*
	 * Up to six arguments arrive in registers (%rdi..%r9); 'reg' is
	 * the index of the first real argument register and 'regcnt' how
	 * many register slots carry arguments.  Both are adjusted below
	 * for the indirect SYS_syscall/SYS___syscall forms.
	 */
	reg = 0;
	regcnt = 6;
	/*
	 * Misc
	 */
	sticks = (int)td->td_sticks;
	orig_tf_rflags = frame->tf_rflags;

	/*
	 * Virtual kernel intercept - if a VM context managed by a virtual
	 * kernel issues a system call the virtual kernel handles it, not us.
	 * Restore the virtual kernel context and return from its system
	 * call.  The current frame is copied out to the virtual kernel.
	 */
	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
		vkernel_trap(lp, frame);
		error = EJUSTRETURN;
		callp = NULL;
		code = 0;
		goto out;
	}

	/*
	 * Get the system call parameters and account for time
	 *
	 * Stack-passed overflow arguments start one register_t above the
	 * user %rsp -- presumably skipping the return address pushed by
	 * the user-mode syscall stub; TODO confirm against the libc stub.
	 */
	lp->lwp_md.md_regs = frame;
	params = (caddr_t)frame->tf_rsp + sizeof(register_t);
	code = frame->tf_rax;

	/*
	 * Indirect syscall: the real code is the first register argument,
	 * so shift the argument window right by one register.
	 */
	if (code == SYS_syscall || code == SYS___syscall) {
		code = frame->tf_rdi;
		reg++;
		regcnt--;
	}

	/*
	 * Out-of-range codes map to entry 0 of the sysent table
	 * (conventionally the nosys handler -- verify in sv_table setup).
	 */
	if (code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

	narg = callp->sy_narg;

	/*
	 * On x86_64 we get up to six arguments in registers.  The rest are
	 * on the stack.  The first six members of 'struct trapframe' happen
	 * to be the registers used to pass arguments, in exactly the right
	 * order.
	 */
	argp = &frame->tf_rdi;
	argp += reg;
	argsdst = (register_t *)(&args.nosys.sysmsg + 1);

	/*
	 * JG can we overflow the space pointed to by 'argsdst'
	 * either with 'bcopy' or with 'copyin'?
	 */
	bcopy(argp, argsdst, sizeof(register_t) * regcnt);

	/*
	 * copyin is MP aware, but the tracing code is not
	 */
	if (narg > regcnt) {
		KASSERT(params != NULL, ("copyin args with no params!"));
		/* Copy the stack-resident tail of the argument list. */
		error = copyin(params, &argsdst[regcnt],
			       (narg - regcnt) * sizeof(register_t));
		if (error) {
#ifdef KTRACE
			if (KTRPOINT(td, KTR_SYSCALL)) {
				ktrsyscall(lp, code, narg,
					   (void *)(&args.nosys.sysmsg + 1));
			}
#endif
			goto bad;
		}
	}

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL)) {
		ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1));
	}
#endif

	/*
	 * Default return value is 0 (will be copied to %rax).  Double-value
	 * returns use %rax and %rdx.  %rdx is left unchanged for system
	 * calls which return only one result.
	 */
	args.sysmsg_fds[0] = 0;
	args.sysmsg_fds[1] = frame->tf_rdx;

	/*
	 * The syscall might manipulate the trap frame. If it does it
	 * will probably return EJUSTRETURN.
	 */
	args.sysmsg_frame = frame;

	STOPEVENT(p, S_SCE, narg);	/* MP aware */

	/*
	 * NOTE: All system calls run MPSAFE now.  The system call itself
	 *	 is responsible for getting the MP lock.
	 */
	error = (*callp->sy_call)(&args);

#if 0
	kprintf("system call %d returned %d\n", code, error);
#endif

out:
	/*
	 * MP SAFE (we may or may not have the MP lock at this point)
	 */
	switch (error) {
	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		lp = curthread->td_lwp;
		frame->tf_rax = args.sysmsg_fds[0];
		frame->tf_rdx = args.sysmsg_fds[1];
		frame->tf_rflags &= ~PSL_C;	/* carry clear == success */
		break;
	case ERESTART:
		/*
		 * Reconstruct pc, we know that 'syscall' is 2 bytes.
		 * We have to do a full context restore so that %r10
		 * (which was holding the value of %rcx) is restored for
		 * the next iteration.
		 *
		 * NOTE(review): tf_err appears to hold the syscall
		 * instruction length here (set by the entry code) --
		 * confirm against the trampoline.
		 */
		frame->tf_rip -= frame->tf_err;
		frame->tf_r10 = frame->tf_rcx;
		break;
	case EJUSTRETURN:
		break;
	case EASYNC:
		panic("Unexpected EASYNC return value (for now)");
	default:
bad:
		/* Translate the error through the ABI's table, if any. */
		if (p->p_sysent->sv_errsize) {
			if (error >= p->p_sysent->sv_errsize)
				error = -1;	/* XXX */
			else
				error = p->p_sysent->sv_errtbl[error];
		}
		frame->tf_rax = error;
		frame->tf_rflags |= PSL_C;	/* carry set == error */
		break;
	}

	/*
	 * Traced syscall.  trapsignal() is not MP aware.
	 */
	if (orig_tf_rflags & PSL_T) {
		frame->tf_rflags &= ~PSL_T;
		trapsignal(lp, SIGTRAP, 0);
	}

	/*
	 * Handle reschedule and other end-of-syscall issues
	 */
	userret(lp, frame, sticks);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET)) {
		ktrsysret(lp, code, error, args.sysmsg_result);
	}
#endif

	/*
	 * This works because errno is findable through the
	 * register set.  If we ever support an emulation where this
	 * is not the case, this code will need to be revisited.
	 */
	STOPEVENT(p, S_SCX, code);

	userexit(lp);
	KTR_LOG(kernentry_syscall_ret, lp->lwp_proc->p_pid, lp->lwp_tid, error);
#ifdef INVARIANTS
	/* Sanity: no LWKT tokens may still be held on return to user. */
	KASSERT(&td->td_toks_base == td->td_toks_stop,
		("syscall: critical section count mismatch! %d/%d",
		crit_count, td->td_pri));
	KASSERT(curstop == td->td_toks_stop,
		("syscall: extra tokens held after trap! %ld",
		td->td_toks_stop - &td->td_toks_base));
#endif
}

/*
 * Set up the trapframe for a child lwp returning from fork() and hand
 * off to generic_lwp_return().
 *
 * NOTE: mplock not held at any point.
 */
void
fork_return(struct lwp *lp, struct trapframe *frame)
{
	frame->tf_rax = 0;		/* Child returns zero */
	frame->tf_rflags &= ~PSL_C;	/* success */
	frame->tf_rdx = 1;		/* presumably marks the child side
					 * of fork's two-value return --
					 * TODO confirm */

	generic_lwp_return(lp, frame);
	KTR_LOG(kernentry_fork_ret, lp->lwp_proc->p_pid, lp->lwp_tid);
}

/*
 * Simplified back end of syscall(), used when returning from fork()
 * directly into user mode.
 *
 * This code will return back into the fork trampoline code which then
 * runs doreti.
 *
 * NOTE: The mplock is not held at any point.
 */
void
generic_lwp_return(struct lwp *lp, struct trapframe *frame)
{
	struct proc *p = lp->lwp_proc;

	/*
	 * Check for exit-race.  If one lwp exits the process concurrent with
	 * another lwp creating a new thread, the two operations may cross
	 * each other resulting in the newly-created lwp not receiving a
	 * KILL signal.
	 */
	if (p->p_flags & P_WEXIT) {
		lwpsignal(p, lp, SIGKILL);
	}

	/*
	 * Newly forked processes are given a kernel priority.  We have to
	 * adjust the priority to a normal user priority and fake entry
	 * into the kernel (call userenter()) to install a passive release
	 * function just in case userret() decides to stop the process.  This
	 * can occur when ^Z races a fork.  If we do not install the passive
	 * release function the current process designation will not be
	 * released when the thread goes to sleep.
	 */
	lwkt_setpri_self(TDPRI_USER_NORM);
	userenter(lp->lwp_thread, p);
	userret(lp, frame, 0);
#ifdef KTRACE
	/* A forked child traces its return as SYS_fork with result 0. */
	if (KTRPOINT(lp->lwp_thread, KTR_SYSRET))
		ktrsysret(lp, SYS_fork, 0, 0);
#endif
	lp->lwp_flags |= LWP_PASSIVE_ACQ;
	userexit(lp);
	lp->lwp_flags &= ~LWP_PASSIVE_ACQ;
}

/*
 * doreti has turned into this.  The frame is directly on the stack.  We
 * pull everything else we need (fpu and tls context) from the current
 * thread.
 *
 * Note on fpu interactions: In a virtual kernel, the fpu context for
 * an emulated user mode process is not shared with the virtual kernel's
 * fpu context, so we only have to 'stack' fpu contexts within the virtual
 * kernel itself, and not even then since the signal() contexts that we care
 * about save and restore the FPU state (I think anyhow).
 *
 * vmspace_ctl() returns an error only if it had problems instaling the
 * context we supplied or problems copying data to/from our VM space.
 *
 * This function never returns: it loops running the emulated user
 * context via vmspace_ctl() and feeding resulting traps/ASTs to
 * user_trap().
 */
void
go_user(struct intrframe *frame)
{
	struct trapframe *tf = (void *)&frame->if_rdi;
	globaldata_t gd;
	int r;
	void *id;

	/*
	 * Interrupts may be disabled on entry, make sure all signals
	 * can be received before beginning our loop.
	 */
	sigsetmask(0);

	/*
	 * Switch to the current simulated user process, then call
	 * user_trap() when we break out of it (usually due to a signal).
	 */
	for (;;) {
#if 1
		/*
		 * Always make the FPU state correct.  This should generally
		 * be faster because the cost of taking a #NM fault through
		 * the vkernel to the real kernel is astronomical.
		 */
		crit_enter();
		tf->tf_xflags &= ~PGEX_FPFAULT;
		if (mdcpu->gd_npxthread != curthread) {
			/* Save any other thread's FPU state, then claim
			 * the FPU for ourselves. */
			if (mdcpu->gd_npxthread)
				npxsave(mdcpu->gd_npxthread->td_savefpu);
			npxdna(tf);
		}
#else
		/*
		 * Tell the real kernel whether it is ok to use the FP
		 * unit or not, allowing us to take a T_DNA exception
		 * if the context tries to use the FP.
		 */
		if (mdcpu->gd_npxthread == curthread) {
			tf->tf_xflags &= ~PGEX_FPFAULT;
		} else {
			tf->tf_xflags |= PGEX_FPFAULT;
		}
#endif

		/*
		 * Run emulated user process context.  This call interlocks
		 * with new mailbox signals.
		 *
		 * Set PGEX_U unconditionally, indicating a user frame (the
		 * bit is normally set only by T_PAGEFLT).
		 *
		 * The vmspace identifier is either the physical address of
		 * the PML4 (VMM mode) or the pmap structure itself.
		 */
		if (vmm_enabled)
			id = (void *)vtophys(curproc->p_vmspace->vm_pmap.pm_pml4);
		else
			id = &curproc->p_vmspace->vm_pmap;

		/*
		 * The GDF_VIRTUSER hack helps statclock() figure out who
		 * the tick belongs to.
		 */
		gd = mycpu;
		gd->gd_flags |= GDF_VIRTUSER;
		r = vmspace_ctl(id, VMSPACE_CTL_RUN, tf,
				&curthread->td_savevext);

		frame->if_xflags |= PGEX_U;

		/*
		 * Immediately save the user FPU state.  The vkernel is a
		 * user program and libraries like libc will use the FP
		 * unit.
		 */
		if (mdcpu->gd_npxthread == curthread) {
			npxsave(mdcpu->gd_npxthread->td_savefpu);
		}
		crit_exit();
		gd->gd_flags &= ~GDF_VIRTUSER;
#if 0
		kprintf("GO USER %d trap %ld EVA %08lx RIP %08lx RSP %08lx XFLAGS %02lx/%02lx\n",
			r, tf->tf_trapno, tf->tf_addr, tf->tf_rip, tf->tf_rsp,
			tf->tf_xflags, frame->if_xflags);
#endif
		/*
		 * r < 0 means vmspace_ctl() itself failed; only EINTR is
		 * tolerated.  Otherwise any pending trap from the emulated
		 * context is dispatched, followed by any AST request.
		 */
		if (r < 0) {
			if (errno != EINTR)
				panic("vmspace_ctl failed error %d", errno);
		} else {
			if (tf->tf_trapno) {
				user_trap(tf);
			}
		}
		if (mycpu->gd_reqflags & RQF_AST_MASK) {
			tf->tf_trapno = T_ASTFLT;
			user_trap(tf);
		}
		tf->tf_trapno = 0;
	}
}

/*
 * If PGEX_FPFAULT is set then set FP_VIRTFP in the PCB to force a T_DNA
 * fault (which is then passed back to the virtual kernel) if an attempt is
 * made to use the FP unit.
 *
 * XXX this is a fairly big hack.
 */
void
set_vkernel_fp(struct trapframe *frame)
{
	struct thread *td = curthread;

	if (frame->tf_xflags & PGEX_FPFAULT) {
		td->td_pcb->pcb_flags |= FP_VIRTFP;
		/* Give up the FPU if we currently own it so the next use
		 * faults. */
		if (mdcpu->gd_npxthread == td)
			npxexit();
	} else {
		td->td_pcb->pcb_flags &= ~FP_VIRTFP;
	}
}

/*
 * Called from vkernel_trap() to fixup the vkernel's syscall
 * frame for vmspace_ctl() return.
 *
 * Mirrors the normal syscall return convention: error code in %rax,
 * PSL_C set on error, clear on success.
 */
void
cpu_vkernel_trap(struct trapframe *frame, int error)
{
	frame->tf_rax = error;
	if (error)
		frame->tf_rflags |= PSL_C;
	else
		frame->tf_rflags &= ~PSL_C;
}