1 /*- 2 * Copyright (C) 1994, David Greenman 3 * Copyright (c) 1990, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the University of Utah, and William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 38 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ 39 */ 40 41 /* 42 * x86_64 Trap and System call handling 43 */ 44 45 #include "use_isa.h" 46 47 #include "opt_ddb.h" 48 #include "opt_ktrace.h" 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/proc.h> 53 #include <sys/pioctl.h> 54 #include <sys/kernel.h> 55 #include <sys/resourcevar.h> 56 #include <sys/signalvar.h> 57 #include <sys/signal2.h> 58 #include <sys/syscall.h> 59 #include <sys/sysctl.h> 60 #include <sys/sysent.h> 61 #include <sys/vmmeter.h> 62 #include <sys/malloc.h> 63 #ifdef KTRACE 64 #include <sys/ktrace.h> 65 #endif 66 #include <sys/ktr.h> 67 #include <sys/vkernel.h> 68 #include <sys/sysmsg.h> 69 #include <sys/vmspace.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_param.h> 73 #include <sys/lock.h> 74 #include <vm/pmap.h> 75 #include <vm/vm_kern.h> 76 #include <vm/vm_map.h> 77 #include <vm/vm_page.h> 78 #include <vm/vm_extern.h> 79 80 #include <machine/cpu.h> 81 #include <machine/md_var.h> 82 #include <machine/pcb.h> 83 #include <machine/smp.h> 84 #include <machine/tss.h> 85 #include <machine/globaldata.h> 86 87 #include <ddb/ddb.h> 88 89 #include <sys/msgport2.h> 90 #include <sys/thread2.h> 91 #include <sys/mplock2.h> 92 93 int (*pmath_emulate) (struct trapframe *); 94 95 static int trap_pfault (struct trapframe *, int, vm_offset_t); 96 static void trap_fatal 
(struct trapframe *, int, vm_offset_t); 97 void dblfault_handler (void); 98 99 static struct krate segfltrate = { 1 }; 100 101 #if 0 102 extern inthand_t IDTVEC(syscall); 103 #endif 104 105 #define MAX_TRAP_MSG 30 106 static char *trap_msg[] = { 107 "", /* 0 unused */ 108 "privileged instruction fault", /* 1 T_PRIVINFLT */ 109 "", /* 2 unused */ 110 "breakpoint instruction fault", /* 3 T_BPTFLT */ 111 "", /* 4 unused */ 112 "", /* 5 unused */ 113 "arithmetic trap", /* 6 T_ARITHTRAP */ 114 "system forced exception", /* 7 T_ASTFLT */ 115 "", /* 8 unused */ 116 "general protection fault", /* 9 T_PROTFLT */ 117 "trace trap", /* 10 T_TRCTRAP */ 118 "", /* 11 unused */ 119 "page fault", /* 12 T_PAGEFLT */ 120 "", /* 13 unused */ 121 "alignment fault", /* 14 T_ALIGNFLT */ 122 "", /* 15 unused */ 123 "", /* 16 unused */ 124 "", /* 17 unused */ 125 "integer divide fault", /* 18 T_DIVIDE */ 126 "non-maskable interrupt trap", /* 19 T_NMI */ 127 "overflow trap", /* 20 T_OFLOW */ 128 "FPU bounds check fault", /* 21 T_BOUND */ 129 "FPU device not available", /* 22 T_DNA */ 130 "double fault", /* 23 T_DOUBLEFLT */ 131 "FPU operand fetch fault", /* 24 T_FPOPFLT */ 132 "invalid TSS fault", /* 25 T_TSSFLT */ 133 "segment not present fault", /* 26 T_SEGNPFLT */ 134 "stack fault", /* 27 T_STKFLT */ 135 "machine check trap", /* 28 T_MCHK */ 136 "SIMD floating-point exception", /* 29 T_XMMFLT */ 137 "reserved (unknown) fault", /* 30 T_RESERVED */ 138 }; 139 140 #ifdef DDB 141 static int ddb_on_nmi = 1; 142 SYSCTL_INT(_machdep, OID_AUTO, ddb_on_nmi, CTLFLAG_RW, 143 &ddb_on_nmi, 0, "Go to DDB on NMI"); 144 #endif 145 static int panic_on_nmi = 1; 146 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW, 147 &panic_on_nmi, 0, "Panic on NMI"); 148 149 /* 150 * Passively intercepts the thread switch function to increase 151 * the thread priority from a user priority to a kernel priority, reducing 152 * syscall and trap overhead for the case where no switch occurs. 
 *
 * Synchronizes td_ucred with p_ucred.  This is used by system calls,
 * signal handling, faults, AST traps, and anything else that enters the
 * kernel from userland and provides the kernel with a stable read-only
 * copy of the process ucred.
 */
static __inline void
userenter(struct thread *curtd, struct proc *curp)
{
	struct ucred *ocred;
	struct ucred *ncred;

	curtd->td_release = lwkt_passive_release;

	if (curtd->td_ucred != curp->p_ucred) {
		/*
		 * Hold the new cred before releasing the old one so the
		 * thread never carries a stale pointer.
		 */
		ncred = crhold(curp->p_ucred);
		ocred = curtd->td_ucred;
		curtd->td_ucred = ncred;
		if (ocred)
			crfree(ocred);
	}
}

/*
 * Handle signals, profiling, and other AST's and/or tasks that
 * must be completed before we can return to or try to return to userland.
 *
 * Note that td_sticks is a 64 bit quantity, but there's no point doing 64
 * bit arithmetic on the delta calculation so the absolute tick values are
 * truncated to an integer.
 */
static void
userret(struct lwp *lp, struct trapframe *frame, int sticks)
{
	struct proc *p = lp->lwp_proc;
	int sig;
	int ptok;

	/*
	 * Charge system time if profiling.  Note: times are in microseconds.
	 * This may do a copyout and block, so do it first even though it
	 * means some system time will be charged as user time.
	 */
	if (p->p_flags & P_PROFIL) {
		addupc_task(p, frame->tf_rip,
			    (u_int)((int)lp->lwp_thread->td_sticks - sticks));
	}

	/*
	 * recheck loop: each handler below can generate more work (new
	 * signals, new stop requests), so every action loops back here
	 * until nothing is pending.
	 */
recheck:
	/*
	 * Specific on-return-to-usermode checks (LWP_MP_WEXIT,
	 * LWP_MP_VNLRU, etc).
	 */
	if (lp->lwp_mpflags & LWP_MP_URETMASK)
		lwpuserret(lp);

	/*
	 * Block here if we are in a stopped state.
	 */
	if (STOPLWP(p, lp)) {
		lwkt_gettoken(&p->p_token);
		tstop();
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * Post any pending upcalls.  If running a virtual kernel be sure
	 * to restore the virtual kernel's vmspace before posting the upcall.
	 */
	if (p->p_flags & (P_SIGVTALRM | P_SIGPROF)) {
		lwkt_gettoken(&p->p_token);
		if (p->p_flags & P_SIGVTALRM) {
			p->p_flags &= ~P_SIGVTALRM;
			ksignal(p, SIGVTALRM);
		}
		if (p->p_flags & P_SIGPROF) {
			p->p_flags &= ~P_SIGPROF;
			ksignal(p, SIGPROF);
		}
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * Post any pending signals
	 *
	 * WARNING!  postsig() can exit and not return.
	 */
	if ((sig = CURSIG_LCK_TRACE(lp, &ptok)) != 0) {
		postsig(sig, ptok);
		goto recheck;
	}

	/*
	 * In a multi-threaded program it is possible for a thread to change
	 * signal state during a system call which temporarily changes the
	 * signal mask.  In this case postsig() might not be run and we
	 * have to restore the mask ourselves.
	 */
	if (lp->lwp_flags & LWP_OLDMASK) {
		lp->lwp_flags &= ~LWP_OLDMASK;
		lp->lwp_sigmask = lp->lwp_oldsigmask;
		goto recheck;
	}
}

/*
 * Cleanup from userenter and any passive release that might have occurred.
 * We must reclaim the current-process designation before we can return
 * to usermode.  We also handle both LWKT and USER reschedule requests.
 */
static __inline void
userexit(struct lwp *lp)
{
	struct thread *td = lp->lwp_thread;
	/* globaldata_t gd = td->td_gd; */

	/*
	 * Handle stop requests at kernel priority.  Any requests queued
	 * after this loop will generate another AST.
	 */
	while (STOPLWP(lp->lwp_proc, lp)) {
		lwkt_gettoken(&lp->lwp_proc->p_token);
		tstop();
		lwkt_reltoken(&lp->lwp_proc->p_token);
	}

	/*
	 * Reduce our priority in preparation for a return to userland.  If
	 * our passive release function was still in place, our priority was
	 * never raised and does not need to be reduced.
	 */
	lwkt_passive_recover(td);

	/*
	 * Become the current user scheduled process if we aren't already,
	 * and deal with reschedule requests and other factors.
	 */
	lp->lwp_proc->p_usched->acquire_curproc(lp);
	/* WARNING: we may have migrated cpu's */
	/* gd = td->td_gd; */
}

#if !defined(KTR_KERNENTRY)
#define KTR_KERNENTRY	KTR_ALL
#endif
/* KTR trace points for kernel entry/exit events (traps, syscalls, fork) */
KTR_INFO_MASTER(kernentry);
KTR_INFO(KTR_KERNENTRY, kernentry, trap, 0,
	 "TRAP(pid %hd, tid %hd, trapno %ld, eva %lu)",
	 pid_t pid, lwpid_t tid, register_t trapno, vm_offset_t eva);
KTR_INFO(KTR_KERNENTRY, kernentry, trap_ret, 0, "TRAP_RET(pid %hd, tid %hd)",
	 pid_t pid, lwpid_t tid);
KTR_INFO(KTR_KERNENTRY, kernentry, syscall, 0, "SYSC(pid %hd, tid %hd, nr %ld)",
	 pid_t pid, lwpid_t tid, register_t trapno);
KTR_INFO(KTR_KERNENTRY, kernentry, syscall_ret, 0, "SYSRET(pid %hd, tid %hd, err %d)",
	 pid_t pid, lwpid_t tid, int err);
KTR_INFO(KTR_KERNENTRY, kernentry, fork_ret, 0, "FORKRET(pid %hd, tid %hd)",
	 pid_t pid, lwpid_t tid);

/*
 * Exception, fault, and trap interface to the kernel.
 * This common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed.
 *
 * This function is also called from doreti in an interlock to handle ASTs.
 * For example:  hardwareint->INTROUTINE->(set ast)->doreti->trap
 *
 * NOTE!  We have to retrieve the fault address prior to obtaining the
 * MP lock because get_mplock() may switch out.  YYY cr2 really ought
 * to be retrieved by the assembly code, not here.
 *
 * XXX gd_trap_nesting_level currently prevents lwkt_switch() from panicing
 * if an attempt is made to switch from a fast interrupt or IPI.  This is
 * necessary to properly take fatal kernel traps on SMP machines if
 * get_mplock() has to block.
 */

void
user_trap(struct trapframe *frame)
{
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;
	struct lwp *lp = td->td_lwp;
	struct proc *p;
	int sticks = 0;
	int i = 0, ucode = 0, type, code;	/* i = signal to deliver, ucode = si_code */
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
	lwkt_tokref_t curstop = td->td_toks_stop;
#endif
	vm_offset_t eva;

	p = td->td_proc;

	/* fault address is delivered in tf_addr for page faults */
	if (frame->tf_trapno == T_PAGEFLT)
		eva = frame->tf_addr;
	else
		eva = 0;
#if 0
	kprintf("USER_TRAP AT %08lx xflags %ld trapno %ld eva %08lx\n",
		frame->tf_rip, frame->tf_xflags, frame->tf_trapno, eva);
#endif

	/*
	 * Everything coming from user mode runs through user_trap,
	 * including system calls.
	 */
	if (frame->tf_trapno == T_FAST_SYSCALL) {
		syscall2(frame);
		return;
	}

	KTR_LOG(kernentry_trap, lp->lwp_proc->p_pid, lp->lwp_tid,
		frame->tf_trapno, eva);

#ifdef DDB
	if (db_active) {
		eva = (frame->tf_trapno == T_PAGEFLT ? rcr2() : 0);
		++gd->gd_trap_nesting_level;
		trap_fatal(frame, TRUE, eva);
		--gd->gd_trap_nesting_level;
		goto out2;
	}
#endif

	type = frame->tf_trapno;
	code = frame->tf_err;

	userenter(td, p);

	/* truncated tick snapshot; userret() only needs the delta */
	sticks = (int)td->td_sticks;
	lp->lwp_md.md_regs = frame;

	switch (type) {
	case T_PRIVINFLT:	/* privileged instruction fault */
		i = SIGILL;
		ucode = ILL_PRVOPC;
		break;

	case T_BPTFLT:		/* bpt instruction fault */
	case T_TRCTRAP:		/* trace trap */
		frame->tf_rflags &= ~PSL_T;
		i = SIGTRAP;
		ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
		break;

	case T_ARITHTRAP:	/* arithmetic trap */
		ucode = code;
		i = SIGFPE;
		break;

	case T_ASTFLT:		/* Allow process switch */
		mycpu->gd_cnt.v_soft++;
		if (mycpu->gd_reqflags & RQF_AST_OWEUPC) {
			atomic_clear_int(&mycpu->gd_reqflags, RQF_AST_OWEUPC);
			addupc_task(p, p->p_prof.pr_addr, p->p_prof.pr_ticks);
		}
		goto out;

	/*
	 * The following two traps can happen in
	 * vm86 mode, and, if so, we want to handle
	 * them specially.
	 */
	case T_PROTFLT:		/* general protection fault */
	case T_STKFLT:		/* stack fault */
#if 0
		if (frame->tf_eflags & PSL_VM) {
			i = vm86_emulate((struct vm86frame *)frame);
			if (i == 0)
				goto out;
			break;
		}
#endif
		/* FALL THROUGH */

	case T_SEGNPFLT:	/* segment not present fault */
	case T_TSSFLT:		/* invalid TSS fault */
	case T_DOUBLEFLT:	/* double fault */
	default:
		/* unknown/unhandled traps are reported as SIGBUS */
		i = SIGBUS;
		ucode = code + BUS_SEGM_FAULT ;
		break;

	case T_PAGEFLT:		/* page fault */
		/* 0 = resolved, -1 = fatal already handled, else signal */
		i = trap_pfault(frame, TRUE, eva);
		if (i == -1 || i == 0)
			goto out;


		if (i == SIGSEGV)
			ucode = SEGV_MAPERR;
		else {
			i = SIGSEGV;
			ucode = SEGV_ACCERR;
		}
		break;

	case T_DIVIDE:		/* integer divide fault */
		ucode = FPE_INTDIV;
		i = SIGFPE;
		break;

#if NISA > 0
	case T_NMI:
		/* machine/parity/power fail/"kitchen sink" faults */
		if (isa_nmi(code) == 0) {
#ifdef DDB
			/*
			 * NMI can be hooked up to a pushbutton
			 * for debugging.
			 */
			if (ddb_on_nmi) {
				kprintf ("NMI ... going to debugger\n");
				kdb_trap(type, 0, frame);
			}
#endif /* DDB */
			goto out2;
		} else if (panic_on_nmi)
			panic("NMI indicates hardware failure");
		break;
#endif /* NISA > 0 */

	case T_OFLOW:		/* integer overflow fault */
		ucode = FPE_INTOVF;
		i = SIGFPE;
		break;

	case T_BOUND:		/* bounds check fault */
		ucode = FPE_FLTSUB;
		i = SIGFPE;
		break;

	case T_DNA:
		/*
		 * Virtual kernel intercept - pass the DNA exception
		 * to the (emulated) virtual kernel if it asked to handle
		 * it.  This occurs when the virtual kernel is holding
		 * onto the FP context for a different emulated
		 * process than the one currently running.
		 *
		 * We must still call npxdna() since we may have
		 * saved FP state that the (emulated) virtual kernel
		 * needs to hand over to a different emulated process.
		 */
		if (lp->lwp_vkernel && lp->lwp_vkernel->ve &&
		    (td->td_pcb->pcb_flags & FP_VIRTFP)
		) {
			npxdna(frame);
			break;
		}

		/*
		 * The kernel may have switched out the FP unit's
		 * state, causing the user process to take a fault
		 * when it tries to use the FP unit.  Restore the
		 * state here
		 */
		if (npxdna(frame)) {
			gd->gd_cnt.v_trap++;
			goto out;
		}
		if (!pmath_emulate) {
			i = SIGFPE;
			ucode = FPE_FPU_NP_TRAP;
			break;
		}
		i = (*pmath_emulate)(frame);
		if (i == 0) {
			if (!(frame->tf_rflags & PSL_T))
				goto out2;
			frame->tf_rflags &= ~PSL_T;
			i = SIGTRAP;
		}
		/* else ucode = emulator_only_knows() XXX */
		break;

	case T_FPOPFLT:		/* FPU operand fetch fault */
		ucode = T_FPOPFLT;
		i = SIGILL;
		break;

	case T_XMMFLT:		/* SIMD floating-point exception */
		ucode = 0; /* XXX */
		i = SIGFPE;
		break;
	}

	/*
	 * Virtual kernel intercept - if the fault is directly related to a
	 * VM context managed by a virtual kernel then let the virtual kernel
	 * handle it.
	 */
	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
		vkernel_trap(lp, frame);
		goto out;
	}

	/*
	 * Translate fault for emulators (e.g. Linux)
	 */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	trapsignal(lp, i, ucode);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", (u_long)eva);
		uprintf("\n");
	}
#endif

out:
	userret(lp, frame, sticks);
	userexit(lp);
out2: ;
	KTR_LOG(kernentry_trap_ret, lp->lwp_proc->p_pid, lp->lwp_tid);
#ifdef INVARIANTS
	KASSERT(crit_count == td->td_critcount,
		("trap: critical section count mismatch! %d/%d",
		crit_count, td->td_pri));
	KASSERT(curstop == td->td_toks_stop,
		("trap: extra tokens held after trap! %ld/%ld",
		curstop - &td->td_toks_base,
		td->td_toks_stop - &td->td_toks_base));
#endif
}

/*
 * Traps taken while the (virtual) kernel itself was running.  Unlike
 * user_trap() there is no userenter/userret bracketing and most cases
 * either recover in place or are fatal.
 */
void
kern_trap(struct trapframe *frame)
{
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;
	struct lwp *lp;
	struct proc *p;
	int i = 0, ucode = 0, type, code;
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
	lwkt_tokref_t curstop = td->td_toks_stop;
#endif
	vm_offset_t eva;

	lp = td->td_lwp;
	p = td->td_proc;

	if (frame->tf_trapno == T_PAGEFLT)
		eva = frame->tf_addr;
	else
		eva = 0;

#ifdef DDB
	if (db_active) {
		++gd->gd_trap_nesting_level;
		trap_fatal(frame, FALSE, eva);
		--gd->gd_trap_nesting_level;
		goto out2;
	}
#endif

	type = frame->tf_trapno;
	code = frame->tf_err;

#if 0
kernel_trap:
#endif
	/* kernel trap */

	switch (type) {
	case T_PAGEFLT:			/* page fault */
		trap_pfault(frame, FALSE, eva);
		goto out2;

	case T_DNA:
		/*
		 * The kernel may be using npx for copying or other
		 * purposes.
		 */
		panic("kernel NPX should not happen");
		/* NOT REACHED */
		if (npxdna(frame))
			goto out2;
		break;

	case T_PROTFLT:		/* general protection fault */
	case T_SEGNPFLT:	/* segment not present fault */
		/*
		 * Invalid segment selectors and out of bounds
		 * %eip's and %esp's can be set up in user mode.
		 * This causes a fault in kernel mode when the
		 * kernel tries to return to user mode.  We want
		 * to get this fault so that we can fix the
		 * problem here and not have to check all the
		 * selectors and pointers when the user changes
		 * them.
		 */
		if (mycpu->gd_intr_nesting_level == 0) {
			if (td->td_pcb->pcb_onfault) {
				/* resume at the registered recovery point */
				frame->tf_rip =
				    (register_t)td->td_pcb->pcb_onfault;
				goto out2;
			}
		}
		break;

	case T_TSSFLT:
		/*
		 * PSL_NT can be set in user mode and isn't cleared
		 * automatically when the kernel is entered.  This
		 * causes a TSS fault when the kernel attempts to
		 * `iret' because the TSS link is uninitialized.  We
		 * want to get this fault so that we can fix the
		 * problem here and not every time the kernel is
		 * entered.
		 */
		if (frame->tf_rflags & PSL_NT) {
			frame->tf_rflags &= ~PSL_NT;
			goto out2;
		}
		break;

	case T_TRCTRAP:	 /* trace trap */
#if 0
		if (frame->tf_eip == (int)IDTVEC(syscall)) {
			/*
			 * We've just entered system mode via the
			 * syscall lcall.  Continue single stepping
			 * silently until the syscall handler has
			 * saved the flags.
			 */
			goto out2;
		}
		if (frame->tf_eip == (int)IDTVEC(syscall) + 1) {
			/*
			 * The syscall handler has now saved the
			 * flags.  Stop single stepping it.
			 */
			frame->tf_eflags &= ~PSL_T;
			goto out2;
		}
#endif
#if 0
		/*
		 * Ignore debug register trace traps due to
		 * accesses in the user's address space, which
		 * can happen under several conditions such as
		 * if a user sets a watchpoint on a buffer and
		 * then passes that buffer to a system call.
		 * We still want to get TRCTRAPS for addresses
		 * in kernel space because that is useful when
		 * debugging the kernel.
		 */
		if (user_dbreg_trap()) {
			/*
			 * Reset breakpoint bits because the
			 * processor doesn't
			 */
			load_dr6(rdr6() & 0xfffffff0);
			goto out2;
		}
#endif
		/*
		 * Fall through (TRCTRAP kernel mode, kernel address)
		 */
	case T_BPTFLT:
		/*
		 * If DDB is enabled, let it handle the debugger trap.
		 * Otherwise, debugger traps "can't happen".
		 */
#ifdef DDB
		if (kdb_trap (type, 0, frame))
			goto out2;
#endif
		break;
	case T_DIVIDE:
		trap_fatal(frame, FALSE, eva);
		goto out2;
	case T_NMI:
		trap_fatal(frame, FALSE, eva);
		goto out2;
	case T_SYSCALL80:
	case T_FAST_SYSCALL:
		/*
		 * Ignore this trap generated from a spurious SIGTRAP.
		 *
		 * single stepping in / syscalls leads to spurious / SIGTRAP
		 * so ignore
		 *
		 * Haiku (c) 2007 Simon 'corecode' Schubert
		 */
		goto out2;
	}

	/*
	 * Translate fault for emulators (e.g. Linux)
	 */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	gd->gd_cnt.v_trap++;
	trapsignal(lp, i, ucode);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", (u_long)eva);
		uprintf("\n");
	}
#endif

out2:
	;
#ifdef INVARIANTS
	KASSERT(crit_count == td->td_critcount,
		("trap: critical section count mismatch! 
%d/%d",
		crit_count, td->td_pri));
	KASSERT(curstop == td->td_toks_stop,
		("trap: extra tokens held after trap! %ld/%ld",
		curstop - &td->td_toks_base,
		td->td_toks_stop - &td->td_toks_base));
#endif
}

/*
 * Resolve a page fault at 'eva'.
 *
 * Returns 0 if the fault was resolved (or recovered via pcb_onfault),
 * -1 if a fatal kernel-mode fault was already reported via trap_fatal(),
 * or the signal number (SIGBUS/SIGSEGV) the caller should deliver.
 */
int
trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	vm_prot_t ftype;
	thread_t td = curthread;
	struct lwp *lp = td->td_lwp;
	int fault_flags;

	va = trunc_page(eva);
	if (usermode == FALSE) {
		/*
		 * This is a fault on kernel virtual memory.
		 */
		map = kernel_map;
	} else {
		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		if (lp != NULL)
			vm = lp->lwp_vmspace;

		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;
	}

	/* derive the fault type from the hardware error code bits */
	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_READ | VM_PROT_WRITE;
	else if (frame->tf_err & PGEX_I)
		ftype = VM_PROT_EXECUTE;
	else
		ftype = VM_PROT_READ;

	if (map != kernel_map) {
		/*
		 * Keep swapout from messing with us during this
		 * critical time.
		 */
		PHOLD(lp->lwp_proc);

#if 0
		/*
		 * Grow the stack if necessary
		 */
		/* grow_stack returns false only if va falls into
		 * a growable stack region and the stack growth
		 * fails.  It returns true if va was not within
		 * a growable stack region, or if the stack
		 * growth succeeded.
		 */
		if (!grow_stack (map, va)) {
			rv = KERN_FAILURE;
			PRELE(lp->lwp_proc);
			goto nogo;
		}
#endif

		fault_flags = 0;
		if (usermode)
			fault_flags |= VM_FAULT_BURST | VM_FAULT_USERMODE;
		if (ftype & VM_PROT_WRITE)
			fault_flags |= VM_FAULT_DIRTY;
		else
			fault_flags |= VM_FAULT_NORMAL;
		rv = vm_fault(map, va, ftype, fault_flags);

		PRELE(lp->lwp_proc);
	} else {
		/*
		 * Don't have to worry about process locking or stacks in the kernel.
		 */
		rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		/* kernel-mode fault: try the onfault recovery hook first */
		if (td->td_gd->gd_intr_nesting_level == 0 &&
		    td->td_pcb->pcb_onfault) {
			frame->tf_rip = (register_t)td->td_pcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame, usermode, eva);
		return (-1);
	}

	/*
	 * NOTE: on x86_64 we have a tf_addr field in the trapframe, no
	 * kludge is needed to pass the fault address to signal handlers.
	 */
	struct proc *p = td->td_proc;
	krateprintf(&segfltrate,
		    "seg-fault accessing address %p "
		    "rip=%p pid=%d p_comm=%s\n",
		    (void *)va,
		    (void *)frame->tf_rip, p->p_pid, p->p_comm);
	/* Debugger("seg-fault"); */

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}

/*
 * Dump registers and trap state to the console, then panic (or drop
 * into the kernel debugger).  Returns only if a debugger handles the
 * trap.
 */
static void
trap_fatal(struct trapframe *frame, int usermode, vm_offset_t eva)
{
	int code, type, ss;
	long rsp;

	/*
	 * NOTE(review): the fault code is taken from tf_xflags here rather
	 * than tf_err — presumably the vkernel frame carries it there;
	 * confirm against the vkernel trapframe layout.
	 */
	code = frame->tf_xflags;
	type = frame->tf_trapno;

	if (type <= MAX_TRAP_MSG) {
		kprintf("\n\nFatal trap %d: %s while in %s mode\n",
			type, trap_msg[type],
			(usermode ? "user" : "kernel"));
	}
	/* two separate prints in case of a trap on an unmapped page */
	kprintf("cpuid = %d\n", mycpu->gd_cpuid);
	if (type == T_PAGEFLT) {
		kprintf("fault virtual address = %p\n", (void *)eva);
		kprintf("fault code = %s %s, %s\n",
			usermode ? "user" : "supervisor",
			code & PGEX_W ? "write" : "read",
			code & PGEX_P ? "protection violation" : "page not present");
	}
	kprintf("instruction pointer = 0x%lx:0x%lx\n",
		frame->tf_cs & 0xffff, frame->tf_rip);
	if (usermode) {
		ss = frame->tf_ss & 0xffff;
		rsp = frame->tf_rsp;
	} else {
		ss = GSEL(GDATA_SEL, SEL_KPL);
		rsp = (long)&frame->tf_rsp;
	}
	kprintf("stack pointer = 0x%x:0x%lx\n", ss, rsp);
	kprintf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp);
	kprintf("processor eflags = ");
	if (frame->tf_rflags & PSL_T)
		kprintf("trace trap, ");
	if (frame->tf_rflags & PSL_I)
		kprintf("interrupt enabled, ");
	if (frame->tf_rflags & PSL_NT)
		kprintf("nested task, ");
	if (frame->tf_rflags & PSL_RF)
		kprintf("resume, ");
#if 0
	if (frame->tf_eflags & PSL_VM)
		kprintf("vm86, ");
#endif
	kprintf("IOPL = %jd\n", (intmax_t)((frame->tf_rflags & PSL_IOPL) >> 12));
	kprintf("current process = ");
	if (curproc) {
		kprintf("%lu (%s)\n",
		    (u_long)curproc->p_pid, curproc->p_comm ?
		    curproc->p_comm : "");
	} else {
		kprintf("Idle\n");
	}
	kprintf("current thread = pri %d ", curthread->td_pri);
	if (curthread->td_critcount)
		kprintf("(CRIT)");
	kprintf("\n");
/**
 *  XXX FIXME:
 *	we probably SHOULD have stopped the other CPUs before now!
 *	another CPU COULD have been touching cpl at this moment...
 */
	kprintf(" <- SMP: XXX");
	kprintf("\n");

#ifdef KDB
	if (kdb_trap(&psl))
		return;
#endif
#ifdef DDB
	if ((debugger_on_panic || db_active) && kdb_trap(type, code, frame))
		return;
#endif
	kprintf("trap number = %d\n", type);
	if (type <= MAX_TRAP_MSG)
		panic("%s", trap_msg[type]);
	else
		panic("unknown/reserved trap");
}

/*
 * Double fault handler. Called when a fault occurs while writing
 * a frame for a trap/exception onto the stack. 
This usually occurs
 * when the stack overflows (such is the case with infinite recursion,
 * for example).
 *
 * XXX Note that the current PTD gets replaced by IdlePTD when the
 * task switch occurs. This means that the stack that was active at
 * the time of the double fault is not available at <kstack> unless
 * the machine was idle when the double fault occurred.  The downside
 * of this is that "trace <ebp>" in ddb won't work.
 */
void
dblfault_handler(void)
{
#if 0 /* JG */
	struct mdglobaldata *gd = mdcpu;
#endif

	kprintf("\nFatal double fault:\n");
#if 0 /* JG */
	kprintf("rip = 0x%lx\n", gd->gd_common_tss.tss_rip);
	kprintf("rsp = 0x%lx\n", gd->gd_common_tss.tss_rsp);
	kprintf("rbp = 0x%lx\n", gd->gd_common_tss.tss_rbp);
#endif
	/* two separate prints in case of a trap on an unmapped page */
	kprintf("cpuid = %d\n", mycpu->gd_cpuid);
	panic("double fault");
}

/*
 * syscall2 -	MP aware system call request C handler
 *
 * A system call is essentially treated as a trap except that the
 * MP lock is not held on entry or return.  We are responsible for
 * obtaining the MP lock if necessary and for handling ASTs
 * (e.g. a task switch) prior to return.
 */
void
syscall2(struct trapframe *frame)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct sysent *callp;
	register_t orig_tf_rflags;
	int sticks;
	int error;
	int narg;
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
	lwkt_tokref_t curstop = td->td_toks_stop;
#endif
	struct sysmsg sysmsg;
	union sysunion *argp;
	u_int code;
	const int regcnt = 6;	/* number of args passed in registers */

	mycpu->gd_cnt.v_syscall++;

	KTR_LOG(kernentry_syscall, lp->lwp_proc->p_pid, lp->lwp_tid,
		frame->tf_rax);

	userenter(td, p);	/* lazy raise our priority */

	/*
	 * Misc
	 */
	sticks = (int)td->td_sticks;
	orig_tf_rflags = frame->tf_rflags;

	/*
	 * Virtual kernel intercept - if a VM context managed by a virtual
	 * kernel issues a system call the virtual kernel handles it, not us.
	 * Restore the virtual kernel context and return from its system
	 * call.  The current frame is copied out to the virtual kernel.
	 */
	if (__predict_false(lp->lwp_vkernel && lp->lwp_vkernel->ve)) {
		vkernel_trap(lp, frame);
		error = EJUSTRETURN;
		callp = NULL;
		code = 0;
		goto out;
	}

	/*
	 * Get the system call parameters and account for time
	 */
	lp->lwp_md.md_regs = frame;
	code = frame->tf_rax;	/* syscall number from %rax */

	if (code >= p->p_sysent->sv_size)
		code = SYS___nosys;
	argp = (union sysunion *)&frame->tf_rdi;
	callp = &p->p_sysent->sv_table[code];

	/*
	 * On x86_64 we get up to six arguments in registers.  The rest are
	 * on the stack.  The first six members of 'struct trapframe' happen
	 * to be the registers used to pass arguments, in exactly the right
	 * order.
	 *
	 * Any arguments beyond available argument-passing registers must
	 * be copyin()'d from the user stack.
	 */
	narg = callp->sy_narg;
	if (__predict_false(narg > regcnt)) {
		register_t *argsdst;
		caddr_t params;

		argsdst = (register_t *)&sysmsg.extargs;
		bcopy(argp, argsdst, sizeof(register_t) * regcnt);
		/* skip the return address pushed on the user stack */
		params = (caddr_t)frame->tf_rsp + sizeof(register_t);

		KASSERT(params != NULL, ("copyin args with no params!"));
		error = copyin(params, &argsdst[regcnt],
			       (narg - regcnt) * sizeof(register_t));
		argp = (void *)argsdst;
		if (error) {
#ifdef KTRACE
			if (KTRPOINT(td, KTR_SYSCALL)) {
				ktrsyscall(lp, code, narg, argp);
			}
#endif
			goto bad;
		}
	}

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL)) {
		ktrsyscall(lp, code, narg, argp);
	}
#endif

	/*
	 * Default return value is 0 (will be copied to %rax).  Double-value
	 * returns use %rax and %rdx.  %rdx is left unchanged for system
	 * calls which return only one result.
	 */
	sysmsg.sysmsg_fds[0] = 0;
	sysmsg.sysmsg_fds[1] = frame->tf_rdx;

	/*
	 * The syscall might manipulate the trap frame. If it does it
	 * will probably return EJUSTRETURN.
	 */
	sysmsg.sysmsg_frame = frame;

	STOPEVENT(p, S_SCE, narg);	/* MP aware */

	/*
	 * NOTE: All system calls run MPSAFE now.  The system call itself
	 *	 is responsible for getting the MP lock.
	 */
	error = (*callp->sy_call)(&sysmsg, argp);

#if 0
	kprintf("system call %d returned %d\n", code, error);
#endif

out:
	/*
	 * MP SAFE (we may or may not have the MP lock at this point)
	 */
	switch (error) {
	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		lp = curthread->td_lwp;
		frame->tf_rax = sysmsg.sysmsg_fds[0];
		frame->tf_rdx = sysmsg.sysmsg_fds[1];
		frame->tf_rflags &= ~PSL_C;
		break;
	case ERESTART:
		/*
		 * Reconstruct pc, we know that 'syscall' is 2 bytes.
		 * We have to do a full context restore so that %r10
		 * (which was holding the value of %rcx) is restored for
		 * the next iteration.
		 */
		frame->tf_rip -= frame->tf_err;
		frame->tf_r10 = frame->tf_rcx;
		break;
	case EJUSTRETURN:
		break;
	case EASYNC:
		panic("Unexpected EASYNC return value (for now)");
	default:
bad:
		if (p->p_sysent->sv_errsize) {
			if (error >= p->p_sysent->sv_errsize)
				error = -1;	/* XXX */
			else
				error = p->p_sysent->sv_errtbl[error];
		}
		/* error return: errno in %rax, carry flag set */
		frame->tf_rax = error;
		frame->tf_rflags |= PSL_C;
		break;
	}

	/*
	 * Traced syscall.  trapsignal() is not MP aware.
	 */
	if (orig_tf_rflags & PSL_T) {
		frame->tf_rflags &= ~PSL_T;
		trapsignal(lp, SIGTRAP, 0);
	}

	/*
	 * Handle reschedule and other end-of-syscall issues
	 */
	userret(lp, frame, sticks);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET)) {
		ktrsysret(lp, code, error, sysmsg.sysmsg_result);
	}
#endif

	/*
	 * This works because errno is findable through the
	 * register set.  If we ever support an emulation where this
	 * is not the case, this code will need to be revisited.
	 */
	STOPEVENT(p, S_SCX, code);

	userexit(lp);
	KTR_LOG(kernentry_syscall_ret, lp->lwp_proc->p_pid, lp->lwp_tid, error);
#ifdef INVARIANTS
	/*
	 * NOTE(review): the message text says "critical section count"
	 * but the condition actually checks the token stop pointer —
	 * the message appears stale relative to the assertion.
	 */
	KASSERT(&td->td_toks_base == td->td_toks_stop,
		("syscall: critical section count mismatch! %d/%d",
		crit_count, td->td_pri));
	KASSERT(curstop == td->td_toks_stop,
		("syscall: extra tokens held after trap! 
%ld", 1211 td->td_toks_stop - &td->td_toks_base)); 1212 #endif 1213 } 1214 1215 /* 1216 * Handles the syscall() and __syscall() API 1217 */ 1218 void xsyscall(struct sysmsg *sysmsg, struct nosys_args *uap); 1219 1220 int 1221 sys_xsyscall(struct sysmsg *sysmsg, const struct nosys_args *uap) 1222 { 1223 struct trapframe *frame; 1224 struct sysent *callp; 1225 union sysunion *argp; 1226 struct thread *td; 1227 const int regcnt = 5; /* number of args passed in registers */ 1228 u_int code; 1229 int error; 1230 int narg; 1231 1232 td = curthread; 1233 frame = sysmsg->sysmsg_frame; 1234 code = (u_int)frame->tf_rdi; 1235 if (code >= td->td_proc->p_sysent->sv_size) 1236 code = SYS___nosys; 1237 argp = (union sysunion *)(&frame->tf_rdi + 1); 1238 callp = &td->td_proc->p_sysent->sv_table[code]; 1239 narg = callp->sy_narg; 1240 1241 /* 1242 * On x86_64 we get up to six arguments in registers. The rest are 1243 * on the stack. However, for syscall() and __syscall() the syscall 1244 * number is inserted as the first argument, so the limit is reduced 1245 * by one to five. 
1246 */ 1247 if (__predict_false(narg > regcnt)) { 1248 register_t *argsdst; 1249 caddr_t params; 1250 1251 argsdst = (register_t *)&sysmsg->extargs; 1252 bcopy(argp, argsdst, sizeof(register_t) * regcnt); 1253 params = (caddr_t)frame->tf_rsp + sizeof(register_t); 1254 error = copyin(params, &argsdst[regcnt], 1255 (narg - regcnt) * sizeof(register_t)); 1256 argp = (void *)argsdst; 1257 if (error) 1258 return error; 1259 } 1260 1261 #ifdef KTRACE 1262 if (KTRPOINTP(td->td_proc, td, KTR_SYSCALL)) { 1263 ktrsyscall(td->td_lwp, code, narg, argp); 1264 } 1265 #endif 1266 1267 error = (*callp->sy_call)(sysmsg, argp); 1268 1269 #ifdef KTRACE 1270 if (KTRPOINTP(td->td_proc, td, KTR_SYSRET)) { 1271 ktrsysret(td->td_lwp, code, error, sysmsg->sysmsg_result); 1272 } 1273 #endif 1274 1275 return error; 1276 } 1277 1278 /* 1279 * NOTE: mplock not held at any point 1280 */ 1281 void 1282 fork_return(struct lwp *lp, struct trapframe *frame) 1283 { 1284 frame->tf_rax = 0; /* Child returns zero */ 1285 frame->tf_rflags &= ~PSL_C; /* success */ 1286 frame->tf_rdx = 1; 1287 1288 generic_lwp_return(lp, frame); 1289 KTR_LOG(kernentry_fork_ret, lp->lwp_proc->p_pid, lp->lwp_tid); 1290 } 1291 1292 /* 1293 * Simplified back end of syscall(), used when returning from fork() 1294 * directly into user mode. 1295 * 1296 * This code will return back into the fork trampoline code which then 1297 * runs doreti. 1298 * 1299 * NOTE: The mplock is not held at any point. 1300 */ 1301 void 1302 generic_lwp_return(struct lwp *lp, struct trapframe *frame) 1303 { 1304 struct proc *p = lp->lwp_proc; 1305 1306 /* 1307 * Check for exit-race. If one lwp exits the process concurrent with 1308 * another lwp creating a new thread, the two operations may cross 1309 * each other resulting in the newly-created lwp not receiving a 1310 * KILL signal. 1311 */ 1312 if (p->p_flags & P_WEXIT) { 1313 lwpsignal(p, lp, SIGKILL); 1314 } 1315 1316 /* 1317 * Newly forked processes are given a kernel priority. 
We have to 1318 * adjust the priority to a normal user priority and fake entry 1319 * into the kernel (call userenter()) to install a passive release 1320 * function just in case userret() decides to stop the process. This 1321 * can occur when ^Z races a fork. If we do not install the passive 1322 * release function the current process designation will not be 1323 * released when the thread goes to sleep. 1324 */ 1325 lwkt_setpri_self(TDPRI_USER_NORM); 1326 userenter(lp->lwp_thread, p); 1327 userret(lp, frame, 0); 1328 #ifdef KTRACE 1329 if (KTRPOINT(lp->lwp_thread, KTR_SYSRET)) 1330 ktrsysret(lp, SYS_fork, 0, 0); 1331 #endif 1332 lp->lwp_flags |= LWP_PASSIVE_ACQ; 1333 userexit(lp); 1334 lp->lwp_flags &= ~LWP_PASSIVE_ACQ; 1335 } 1336 1337 /* 1338 * doreti has turned into this. The frame is directly on the stack. We 1339 * pull everything else we need (fpu and tls context) from the current 1340 * thread. 1341 * 1342 * Note on fpu interactions: In a virtual kernel, the fpu context for 1343 * an emulated user mode process is not shared with the virtual kernel's 1344 * fpu context, so we only have to 'stack' fpu contexts within the virtual 1345 * kernel itself, and not even then since the signal() contexts that we care 1346 * about save and restore the FPU state (I think anyhow). 1347 * 1348 * vmspace_ctl() returns an error only if it had problems instaling the 1349 * context we supplied or problems copying data to/from our VM space. 1350 */ 1351 void 1352 go_user(struct intrframe *frame) 1353 { 1354 struct trapframe *tf = (void *)&frame->if_rdi; 1355 globaldata_t gd; 1356 int r; 1357 void *id; 1358 1359 /* 1360 * Interrupts may be disabled on entry, make sure all signals 1361 * can be received before beginning our loop. 1362 */ 1363 sigsetmask(0); 1364 1365 /* 1366 * Switch to the current simulated user process, then call 1367 * user_trap() when we break out of it (usually due to a signal). 
1368 */ 1369 for (;;) { 1370 #if 1 1371 /* 1372 * Always make the FPU state correct. This should generally 1373 * be faster because the cost of taking a #NM fault through 1374 * the vkernel to the real kernel is astronomical. 1375 */ 1376 crit_enter(); 1377 tf->tf_xflags &= ~PGEX_FPFAULT; 1378 if (mdcpu->gd_npxthread != curthread) { 1379 if (mdcpu->gd_npxthread) 1380 npxsave(mdcpu->gd_npxthread->td_savefpu); 1381 npxdna(tf); 1382 } 1383 #else 1384 /* 1385 * Tell the real kernel whether it is ok to use the FP 1386 * unit or not, allowing us to take a T_DNA exception 1387 * if the context tries to use the FP. 1388 */ 1389 if (mdcpu->gd_npxthread == curthread) { 1390 tf->tf_xflags &= ~PGEX_FPFAULT; 1391 } else { 1392 tf->tf_xflags |= PGEX_FPFAULT; 1393 } 1394 #endif 1395 1396 /* 1397 * Run emulated user process context. This call interlocks 1398 * with new mailbox signals. 1399 * 1400 * Set PGEX_U unconditionally, indicating a user frame (the 1401 * bit is normally set only by T_PAGEFLT). 1402 */ 1403 id = &curproc->p_vmspace->vm_pmap; 1404 1405 /* 1406 * The GDF_VIRTUSER hack helps statclock() figure out who 1407 * the tick belongs to. 1408 */ 1409 gd = mycpu; 1410 gd->gd_flags |= GDF_VIRTUSER; 1411 r = vmspace_ctl(id, VMSPACE_CTL_RUN, tf, 1412 &curthread->td_savevext); 1413 1414 frame->if_xflags |= PGEX_U; 1415 1416 /* 1417 * Immediately save the user FPU state. The vkernel is a 1418 * user program and libraries like libc will use the FP 1419 * unit. 
1420 */ 1421 if (mdcpu->gd_npxthread == curthread) { 1422 npxsave(mdcpu->gd_npxthread->td_savefpu); 1423 } 1424 crit_exit(); 1425 gd->gd_flags &= ~GDF_VIRTUSER; 1426 #if 0 1427 kprintf("GO USER %d trap %ld EVA %08lx RIP %08lx RSP %08lx XFLAGS %02lx/%02lx\n", 1428 r, tf->tf_trapno, tf->tf_addr, tf->tf_rip, tf->tf_rsp, 1429 tf->tf_xflags, frame->if_xflags); 1430 #endif 1431 if (r < 0) { 1432 if (errno != EINTR) 1433 panic("vmspace_ctl failed error %d", errno); 1434 } else { 1435 if (tf->tf_trapno) { 1436 user_trap(tf); 1437 } 1438 } 1439 if (mycpu->gd_reqflags & RQF_AST_MASK) { 1440 tf->tf_trapno = T_ASTFLT; 1441 user_trap(tf); 1442 } 1443 tf->tf_trapno = 0; 1444 } 1445 } 1446 1447 /* 1448 * If PGEX_FPFAULT is set then set FP_VIRTFP in the PCB to force a T_DNA 1449 * fault (which is then passed back to the virtual kernel) if an attempt is 1450 * made to use the FP unit. 1451 * 1452 * XXX this is a fairly big hack. 1453 */ 1454 void 1455 set_vkernel_fp(struct trapframe *frame) 1456 { 1457 struct thread *td = curthread; 1458 1459 if (frame->tf_xflags & PGEX_FPFAULT) { 1460 td->td_pcb->pcb_flags |= FP_VIRTFP; 1461 if (mdcpu->gd_npxthread == td) 1462 npxexit(); 1463 } else { 1464 td->td_pcb->pcb_flags &= ~FP_VIRTFP; 1465 } 1466 } 1467 1468 /* 1469 * Called from vkernel_trap() to fixup the vkernel's syscall 1470 * frame for vmspace_ctl() return. 1471 */ 1472 void 1473 cpu_vkernel_trap(struct trapframe *frame, int error) 1474 { 1475 frame->tf_rax = error; 1476 if (error) 1477 frame->tf_rflags |= PSL_C; 1478 else 1479 frame->tf_rflags &= ~PSL_C; 1480 } 1481