1 /*- 2 * Copyright (C) 1994, David Greenman 3 * Copyright (c) 1990, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the University of Utah, and William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 38 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $ 39 */ 40 41 /* 42 * x86_64 Trap and System call handling 43 */ 44 45 #include "use_isa.h" 46 47 #include "opt_ddb.h" 48 #include "opt_ktrace.h" 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/proc.h> 53 #include <sys/pioctl.h> 54 #include <sys/kernel.h> 55 #include <sys/resourcevar.h> 56 #include <sys/signalvar.h> 57 #include <sys/signal2.h> 58 #include <sys/syscall.h> 59 #include <sys/sysctl.h> 60 #include <sys/sysent.h> 61 #include <sys/uio.h> 62 #include <sys/vmmeter.h> 63 #include <sys/malloc.h> 64 #ifdef KTRACE 65 #include <sys/ktrace.h> 66 #endif 67 #include <sys/ktr.h> 68 #include <sys/upcall.h> 69 #include <sys/vkernel.h> 70 #include <sys/sysproto.h> 71 #include <sys/sysunion.h> 72 #include <sys/vmspace.h> 73 74 #include <vm/vm.h> 75 #include <vm/vm_param.h> 76 #include <sys/lock.h> 77 #include <vm/pmap.h> 78 #include <vm/vm_kern.h> 79 #include <vm/vm_map.h> 80 #include <vm/vm_page.h> 81 #include <vm/vm_extern.h> 82 83 #include <machine/cpu.h> 84 #include <machine/md_var.h> 85 #include <machine/pcb.h> 86 #include <machine/smp.h> 87 #include <machine/tss.h> 88 #include <machine/globaldata.h> 89 90 #include <ddb/ddb.h> 91 92 #include <sys/msgport2.h> 93 #include <sys/thread2.h> 94 #include <sys/mplock2.h> 95 96 #ifdef SMP 97 98 #define MAKEMPSAFE(have_mplock) \ 99 if (have_mplock == 0) { \ 100 get_mplock(); \ 101 have_mplock = 1; \ 102 } 103 104 #else 105 106 #define MAKEMPSAFE(have_mplock) 107 108 #endif 109 110 int (*pmath_emulate) (struct trapframe *); 111 112 extern int trapwrite (unsigned addr); 113 114 static int trap_pfault (struct trapframe *, int, vm_offset_t); 115 static void trap_fatal (struct trapframe *, int, vm_offset_t); 116 void dblfault_handler (void); 117 118 #if 0 119 extern inthand_t IDTVEC(syscall); 120 #endif 121 122 #define MAX_TRAP_MSG 30 123 static char *trap_msg[] = { 124 "", /* 0 unused */ 125 "privileged instruction fault", /* 1 T_PRIVINFLT */ 126 "", /* 2 unused */ 127 "breakpoint instruction fault", /* 3 T_BPTFLT */ 128 "", /* 4 unused */ 129 "", /* 5 unused */ 130 "arithmetic trap", /* 6 T_ARITHTRAP */ 131 "system forced exception", /* 7 T_ASTFLT */ 132 "", /* 8 unused */ 133 "general protection fault", /* 9 T_PROTFLT */ 134 "trace trap", /* 10 T_TRCTRAP */ 135 "", /* 11 unused */ 136 "page fault", /* 12 T_PAGEFLT */ 137 "", /* 13 unused */ 138 "alignment fault", /* 14 T_ALIGNFLT */ 139 "", /* 15 unused */ 140 "", /* 16 unused */ 141 "", /* 17 unused */ 142 "integer divide fault", /* 18 T_DIVIDE */ 143 "non-maskable interrupt trap", /* 19 T_NMI */ 144 "overflow trap", /* 20 T_OFLOW */ 145 "FPU bounds check fault", /* 21 T_BOUND */ 146 "FPU device not available", /* 22 T_DNA */ 147 "double fault", /* 23 T_DOUBLEFLT */ 148 "FPU operand fetch fault", /* 24 T_FPOPFLT */ 149 "invalid TSS fault", /* 25 T_TSSFLT */ 150 "segment not present fault", /* 26 T_SEGNPFLT */ 151 "stack fault", /* 27 T_STKFLT */ 152 "machine check trap", /* 28 T_MCHK */ 153 "SIMD floating-point exception", /* 29 T_XMMFLT */ 154 "reserved (unknown) fault", /* 30 T_RESERVED */ 155 }; 156 157 #ifdef DDB 158 static int ddb_on_nmi = 1; 159 SYSCTL_INT(_machdep, OID_AUTO, ddb_on_nmi, CTLFLAG_RW, 160 &ddb_on_nmi, 0, "Go to DDB on NMI"); 161 #endif 162 static int panic_on_nmi = 1; 163 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW, 164 &panic_on_nmi, 0, "Panic on NMI"); 165 static int fast_release; 166 SYSCTL_INT(_machdep, OID_AUTO, fast_release, CTLFLAG_RW, 167 &fast_release, 0, "Passive Release was optimal"); 168 static int slow_release; 169 SYSCTL_INT(_machdep, OID_AUTO, slow_release, CTLFLAG_RW, 170 &slow_release, 0, "Passive Release was nonoptimal"); 171 172 MALLOC_DEFINE(M_SYSMSG, "sysmsg", "sysmsg structure"); 173 extern int max_sysmsg; 174 175 /* 176 * Passively intercepts the thread switch function to increase the thread 177 * priority from a user priority to a kernel priority, reducing 178 * syscall and trap overhead for the case where no switch occurs. 179 * 180 * Synchronizes td_ucred with p_ucred. This is used by system calls, 181 * signal handling, faults, AST traps, and anything else that enters the 182 * kernel from userland and provides the kernel with a stable read-only 183 * copy of the process ucred. 184 */ 185 static __inline void 186 userenter(struct thread *curtd, struct proc *curp) 187 { 188 struct ucred *ocred; 189 struct ucred *ncred; 190 191 curtd->td_release = lwkt_passive_release; 192 193 if (curtd->td_ucred != curp->p_ucred) { 194 ncred = crhold(curp->p_ucred); 195 ocred = curtd->td_ucred; 196 curtd->td_ucred = ncred; 197 if (ocred) 198 crfree(ocred); 199 } 200 } 201 202 /* 203 * Handle signals, upcalls, profiling, and other AST's and/or tasks that 204 * must be completed before we can return to or try to return to userland. 205 * 206 * Note that td_sticks is a 64 bit quantity, but there's no point doing 64 207 * arithmatic on the delta calculation so the absolute tick values are 208 * truncated to an integer. 209 */ 210 static void 211 userret(struct lwp *lp, struct trapframe *frame, int sticks) 212 { 213 struct proc *p = lp->lwp_proc; 214 int sig; 215 216 /* 217 * Charge system time if profiling. Note: times are in microseconds. 218 * This may do a copyout and block, so do it first even though it 219 * means some system time will be charged as user time. 220 */ 221 if (p->p_flag & P_PROFIL) { 222 addupc_task(p, frame->tf_rip, 223 (u_int)((int)lp->lwp_thread->td_sticks - sticks)); 224 } 225 226 recheck: 227 /* 228 * If the jungle wants us dead, so be it. 229 */ 230 if (lp->lwp_flag & LWP_WEXIT) { 231 lwkt_gettoken(&p->p_token); 232 lwp_exit(0); 233 lwkt_reltoken(&p->p_token); /* NOT REACHED */ 234 } 235 236 /* 237 * Block here if we are in a stopped state. 238 */ 239 if (p->p_stat == SSTOP) { 240 get_mplock(); 241 tstop(); 242 rel_mplock(); 243 goto recheck; 244 } 245 246 /* 247 * Post any pending upcalls 248 */ 249 if (p->p_flag & P_UPCALLPEND) { 250 get_mplock(); 251 p->p_flag &= ~P_UPCALLPEND; 252 postupcall(lp); 253 rel_mplock(); 254 goto recheck; 255 } 256 257 /* 258 * Post any pending signals 259 * 260 * WARNING! postsig() can exit and not return. 261 */ 262 if ((sig = CURSIG_TRACE(lp)) != 0) { 263 get_mplock(); 264 postsig(sig); 265 rel_mplock(); 266 goto recheck; 267 } 268 269 /* 270 * block here if we are swapped out, but still process signals 271 * (such as SIGKILL). proc0 (the swapin scheduler) is already 272 * aware of our situation, we do not have to wake it up. 273 */ 274 if (p->p_flag & P_SWAPPEDOUT) { 275 get_mplock(); 276 p->p_flag |= P_SWAPWAIT; 277 swapin_request(); 278 if (p->p_flag & P_SWAPWAIT) 279 tsleep(p, PCATCH, "SWOUT", 0); 280 p->p_flag &= ~P_SWAPWAIT; 281 rel_mplock(); 282 goto recheck; 283 } 284 285 /* 286 * Make sure postsig() handled request to restore old signal mask after 287 * running signal handler. 288 */ 289 KKASSERT((lp->lwp_flag & LWP_OLDMASK) == 0); 290 } 291 292 /* 293 * Cleanup from userenter and any passive release that might have occured. 294 * We must reclaim the current-process designation before we can return 295 * to usermode. We also handle both LWKT and USER reschedule requests. 296 */ 297 static __inline void 298 userexit(struct lwp *lp) 299 { 300 struct thread *td = lp->lwp_thread; 301 /* globaldata_t gd = td->td_gd; */ 302 303 /* 304 * Handle stop requests at kernel priority. Any requests queued 305 * after this loop will generate another AST. 306 */ 307 while (lp->lwp_proc->p_stat == SSTOP) { 308 get_mplock(); 309 tstop(); 310 rel_mplock(); 311 } 312 313 /* 314 * Reduce our priority in preparation for a return to userland. If 315 * our passive release function was still in place, our priority was 316 * never raised and does not need to be reduced. 317 */ 318 lwkt_passive_recover(td); 319 320 /* 321 * Become the current user scheduled process if we aren't already, 322 * and deal with reschedule requests and other factors. 323 */ 324 lp->lwp_proc->p_usched->acquire_curproc(lp); 325 /* WARNING: we may have migrated cpu's */ 326 /* gd = td->td_gd; */ 327 } 328 329 #if !defined(KTR_KERNENTRY) 330 #define KTR_KERNENTRY KTR_ALL 331 #endif 332 KTR_INFO_MASTER(kernentry); 333 KTR_INFO(KTR_KERNENTRY, kernentry, trap, 0, "pid=%d, tid=%d, trapno=%d, eva=%p", 334 sizeof(int) + sizeof(int) + sizeof(int) + sizeof(vm_offset_t)); 335 KTR_INFO(KTR_KERNENTRY, kernentry, trap_ret, 0, "pid=%d, tid=%d", 336 sizeof(int) + sizeof(int)); 337 KTR_INFO(KTR_KERNENTRY, kernentry, syscall, 0, "pid=%d, tid=%d, call=%d", 338 sizeof(int) + sizeof(int) + sizeof(int)); 339 KTR_INFO(KTR_KERNENTRY, kernentry, syscall_ret, 0, "pid=%d, tid=%d, err=%d", 340 sizeof(int) + sizeof(int) + sizeof(int)); 341 KTR_INFO(KTR_KERNENTRY, kernentry, fork_ret, 0, "pid=%d, tid=%d", 342 sizeof(int) + sizeof(int)); 343 344 /* 345 * Exception, fault, and trap interface to the kernel. 346 * This common code is called from assembly language IDT gate entry 347 * routines that prepare a suitable stack frame, and restore this 348 * frame after the exception has been processed. 349 * 350 * This function is also called from doreti in an interlock to handle ASTs. 351 * For example: hardwareint->INTROUTINE->(set ast)->doreti->trap 352 * 353 * NOTE! We have to retrieve the fault address prior to obtaining the 354 * MP lock because get_mplock() may switch out. YYY cr2 really ought 355 * to be retrieved by the assembly code, not here. 356 * 357 * XXX gd_trap_nesting_level currently prevents lwkt_switch() from panicing 358 * if an attempt is made to switch from a fast interrupt or IPI. This is 359 * necessary to properly take fatal kernel traps on SMP machines if 360 * get_mplock() has to block. 361 */ 362 363 void 364 user_trap(struct trapframe *frame) 365 { 366 struct globaldata *gd = mycpu; 367 struct thread *td = gd->gd_curthread; 368 struct lwp *lp = td->td_lwp; 369 struct proc *p; 370 int sticks = 0; 371 int i = 0, ucode = 0, type, code; 372 #ifdef SMP 373 int have_mplock = 0; 374 #endif 375 #ifdef INVARIANTS 376 int crit_count = td->td_critcount; 377 lwkt_tokref_t curstop = td->td_toks_stop; 378 #endif 379 vm_offset_t eva; 380 381 p = td->td_proc; 382 383 if (frame->tf_trapno == T_PAGEFLT) 384 eva = frame->tf_addr; 385 else 386 eva = 0; 387 #if 0 388 kprintf("USER_TRAP AT %08lx xflags %ld trapno %ld eva %08lx\n", 389 frame->tf_rip, frame->tf_xflags, frame->tf_trapno, eva); 390 #endif 391 392 /* 393 * Everything coming from user mode runs through user_trap, 394 * including system calls. 395 */ 396 if (frame->tf_trapno == T_FAST_SYSCALL) { 397 syscall2(frame); 398 return; 399 } 400 401 KTR_LOG(kernentry_trap, lp->lwp_proc->p_pid, lp->lwp_tid, 402 frame->tf_trapno, eva); 403 404 #ifdef DDB 405 if (db_active) { 406 eva = (frame->tf_trapno == T_PAGEFLT ? rcr2() : 0); 407 ++gd->gd_trap_nesting_level; 408 MAKEMPSAFE(have_mplock); 409 trap_fatal(frame, TRUE, eva); 410 --gd->gd_trap_nesting_level; 411 goto out2; 412 } 413 #endif 414 415 type = frame->tf_trapno; 416 code = frame->tf_err; 417 418 userenter(td, p); 419 420 sticks = (int)td->td_sticks; 421 lp->lwp_md.md_regs = frame; 422 423 switch (type) { 424 case T_PRIVINFLT: /* privileged instruction fault */ 425 ucode = type; 426 i = SIGILL; 427 break; 428 429 case T_BPTFLT: /* bpt instruction fault */ 430 case T_TRCTRAP: /* trace trap */ 431 frame->tf_rflags &= ~PSL_T; 432 i = SIGTRAP; 433 break; 434 435 case T_ARITHTRAP: /* arithmetic trap */ 436 ucode = code; 437 i = SIGFPE; 438 break; 439 440 case T_ASTFLT: /* Allow process switch */ 441 mycpu->gd_cnt.v_soft++; 442 if (mycpu->gd_reqflags & RQF_AST_OWEUPC) { 443 atomic_clear_int(&mycpu->gd_reqflags, RQF_AST_OWEUPC); 444 addupc_task(p, p->p_prof.pr_addr, p->p_prof.pr_ticks); 445 } 446 goto out; 447 448 /* 449 * The following two traps can happen in 450 * vm86 mode, and, if so, we want to handle 451 * them specially. 452 */ 453 case T_PROTFLT: /* general protection fault */ 454 case T_STKFLT: /* stack fault */ 455 #if 0 456 if (frame->tf_eflags & PSL_VM) { 457 i = vm86_emulate((struct vm86frame *)frame); 458 if (i == 0) 459 goto out; 460 break; 461 } 462 #endif 463 /* FALL THROUGH */ 464 465 case T_SEGNPFLT: /* segment not present fault */ 466 case T_TSSFLT: /* invalid TSS fault */ 467 case T_DOUBLEFLT: /* double fault */ 468 default: 469 ucode = code + BUS_SEGM_FAULT ; 470 i = SIGBUS; 471 break; 472 473 case T_PAGEFLT: /* page fault */ 474 MAKEMPSAFE(have_mplock); 475 i = trap_pfault(frame, TRUE, eva); 476 if (i == -1 || i == 0) 477 goto out; 478 479 ucode = T_PAGEFLT; 480 break; 481 482 case T_DIVIDE: /* integer divide fault */ 483 ucode = FPE_INTDIV; 484 i = SIGFPE; 485 break; 486 487 #if NISA > 0 488 case T_NMI: 489 MAKEMPSAFE(have_mplock); 490 /* machine/parity/power fail/"kitchen sink" faults */ 491 if (isa_nmi(code) == 0) { 492 #ifdef DDB 493 /* 494 * NMI can be hooked up to a pushbutton 495 * for debugging. 496 */ 497 if (ddb_on_nmi) { 498 kprintf ("NMI ... going to debugger\n"); 499 kdb_trap (type, 0, frame); 500 } 501 #endif /* DDB */ 502 goto out2; 503 } else if (panic_on_nmi) 504 panic("NMI indicates hardware failure"); 505 break; 506 #endif /* NISA > 0 */ 507 508 case T_OFLOW: /* integer overflow fault */ 509 ucode = FPE_INTOVF; 510 i = SIGFPE; 511 break; 512 513 case T_BOUND: /* bounds check fault */ 514 ucode = FPE_FLTSUB; 515 i = SIGFPE; 516 break; 517 518 case T_DNA: 519 /* 520 * Virtual kernel intercept - pass the DNA exception 521 * to the (emulated) virtual kernel if it asked to handle 522 * it. This occurs when the virtual kernel is holding 523 * onto the FP context for a different emulated 524 * process then the one currently running. 525 * 526 * We must still call npxdna() since we may have 527 * saved FP state that the (emulated) virtual kernel 528 * needs to hand over to a different emulated process. 529 */ 530 if (lp->lwp_vkernel && lp->lwp_vkernel->ve && 531 (td->td_pcb->pcb_flags & FP_VIRTFP) 532 ) { 533 npxdna(frame); 534 break; 535 } 536 /* 537 * The kernel may have switched out the FP unit's 538 * state, causing the user process to take a fault 539 * when it tries to use the FP unit. Restore the 540 * state here 541 */ 542 if (npxdna(frame)) 543 goto out; 544 if (!pmath_emulate) { 545 i = SIGFPE; 546 ucode = FPE_FPU_NP_TRAP; 547 break; 548 } 549 i = (*pmath_emulate)(frame); 550 if (i == 0) { 551 if (!(frame->tf_rflags & PSL_T)) 552 goto out2; 553 frame->tf_rflags &= ~PSL_T; 554 i = SIGTRAP; 555 } 556 /* else ucode = emulator_only_knows() XXX */ 557 break; 558 559 case T_FPOPFLT: /* FPU operand fetch fault */ 560 ucode = T_FPOPFLT; 561 i = SIGILL; 562 break; 563 564 case T_XMMFLT: /* SIMD floating-point exception */ 565 ucode = 0; /* XXX */ 566 i = SIGFPE; 567 break; 568 } 569 570 /* 571 * Virtual kernel intercept - if the fault is directly related to a 572 * VM context managed by a virtual kernel then let the virtual kernel 573 * handle it. 574 */ 575 if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { 576 vkernel_trap(lp, frame); 577 goto out; 578 } 579 580 /* 581 * Translate fault for emulators (e.g. Linux) 582 */ 583 if (*p->p_sysent->sv_transtrap) 584 i = (*p->p_sysent->sv_transtrap)(i, type); 585 586 MAKEMPSAFE(have_mplock); 587 trapsignal(lp, i, ucode); 588 589 #ifdef DEBUG 590 if (type <= MAX_TRAP_MSG) { 591 uprintf("fatal process exception: %s", 592 trap_msg[type]); 593 if ((type == T_PAGEFLT) || (type == T_PROTFLT)) 594 uprintf(", fault VA = 0x%lx", (u_long)eva); 595 uprintf("\n"); 596 } 597 #endif 598 599 out: 600 userret(lp, frame, sticks); 601 userexit(lp); 602 out2: ; 603 #ifdef SMP 604 if (have_mplock) 605 rel_mplock(); 606 #endif 607 KTR_LOG(kernentry_trap_ret, lp->lwp_proc->p_pid, lp->lwp_tid); 608 #ifdef INVARIANTS 609 KASSERT(crit_count == td->td_critcount, 610 ("trap: critical section count mismatch! %d/%d", 611 crit_count, td->td_pri)); 612 KASSERT(curstop == td->td_toks_stop, 613 ("trap: extra tokens held after trap! %ld/%ld", 614 curstop - &td->td_toks_base, 615 td->td_toks_stop - &td->td_toks_base)); 616 #endif 617 } 618 619 void 620 kern_trap(struct trapframe *frame) 621 { 622 struct globaldata *gd = mycpu; 623 struct thread *td = gd->gd_curthread; 624 struct lwp *lp; 625 struct proc *p; 626 int i = 0, ucode = 0, type, code; 627 #ifdef SMP 628 int have_mplock = 0; 629 #endif 630 #ifdef INVARIANTS 631 int crit_count = td->td_critcount; 632 lwkt_tokref_t curstop = td->td_toks_stop; 633 #endif 634 vm_offset_t eva; 635 636 lp = td->td_lwp; 637 p = td->td_proc; 638 639 if (frame->tf_trapno == T_PAGEFLT) 640 eva = frame->tf_addr; 641 else 642 eva = 0; 643 644 #ifdef DDB 645 if (db_active) { 646 ++gd->gd_trap_nesting_level; 647 MAKEMPSAFE(have_mplock); 648 trap_fatal(frame, FALSE, eva); 649 --gd->gd_trap_nesting_level; 650 goto out2; 651 } 652 #endif 653 654 type = frame->tf_trapno; 655 code = frame->tf_err; 656 657 #if 0 658 kernel_trap: 659 #endif 660 /* kernel trap */ 661 662 switch (type) { 663 case T_PAGEFLT: /* page fault */ 664 MAKEMPSAFE(have_mplock); 665 trap_pfault(frame, FALSE, eva); 666 goto out2; 667 668 case T_DNA: 669 /* 670 * The kernel may be using npx for copying or other 671 * purposes. 672 */ 673 panic("kernel NPX should not happen"); 674 if (npxdna(frame)) 675 goto out2; 676 break; 677 678 case T_PROTFLT: /* general protection fault */ 679 case T_SEGNPFLT: /* segment not present fault */ 680 /* 681 * Invalid segment selectors and out of bounds 682 * %eip's and %esp's can be set up in user mode. 683 * This causes a fault in kernel mode when the 684 * kernel tries to return to user mode. We want 685 * to get this fault so that we can fix the 686 * problem here and not have to check all the 687 * selectors and pointers when the user changes 688 * them. 689 */ 690 if (mycpu->gd_intr_nesting_level == 0) { 691 if (td->td_pcb->pcb_onfault) { 692 frame->tf_rip = 693 (register_t)td->td_pcb->pcb_onfault; 694 goto out2; 695 } 696 } 697 break; 698 699 case T_TSSFLT: 700 /* 701 * PSL_NT can be set in user mode and isn't cleared 702 * automatically when the kernel is entered. This 703 * causes a TSS fault when the kernel attempts to 704 * `iret' because the TSS link is uninitialized. We 705 * want to get this fault so that we can fix the 706 * problem here and not every time the kernel is 707 * entered. 708 */ 709 if (frame->tf_rflags & PSL_NT) { 710 frame->tf_rflags &= ~PSL_NT; 711 goto out2; 712 } 713 break; 714 715 case T_TRCTRAP: /* trace trap */ 716 #if 0 717 if (frame->tf_eip == (int)IDTVEC(syscall)) { 718 /* 719 * We've just entered system mode via the 720 * syscall lcall. Continue single stepping 721 * silently until the syscall handler has 722 * saved the flags. 723 */ 724 goto out2; 725 } 726 if (frame->tf_eip == (int)IDTVEC(syscall) + 1) { 727 /* 728 * The syscall handler has now saved the 729 * flags. Stop single stepping it. 730 */ 731 frame->tf_eflags &= ~PSL_T; 732 goto out2; 733 } 734 #endif 735 #if 0 736 /* 737 * Ignore debug register trace traps due to 738 * accesses in the user's address space, which 739 * can happen under several conditions such as 740 * if a user sets a watchpoint on a buffer and 741 * then passes that buffer to a system call. 742 * We still want to get TRCTRAPS for addresses 743 * in kernel space because that is useful when 744 * debugging the kernel. 745 */ 746 if (user_dbreg_trap()) { 747 /* 748 * Reset breakpoint bits because the 749 * processor doesn't 750 */ 751 load_dr6(rdr6() & 0xfffffff0); 752 goto out2; 753 } 754 #endif 755 /* 756 * Fall through (TRCTRAP kernel mode, kernel address) 757 */ 758 case T_BPTFLT: 759 /* 760 * If DDB is enabled, let it handle the debugger trap. 761 * Otherwise, debugger traps "can't happen". 762 */ 763 #ifdef DDB 764 MAKEMPSAFE(have_mplock); 765 if (kdb_trap (type, 0, frame)) 766 goto out2; 767 #endif 768 break; 769 case T_DIVIDE: 770 MAKEMPSAFE(have_mplock); 771 trap_fatal(frame, FALSE, eva); 772 goto out2; 773 case T_NMI: 774 MAKEMPSAFE(have_mplock); 775 trap_fatal(frame, FALSE, eva); 776 goto out2; 777 case T_SYSCALL80: 778 case T_FAST_SYSCALL: 779 /* 780 * Ignore this trap generated from a spurious SIGTRAP. 781 * 782 * single stepping in / syscalls leads to spurious / SIGTRAP 783 * so ignore 784 * 785 * Haiku (c) 2007 Simon 'corecode' Schubert 786 */ 787 goto out2; 788 } 789 790 /* 791 * Translate fault for emulators (e.g. Linux) 792 */ 793 if (*p->p_sysent->sv_transtrap) 794 i = (*p->p_sysent->sv_transtrap)(i, type); 795 796 MAKEMPSAFE(have_mplock); 797 trapsignal(lp, i, ucode); 798 799 #ifdef DEBUG 800 if (type <= MAX_TRAP_MSG) { 801 uprintf("fatal process exception: %s", 802 trap_msg[type]); 803 if ((type == T_PAGEFLT) || (type == T_PROTFLT)) 804 uprintf(", fault VA = 0x%lx", (u_long)eva); 805 uprintf("\n"); 806 } 807 #endif 808 809 out2: 810 ; 811 #ifdef SMP 812 if (have_mplock) 813 rel_mplock(); 814 #endif 815 #ifdef INVARIANTS 816 KASSERT(crit_count == td->td_critcount, 817 ("trap: critical section count mismatch! %d/%d", 818 crit_count, td->td_pri)); 819 KASSERT(curstop == td->td_toks_stop, 820 ("trap: extra tokens held after trap! %ld/%ld", 821 curstop - &td->td_toks_base, 822 td->td_toks_stop - &td->td_toks_base)); 823 #endif 824 } 825 826 int 827 trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) 828 { 829 vm_offset_t va; 830 struct vmspace *vm = NULL; 831 vm_map_t map = 0; 832 int rv = 0; 833 vm_prot_t ftype; 834 thread_t td = curthread; 835 struct lwp *lp = td->td_lwp; 836 837 va = trunc_page(eva); 838 if (usermode == FALSE) { 839 /* 840 * This is a fault on kernel virtual memory. 841 */ 842 map = &kernel_map; 843 } else { 844 /* 845 * This is a fault on non-kernel virtual memory. 846 * vm is initialized above to NULL. If curproc is NULL 847 * or curproc->p_vmspace is NULL the fault is fatal. 848 */ 849 if (lp != NULL) 850 vm = lp->lwp_vmspace; 851 852 if (vm == NULL) 853 goto nogo; 854 855 map = &vm->vm_map; 856 } 857 858 if (frame->tf_err & PGEX_W) 859 ftype = VM_PROT_READ | VM_PROT_WRITE; 860 else 861 ftype = VM_PROT_READ; 862 863 if (map != &kernel_map) { 864 /* 865 * Keep swapout from messing with us during this 866 * critical time. 867 */ 868 PHOLD(lp->lwp_proc); 869 870 /* 871 * Grow the stack if necessary 872 */ 873 /* grow_stack returns false only if va falls into 874 * a growable stack region and the stack growth 875 * fails. It returns true if va was not within 876 * a growable stack region, or if the stack 877 * growth succeeded. 878 */ 879 if (!grow_stack (lp->lwp_proc, va)) { 880 rv = KERN_FAILURE; 881 PRELE(lp->lwp_proc); 882 goto nogo; 883 } 884 885 /* Fault in the user page: */ 886 rv = vm_fault(map, va, ftype, 887 (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY 888 : VM_FAULT_NORMAL); 889 890 PRELE(lp->lwp_proc); 891 } else { 892 /* 893 * Don't have to worry about process locking or stacks in the kernel. 894 */ 895 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); 896 } 897 898 if (rv == KERN_SUCCESS) 899 return (0); 900 nogo: 901 if (!usermode) { 902 if (td->td_gd->gd_intr_nesting_level == 0 && 903 td->td_pcb->pcb_onfault) { 904 frame->tf_rip = (register_t)td->td_pcb->pcb_onfault; 905 return (0); 906 } 907 trap_fatal(frame, usermode, eva); 908 return (-1); 909 } 910 911 /* 912 * NOTE: on x86_64 we have a tf_addr field in the trapframe, no 913 * kludge is needed to pass the fault address to signal handlers. 914 */ 915 struct proc *p = td->td_proc; 916 kprintf("seg-fault accessing address %p rip=%p pid=%d p_comm=%s\n", 917 (void *)va, (void *)frame->tf_rip, p->p_pid, p->p_comm); 918 /* Debugger("seg-fault"); */ 919 920 return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); 921 } 922 923 static void 924 trap_fatal(struct trapframe *frame, int usermode, vm_offset_t eva) 925 { 926 int code, type, ss; 927 long rsp; 928 929 code = frame->tf_xflags; 930 type = frame->tf_trapno; 931 932 if (type <= MAX_TRAP_MSG) { 933 kprintf("\n\nFatal trap %d: %s while in %s mode\n", 934 type, trap_msg[type], 935 (usermode ? "user" : "kernel")); 936 } 937 #ifdef SMP 938 /* two separate prints in case of a trap on an unmapped page */ 939 kprintf("cpuid = %d\n", mycpu->gd_cpuid); 940 #endif 941 if (type == T_PAGEFLT) { 942 kprintf("fault virtual address = %p\n", (void *)eva); 943 kprintf("fault code = %s %s, %s\n", 944 usermode ? "user" : "supervisor", 945 code & PGEX_W ? "write" : "read", 946 code & PGEX_P ? "protection violation" : "page not present"); 947 } 948 kprintf("instruction pointer = 0x%lx:0x%lx\n", 949 frame->tf_cs & 0xffff, frame->tf_rip); 950 if (usermode) { 951 ss = frame->tf_ss & 0xffff; 952 rsp = frame->tf_rsp; 953 } else { 954 ss = GSEL(GDATA_SEL, SEL_KPL); 955 rsp = (long)&frame->tf_rsp; 956 } 957 kprintf("stack pointer = 0x%x:0x%lx\n", ss, rsp); 958 kprintf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp); 959 kprintf("processor eflags = "); 960 if (frame->tf_rflags & PSL_T) 961 kprintf("trace trap, "); 962 if (frame->tf_rflags & PSL_I) 963 kprintf("interrupt enabled, "); 964 if (frame->tf_rflags & PSL_NT) 965 kprintf("nested task, "); 966 if (frame->tf_rflags & PSL_RF) 967 kprintf("resume, "); 968 #if 0 969 if (frame->tf_eflags & PSL_VM) 970 kprintf("vm86, "); 971 #endif 972 kprintf("IOPL = %jd\n", (intmax_t)((frame->tf_rflags & PSL_IOPL) >> 12)); 973 kprintf("current process = "); 974 if (curproc) { 975 kprintf("%lu (%s)\n", 976 (u_long)curproc->p_pid, curproc->p_comm ? 977 curproc->p_comm : ""); 978 } else { 979 kprintf("Idle\n"); 980 } 981 kprintf("current thread = pri %d ", curthread->td_pri); 982 if (curthread->td_critcount) 983 kprintf("(CRIT)"); 984 kprintf("\n"); 985 #ifdef SMP 986 /** 987 * XXX FIXME: 988 * we probably SHOULD have stopped the other CPUs before now! 989 * another CPU COULD have been touching cpl at this moment... 990 */ 991 kprintf(" <- SMP: XXX"); 992 #endif 993 kprintf("\n"); 994 995 #ifdef KDB 996 if (kdb_trap(&psl)) 997 return; 998 #endif 999 #ifdef DDB 1000 if ((debugger_on_panic || db_active) && kdb_trap(type, code, frame)) 1001 return; 1002 #endif 1003 kprintf("trap number = %d\n", type); 1004 if (type <= MAX_TRAP_MSG) 1005 panic("%s", trap_msg[type]); 1006 else 1007 panic("unknown/reserved trap"); 1008 } 1009 1010 /* 1011 * Double fault handler. Called when a fault occurs while writing 1012 * a frame for a trap/exception onto the stack. This usually occurs 1013 * when the stack overflows (such is the case with infinite recursion, 1014 * for example). 1015 * 1016 * XXX Note that the current PTD gets replaced by IdlePTD when the 1017 * task switch occurs. This means that the stack that was active at 1018 * the time of the double fault is not available at <kstack> unless 1019 * the machine was idle when the double fault occurred. The downside 1020 * of this is that "trace <ebp>" in ddb won't work. 1021 */ 1022 void 1023 dblfault_handler(void) 1024 { 1025 #if JG 1026 struct mdglobaldata *gd = mdcpu; 1027 #endif 1028 1029 kprintf("\nFatal double fault:\n"); 1030 #if JG 1031 kprintf("rip = 0x%lx\n", gd->gd_common_tss.tss_rip); 1032 kprintf("rsp = 0x%lx\n", gd->gd_common_tss.tss_rsp); 1033 kprintf("rbp = 0x%lx\n", gd->gd_common_tss.tss_rbp); 1034 #endif 1035 #ifdef SMP 1036 /* two separate prints in case of a trap on an unmapped page */ 1037 kprintf("cpuid = %d\n", mycpu->gd_cpuid); 1038 #endif 1039 panic("double fault"); 1040 } 1041 1042 /* 1043 * Compensate for 386 brain damage (missing URKR). 1044 * This is a little simpler than the pagefault handler in trap() because 1045 * it the page tables have already been faulted in and high addresses 1046 * are thrown out early for other reasons. 1047 */ 1048 int 1049 trapwrite(unsigned addr) 1050 { 1051 struct lwp *lp; 1052 vm_offset_t va; 1053 struct vmspace *vm; 1054 int rv; 1055 1056 va = trunc_page((vm_offset_t)addr); 1057 /* 1058 * XXX - MAX is END. Changed > to >= for temp. fix. 1059 */ 1060 if (va >= VM_MAX_USER_ADDRESS) 1061 return (1); 1062 1063 lp = curthread->td_lwp; 1064 vm = lp->lwp_vmspace; 1065 1066 PHOLD(lp->lwp_proc); 1067 1068 if (!grow_stack (lp->lwp_proc, va)) { 1069 PRELE(lp->lwp_proc); 1070 return (1); 1071 } 1072 1073 /* 1074 * fault the data page 1075 */ 1076 rv = vm_fault(&vm->vm_map, va, VM_PROT_WRITE, VM_FAULT_DIRTY); 1077 1078 PRELE(lp->lwp_proc); 1079 1080 if (rv != KERN_SUCCESS) 1081 return 1; 1082 1083 return (0); 1084 } 1085 1086 /* 1087 * syscall2 - MP aware system call request C handler 1088 * 1089 * A system call is essentially treated as a trap except that the 1090 * MP lock is not held on entry or return. We are responsible for 1091 * obtaining the MP lock if necessary and for handling ASTs 1092 * (e.g. a task switch) prior to return. 1093 * 1094 * In general, only simple access and manipulation of curproc and 1095 * the current stack is allowed without having to hold MP lock. 1096 * 1097 * MPSAFE - note that large sections of this routine are run without 1098 * the MP lock. 1099 */ 1100 void 1101 syscall2(struct trapframe *frame) 1102 { 1103 struct thread *td = curthread; 1104 struct proc *p = td->td_proc; 1105 struct lwp *lp = td->td_lwp; 1106 caddr_t params; 1107 struct sysent *callp; 1108 register_t orig_tf_rflags; 1109 int sticks; 1110 int error; 1111 int narg; 1112 #ifdef INVARIANTS 1113 int crit_count = td->td_critcount; 1114 lwkt_tokref_t curstop = td->td_toks_stop; 1115 #endif 1116 #ifdef SMP 1117 int have_mplock = 0; 1118 #endif 1119 register_t *argp; 1120 u_int code; 1121 int reg, regcnt; 1122 union sysunion args; 1123 register_t *argsdst; 1124 1125 mycpu->gd_cnt.v_syscall++; 1126 1127 KTR_LOG(kernentry_syscall, lp->lwp_proc->p_pid, lp->lwp_tid, 1128 frame->tf_eax); 1129 1130 userenter(td, p); /* lazy raise our priority */ 1131 1132 reg = 0; 1133 regcnt = 6; 1134 /* 1135 * Misc 1136 */ 1137 sticks = (int)td->td_sticks; 1138 orig_tf_rflags = frame->tf_rflags; 1139 1140 /* 1141 * Virtual kernel intercept - if a VM context managed by a virtual 1142 * kernel issues a system call the virtual kernel handles it, not us. 1143 * Restore the virtual kernel context and return from its system 1144 * call. The current frame is copied out to the virtual kernel. 1145 */ 1146 if (lp->lwp_vkernel && lp->lwp_vkernel->ve) { 1147 vkernel_trap(lp, frame); 1148 error = EJUSTRETURN; 1149 goto out; 1150 } 1151 1152 /* 1153 * Get the system call parameters and account for time 1154 */ 1155 lp->lwp_md.md_regs = frame; 1156 params = (caddr_t)frame->tf_rsp + sizeof(register_t); 1157 code = frame->tf_rax; 1158 1159 if (p->p_sysent->sv_prepsyscall) { 1160 (*p->p_sysent->sv_prepsyscall)( 1161 frame, (int *)(&args.nosys.sysmsg + 1), 1162 &code, ¶ms); 1163 } else { 1164 if (code == SYS_syscall || code == SYS___syscall) { 1165 code = frame->tf_rdi; 1166 reg++; 1167 regcnt--; 1168 } 1169 } 1170 1171 if (p->p_sysent->sv_mask) 1172 code &= p->p_sysent->sv_mask; 1173 1174 if (code >= p->p_sysent->sv_size) 1175 callp = &p->p_sysent->sv_table[0]; 1176 else 1177 callp = &p->p_sysent->sv_table[code]; 1178 1179 narg = callp->sy_narg & SYF_ARGMASK; 1180 1181 /* 1182 * On x86_64 we get up to six arguments in registers. The rest are 1183 * on the stack. The first six members of 'struct trapframe' happen 1184 * to be the registers used to pass arguments, in exactly the right 1185 * order. 1186 */ 1187 argp = &frame->tf_rdi; 1188 argp += reg; 1189 argsdst = (register_t *)(&args.nosys.sysmsg + 1); 1190 /* 1191 * JG can we overflow the space pointed to by 'argsdst' 1192 * either with 'bcopy' or with 'copyin'? 1193 */ 1194 bcopy(argp, argsdst, sizeof(register_t) * regcnt); 1195 /* 1196 * copyin is MP aware, but the tracing code is not 1197 */ 1198 if (narg > regcnt) { 1199 KASSERT(params != NULL, ("copyin args with no params!")); 1200 error = copyin(params, &argsdst[regcnt], 1201 (narg - regcnt) * sizeof(register_t)); 1202 if (error) { 1203 #ifdef KTRACE 1204 if (KTRPOINT(td, KTR_SYSCALL)) { 1205 MAKEMPSAFE(have_mplock); 1206 1207 ktrsyscall(lp, code, narg, 1208 (void *)(&args.nosys.sysmsg + 1)); 1209 } 1210 #endif 1211 goto bad; 1212 } 1213 } 1214 1215 #ifdef KTRACE 1216 if (KTRPOINT(td, KTR_SYSCALL)) { 1217 MAKEMPSAFE(have_mplock); 1218 ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1)); 1219 } 1220 #endif 1221 1222 /* 1223 * Default return value is 0 (will be copied to %rax). Double-value 1224 * returns use %rax and %rdx. %rdx is left unchanged for system 1225 * calls which return only one result. 1226 */ 1227 args.sysmsg_fds[0] = 0; 1228 args.sysmsg_fds[1] = frame->tf_rdx; 1229 1230 /* 1231 * The syscall might manipulate the trap frame. If it does it 1232 * will probably return EJUSTRETURN. 1233 */ 1234 args.sysmsg_frame = frame; 1235 1236 STOPEVENT(p, S_SCE, narg); /* MP aware */ 1237 1238 /* 1239 * NOTE: All system calls run MPSAFE now. The system call itself 1240 * is responsible for getting the MP lock. 1241 */ 1242 error = (*callp->sy_call)(&args); 1243 1244 #if 0 1245 kprintf("system call %d returned %d\n", code, error); 1246 #endif 1247 1248 out: 1249 /* 1250 * MP SAFE (we may or may not have the MP lock at this point) 1251 */ 1252 switch (error) { 1253 case 0: 1254 /* 1255 * Reinitialize proc pointer `p' as it may be different 1256 * if this is a child returning from fork syscall. 1257 */ 1258 p = curproc; 1259 lp = curthread->td_lwp; 1260 frame->tf_rax = args.sysmsg_fds[0]; 1261 frame->tf_rdx = args.sysmsg_fds[1]; 1262 frame->tf_rflags &= ~PSL_C; 1263 break; 1264 case ERESTART: 1265 /* 1266 * Reconstruct pc, we know that 'syscall' is 2 bytes. 1267 * We have to do a full context restore so that %r10 1268 * (which was holding the value of %rcx) is restored for 1269 * the next iteration. 1270 */ 1271 frame->tf_rip -= frame->tf_err; 1272 frame->tf_r10 = frame->tf_rcx; 1273 break; 1274 case EJUSTRETURN: 1275 break; 1276 case EASYNC: 1277 panic("Unexpected EASYNC return value (for now)"); 1278 default: 1279 bad: 1280 if (p->p_sysent->sv_errsize) { 1281 if (error >= p->p_sysent->sv_errsize) 1282 error = -1; /* XXX */ 1283 else 1284 error = p->p_sysent->sv_errtbl[error]; 1285 } 1286 frame->tf_rax = error; 1287 frame->tf_rflags |= PSL_C; 1288 break; 1289 } 1290 1291 /* 1292 * Traced syscall. trapsignal() is not MP aware. 1293 */ 1294 if (orig_tf_rflags & PSL_T) { 1295 MAKEMPSAFE(have_mplock); 1296 frame->tf_rflags &= ~PSL_T; 1297 trapsignal(lp, SIGTRAP, 0); 1298 } 1299 1300 /* 1301 * Handle reschedule and other end-of-syscall issues 1302 */ 1303 userret(lp, frame, sticks); 1304 1305 #ifdef KTRACE 1306 if (KTRPOINT(td, KTR_SYSRET)) { 1307 MAKEMPSAFE(have_mplock); 1308 ktrsysret(lp, code, error, args.sysmsg_result); 1309 } 1310 #endif 1311 1312 /* 1313 * This works because errno is findable through the 1314 * register set. If we ever support an emulation where this 1315 * is not the case, this code will need to be revisited. 1316 */ 1317 STOPEVENT(p, S_SCX, code); 1318 1319 userexit(lp); 1320 #ifdef SMP 1321 /* 1322 * Release the MP lock if we had to get it 1323 */ 1324 if (have_mplock) 1325 rel_mplock(); 1326 #endif 1327 KTR_LOG(kernentry_syscall_ret, lp->lwp_proc->p_pid, lp->lwp_tid, error); 1328 #ifdef INVARIANTS 1329 KASSERT(&td->td_toks_base == td->td_toks_stop, 1330 ("syscall: critical section count mismatch! %d/%d", 1331 crit_count, td->td_pri)); 1332 KASSERT(curstop == td->td_toks_stop, 1333 ("syscall: extra tokens held after trap! %ld", 1334 td->td_toks_stop - &td->td_toks_base)); 1335 #endif 1336 } 1337 1338 /* 1339 * NOTE: mplock not held at any point 1340 */ 1341 void 1342 fork_return(struct lwp *lp, struct trapframe *frame) 1343 { 1344 frame->tf_rax = 0; /* Child returns zero */ 1345 frame->tf_rflags &= ~PSL_C; /* success */ 1346 frame->tf_rdx = 1; 1347 1348 generic_lwp_return(lp, frame); 1349 KTR_LOG(kernentry_fork_ret, lp->lwp_proc->p_pid, lp->lwp_tid); 1350 } 1351 1352 /* 1353 * Simplified back end of syscall(), used when returning from fork() 1354 * directly into user mode. 1355 * 1356 * This code will return back into the fork trampoline code which then 1357 * runs doreti. 1358 * 1359 * NOTE: The mplock is not held at any point. 1360 */ 1361 void 1362 generic_lwp_return(struct lwp *lp, struct trapframe *frame) 1363 { 1364 struct proc *p = lp->lwp_proc; 1365 1366 /* 1367 * Newly forked processes are given a kernel priority. We have to 1368 * adjust the priority to a normal user priority and fake entry 1369 * into the kernel (call userenter()) to install a passive release 1370 * function just in case userret() decides to stop the process. This 1371 * can occur when ^Z races a fork. If we do not install the passive 1372 * release function the current process designation will not be 1373 * released when the thread goes to sleep. 1374 */ 1375 lwkt_setpri_self(TDPRI_USER_NORM); 1376 userenter(lp->lwp_thread, p); 1377 userret(lp, frame, 0); 1378 #ifdef KTRACE 1379 if (KTRPOINT(lp->lwp_thread, KTR_SYSRET)) 1380 ktrsysret(lp, SYS_fork, 0, 0); 1381 #endif 1382 p->p_flag |= P_PASSIVE_ACQ; 1383 userexit(lp); 1384 p->p_flag &= ~P_PASSIVE_ACQ; 1385 } 1386 1387 /* 1388 * doreti has turned into this. The frame is directly on the stack. We 1389 * pull everything else we need (fpu and tls context) from the current 1390 * thread. 1391 * 1392 * Note on fpu interactions: In a virtual kernel, the fpu context for 1393 * an emulated user mode process is not shared with the virtual kernel's 1394 * fpu context, so we only have to 'stack' fpu contexts within the virtual 1395 * kernel itself, and not even then since the signal() contexts that we care 1396 * about save and restore the FPU state (I think anyhow). 1397 * 1398 * vmspace_ctl() returns an error only if it had problems instaling the 1399 * context we supplied or problems copying data to/from our VM space. 1400 */ 1401 void 1402 go_user(struct intrframe *frame) 1403 { 1404 struct trapframe *tf = (void *)&frame->if_rdi; 1405 int r; 1406 1407 /* 1408 * Interrupts may be disabled on entry, make sure all signals 1409 * can be received before beginning our loop. 1410 */ 1411 sigsetmask(0); 1412 1413 /* 1414 * Switch to the current simulated user process, then call 1415 * user_trap() when we break out of it (usually due to a signal). 1416 */ 1417 for (;;) { 1418 /* 1419 * Tell the real kernel whether it is ok to use the FP 1420 * unit or not. 1421 */ 1422 if (mdcpu->gd_npxthread == curthread) { 1423 tf->tf_xflags &= ~PGEX_FPFAULT; 1424 } else { 1425 tf->tf_xflags |= PGEX_FPFAULT; 1426 } 1427 1428 /* 1429 * Run emulated user process context. This call interlocks 1430 * with new mailbox signals. 1431 * 1432 * Set PGEX_U unconditionally, indicating a user frame (the 1433 * bit is normally set only by T_PAGEFLT). 1434 */ 1435 r = vmspace_ctl(&curproc->p_vmspace->vm_pmap, VMSPACE_CTL_RUN, 1436 tf, &curthread->td_savevext); 1437 frame->if_xflags |= PGEX_U; 1438 #if 0 1439 kprintf("GO USER %d trap %ld EVA %08lx RIP %08lx RSP %08lx XFLAGS %02lx/%02lx\n", 1440 r, tf->tf_trapno, tf->tf_addr, tf->tf_rip, tf->tf_rsp, 1441 tf->tf_xflags, frame->if_xflags); 1442 #endif 1443 if (r < 0) { 1444 if (errno != EINTR) 1445 panic("vmspace_ctl failed error %d", errno); 1446 } else { 1447 if (tf->tf_trapno) { 1448 user_trap(tf); 1449 } 1450 } 1451 if (mycpu->gd_reqflags & RQF_AST_MASK) { 1452 tf->tf_trapno = T_ASTFLT; 1453 user_trap(tf); 1454 } 1455 tf->tf_trapno = 0; 1456 } 1457 } 1458 1459 /* 1460 * If PGEX_FPFAULT is set then set FP_VIRTFP in the PCB to force a T_DNA 1461 * fault (which is then passed back to the virtual kernel) if an attempt is 1462 * made to use the FP unit. 1463 * 1464 * XXX this is a fairly big hack. 1465 */ 1466 void 1467 set_vkernel_fp(struct trapframe *frame) 1468 { 1469 struct thread *td = curthread; 1470 1471 if (frame->tf_xflags & PGEX_FPFAULT) { 1472 td->td_pcb->pcb_flags |= FP_VIRTFP; 1473 if (mdcpu->gd_npxthread == td) 1474 npxexit(); 1475 } else { 1476 td->td_pcb->pcb_flags &= ~FP_VIRTFP; 1477 } 1478 } 1479 1480 /* 1481 * Called from vkernel_trap() to fixup the vkernel's syscall 1482 * frame for vmspace_ctl() return. 1483 */ 1484 void 1485 cpu_vkernel_trap(struct trapframe *frame, int error) 1486 { 1487 frame->tf_rax = error; 1488 if (error) 1489 frame->tf_rflags |= PSL_C; 1490 else 1491 frame->tf_rflags &= ~PSL_C; 1492 } 1493