/*-
 * Copyright (c) 1989, 1990 William F. Jolitz.
 * Copyright (c) 1990 The Regents of the University of California.
 * Copyright (c) 2007 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by A. Joseph Koshy under
 * sponsorship from the FreeBSD Foundation and Google, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_atpic.h"
#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"

#include <machine/asmacros.h>
#include <machine/psl.h>
#include <machine/trap.h>
#include <machine/specialreg.h>

#include "assym.s"

#ifdef KDTRACE_HOOKS
	/*
	 * Two zero-initialised 8-byte slots used by the DTrace breakpoint
	 * path in alltraps: dtrace_invop_jump_addr is the hook that gets
	 * jumped through, dtrace_invop_calltrap_addr receives the address
	 * of calltrap so the hook can jump back.
	 */
	.bss
	.globl	dtrace_invop_jump_addr
	.align	8
	.type	dtrace_invop_jump_addr,@object
	.size	dtrace_invop_jump_addr,8
dtrace_invop_jump_addr:
	.zero	8
	.globl	dtrace_invop_calltrap_addr
	.align	8
	.type	dtrace_invop_calltrap_addr,@object
	.size	dtrace_invop_calltrap_addr,8
dtrace_invop_calltrap_addr:
	.zero	8
#endif
	.text
#ifdef HWPMC_HOOKS
	ENTRY(start_exceptions)
#endif

/*****************************************************************************/
/* Trap handling                                                             */
/*****************************************************************************/
/*
 * Trap and fault vector routines.
 *
 * All traps are 'interrupt gates', SDT_SYSIGT.  An interrupt gate pushes
 * state on the stack but also disables interrupts.  This is important for
 * us for the use of the swapgs instruction.  We cannot be interrupted
 * until the GS.base value is correct.  For most traps, we automatically
 * then enable interrupts if the interrupted context had them enabled.
 * This is equivalent to the i386 port's use of SDT_SYS386TGT.
 *
 * The cpu will push a certain amount of state onto the kernel stack for
 * the current process.  See amd64/include/frame.h.
 * This includes the current RFLAGS (status register, which includes
 * the interrupt disable state prior to the trap), the code segment register,
 * and the return instruction pointer.  The cpu will also push an 'error'
 * code for certain traps.  We push a dummy error code for those traps
 * where the cpu doesn't in order to maintain a consistent frame.  We also
 * push a contrived 'trap number'.
 *
 * The CPU does not push the general registers, so we must do that, and we
 * must restore them prior to calling 'iret'.  The CPU adjusts %cs and %ss
 * but does not mess with %ds, %es, %gs or %fs.  We swap the %gs base
 * for the kernel mode operation shortly, without changes to the selector
 * loaded.  Since superuser long mode works with any selectors loaded into
 * segment registers other than %cs, which makes them mostly unused in long
 * mode, and kernel does not reference %fs, leave them alone.  The segment
 * registers are reloaded on return to the usermode.
 */

MCOUNT_LABEL(user)
MCOUNT_LABEL(btrap)

/* Traps that we leave interrupts disabled for.. */
#define	TRAP_NOEN(a)	\
	subq $TF_RIP,%rsp; \
	movl $(a),TF_TRAPNO(%rsp) ; \
	movq $0,TF_ADDR(%rsp) ; \
	movq $0,TF_ERR(%rsp) ; \
	jmp alltraps_noen
IDTVEC(dbg)
	TRAP_NOEN(T_TRCTRAP)
IDTVEC(bpt)
	TRAP_NOEN(T_BPTFLT)
#ifdef KDTRACE_HOOKS
IDTVEC(dtrace_ret)
	TRAP_NOEN(T_DTRACE_RET)
#endif

/* Regular traps; The cpu does not supply tf_err for these. */
#define	TRAP(a)	 \
	subq $TF_RIP,%rsp; \
	movl $(a),TF_TRAPNO(%rsp) ; \
	movq $0,TF_ADDR(%rsp) ; \
	movq $0,TF_ERR(%rsp) ; \
	jmp alltraps
IDTVEC(div)
	TRAP(T_DIVIDE)
IDTVEC(ofl)
	TRAP(T_OFLOW)
IDTVEC(bnd)
	TRAP(T_BOUND)
IDTVEC(ill)
	TRAP(T_PRIVINFLT)
IDTVEC(dna)
	TRAP(T_DNA)
IDTVEC(fpusegm)
	TRAP(T_FPOPFLT)
IDTVEC(mchk)
	TRAP(T_MCHK)
IDTVEC(rsvd)
	TRAP(T_RESERVED)
IDTVEC(fpu)
	TRAP(T_ARITHTRAP)
IDTVEC(xmm)
	TRAP(T_XMMFLT)

/* This group of traps have tf_err already pushed by the cpu */
#define	TRAP_ERR(a)	\
	subq $TF_ERR,%rsp; \
	movl $(a),TF_TRAPNO(%rsp) ; \
	movq $0,TF_ADDR(%rsp) ; \
	jmp alltraps
IDTVEC(tss)
	TRAP_ERR(T_TSSFLT)
/*
 * The 'missing' and 'stk' vectors set their own trap number and then
 * share the prot_addrf path of the 'prot' vector below.
 */
IDTVEC(missing)
	subq	$TF_ERR,%rsp
	movl	$T_SEGNPFLT,TF_TRAPNO(%rsp)
	jmp	prot_addrf
IDTVEC(stk)
	subq	$TF_ERR,%rsp
	movl	$T_STKFLT,TF_TRAPNO(%rsp)
	jmp	prot_addrf
IDTVEC(align)
	TRAP_ERR(T_ALIGNFLT)

	/*
	 * alltraps entry point.  Use swapgs if this is the first time in the
	 * kernel from userland.  Reenable interrupts if they were enabled
	 * before the trap.  This approximates SDT_SYS386TGT on the i386 port.
	 */
	SUPERALIGN_TEXT
	.globl	alltraps
	.type	alltraps,@function
alltraps:
	movq	%rdi,TF_RDI(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jz	alltraps_testi		/* already running with kernel GS.base */
	swapgs
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
alltraps_testi:
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	alltraps_pushregs_no_rdi
	sti
alltraps_pushregs_no_rdi:
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	cld
	FAKE_MCOUNT(TF_RIP(%rsp))
#ifdef KDTRACE_HOOKS
	/*
	 * DTrace Function Boundary Trace (fbt) probes are triggered
	 * by int3 (0xcc) which causes the #BP (T_BPTFLT) breakpoint
	 * interrupt. For all other trap types, just handle them in
	 * the usual way.
	 */
	cmpl	$T_BPTFLT,TF_TRAPNO(%rsp)
	jne	calltrap

	/* Check if there is no DTrace hook registered. */
	cmpq	$0,dtrace_invop_jump_addr
	je	calltrap

	/*
	 * Set our jump address for the jump back in the event that
	 * the breakpoint wasn't caused by DTrace at all.
	 */
	movq	$calltrap,dtrace_invop_calltrap_addr(%rip)

	/* Jump to the code hooked in by DTrace. */
	movq	dtrace_invop_jump_addr,%rax
	jmpq	*dtrace_invop_jump_addr
#endif
	.globl	calltrap
	.type	calltrap,@function
calltrap:
	movq	%rsp,%rdi		/* pass a pointer to the trapframe */
	call	trap_check
	MEXITCOUNT
	jmp	doreti			/* Handle any pending ASTs */

	/*
	 * alltraps_noen entry point.  Unlike alltraps above, we want to
	 * leave the interrupts disabled.  This corresponds to
	 * SDT_SYS386IGT on the i386 port.
	 */
	SUPERALIGN_TEXT
	.globl	alltraps_noen
	.type	alltraps_noen,@function
alltraps_noen:
	movq	%rdi,TF_RDI(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jz	1f			/* already running with kernel GS.base */
	swapgs
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
1:	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	jmp	alltraps_pushregs_no_rdi

/*
 * Double fault.  Build a complete trapframe (the error-code slot is
 * overwritten with zero), switch to the kernel GS.base if the fault came
 * from user mode, and hand the frame to dblfault_handler().  Should the
 * handler ever return, spin in hlt.
 */
IDTVEC(dblfault)
	subq	$TF_ERR,%rsp
	movl	$T_DOUBLEFLT,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	cld
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jz	1f			/* already running with kernel GS.base */
	swapgs
1:
	movq	%rsp,%rdi
	call	dblfault_handler
2:
	hlt
	jmp	2b

/*
 * Page fault.  The cpu has pushed tf_err; the faulting address is taken
 * from %cr2 and stored in tf_addr before interrupts may be re-enabled.
 */
IDTVEC(page)
	subq	$TF_ERR,%rsp
	movl	$T_PAGEFLT,TF_TRAPNO(%rsp)
	movq	%rdi,TF_RDI(%rsp)	/* free up a GP register */
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jz	1f			/* already running with kernel GS.base */
	swapgs
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
1:	movq	%cr2,%rdi		/* preserve %cr2 before ..  */
	movq	%rdi,TF_ADDR(%rsp)	/* enabling interrupts. */
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	alltraps_pushregs_no_rdi
	sti
	jmp	alltraps_pushregs_no_rdi

	/*
	 * We have to special-case this one.  If we get a trap in doreti() at
	 * the iretq stage, we'll reenter with the wrong gs state.  We'll have
	 * to do a special swapgs in this case even coming from the kernel.
	 * XXX linux has a trap handler for their equivalent of load_gs().
	 */
IDTVEC(prot)
	subq	$TF_ERR,%rsp
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
prot_addrf:
	movq	$0,TF_ADDR(%rsp)
	movq	%rdi,TF_RDI(%rsp)	/* free up a GP register */
	leaq	doreti_iret(%rip),%rdi
	cmpq	%rdi,TF_RIP(%rsp)
	je	1f			/* kernel but with user gsbase!! */
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jz	2f			/* already running with kernel GS.base */
1:	swapgs
2:	movq	PCPU(CURPCB),%rdi
	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)	/* always full iret from GPF */
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	alltraps_pushregs_no_rdi
	sti
	jmp	alltraps_pushregs_no_rdi

/*
 * Fast syscall entry point.  We enter here with just our new %cs/%ss set,
 * and the new privilege level.  We are still running on the old user stack
 * pointer.  We have to juggle a few things around to find our stack etc.
 * swapgs gives us access to our PCPU space only.
 *
 * We do not support invoking this from a custom %cs or %ss (e.g. using
 * entries from an LDT).
 */
IDTVEC(fast_syscall)
	swapgs
	movq	%rsp,PCPU(SCRATCH_RSP)
	movq	PCPU(RSP0),%rsp
	/* Now emulate a trapframe. Make the 8 byte alignment odd for call. */
	subq	$TF_SIZE,%rsp
	/* defer TF_RSP till we have a spare register */
	movq	%r11,TF_RFLAGS(%rsp)
	movq	%rcx,TF_RIP(%rsp)	/* %rcx original value is in %r10 */
	movq	PCPU(SCRATCH_RSP),%r11	/* %r11 already saved */
	movq	%r11,TF_RSP(%rsp)	/* user stack pointer */
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	movq	PCPU(CURPCB),%r11
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%r11)
	sti
	movq	$KUDSEL,TF_SS(%rsp)
	movq	$KUCSEL,TF_CS(%rsp)
	movq	$2,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)	/* arg 1 */
	movq	%rsi,TF_RSI(%rsp)	/* arg 2 */
	movq	%rdx,TF_RDX(%rsp)	/* arg 3 */
	movq	%r10,TF_RCX(%rsp)	/* arg 4 */
	movq	%r8,TF_R8(%rsp)		/* arg 5 */
	movq	%r9,TF_R9(%rsp)		/* arg 6 */
	movq	%rax,TF_RAX(%rsp)	/* syscall number */
	movq	%rbx,TF_RBX(%rsp)	/* C preserved */
	movq	%rbp,TF_RBP(%rsp)	/* C preserved */
	movq	%r12,TF_R12(%rsp)	/* C preserved */
	movq	%r13,TF_R13(%rsp)	/* C preserved */
	movq	%r14,TF_R14(%rsp)	/* C preserved */
	movq	%r15,TF_R15(%rsp)	/* C preserved */
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	cld
	FAKE_MCOUNT(TF_RIP(%rsp))
	movq	PCPU(CURTHREAD),%rdi
	movq	%rsp,TD_FRAME(%rdi)
	movl	TF_RFLAGS(%rsp),%esi
	andl	$PSL_T,%esi
	call	amd64_syscall
1:	movq	PCPU(CURPCB),%rax
	/* Disable interrupts before testing PCB_FULL_IRET. */
	cli
	testl	$PCB_FULL_IRET,PCB_FLAGS(%rax)
	jnz	3f
	/* Check for and handle AST's on return to userland. */
	movq	PCPU(CURTHREAD),%rax
	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
	jne	2f
	/* Restore preserved registers. */
	MEXITCOUNT
	movq	TF_RDI(%rsp),%rdi	/* bonus: preserve arg 1 */
	movq	TF_RSI(%rsp),%rsi	/* bonus: preserve arg 2 */
	movq	TF_RDX(%rsp),%rdx	/* return value 2 */
	movq	TF_RAX(%rsp),%rax	/* return value 1 */
	movq	TF_RFLAGS(%rsp),%r11	/* original %rflags */
	movq	TF_RIP(%rsp),%rcx	/* original %rip */
	movq	TF_RSP(%rsp),%rsp	/* user stack pointer */
	swapgs
	sysretq

2:	/* AST scheduled. */
	sti
	movq	%rsp,%rdi
	call	ast
	jmp	1b

3:	/* Requested full context restore, use doreti for that. */
	MEXITCOUNT
	jmp	doreti

/*
 * Here for CYA insurance, in case a "syscall" instruction gets
 * issued from 32 bit compatibility mode. MSR_CSTAR has to point
 * to *something* if EFER_SCE is enabled.
 */
IDTVEC(fast_syscall32)
	sysret

/*
 * NMI handling is special.
 *
 * First, NMIs do not respect the state of the processor's RFLAGS.IF
 * bit.  The NMI handler may be entered at any time, including when
 * the processor is in a critical section with RFLAGS.IF == 0.
 * The processor's GS.base value could be invalid on entry to the
 * handler.
 *
 * Second, the processor treats NMIs specially, blocking further NMIs
 * until an 'iretq' instruction is executed.  We thus need to execute
 * the NMI handler with interrupts disabled, to prevent a nested interrupt
 * from executing an 'iretq' instruction and inadvertently taking the
 * processor out of NMI mode.
 *
 * Third, the NMI handler runs on its own stack (tss_ist2).  The canonical
 * GS.base value for the processor is stored just above the bottom of its
 * NMI stack.  For NMIs taken from kernel mode, the current value in
 * the processor's GS.base is saved at entry to C-preserved register %r12,
 * the canonical value for GS.base is then loaded into the processor, and
 * the saved value is restored at exit time.  For NMIs taken from user mode,
 * the cheaper 'SWAPGS' instructions are used for swapping GS.base.
 */

IDTVEC(nmi)
	subq	$TF_RIP,%rsp
	movl	$(T_NMI),TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	cld
	xorl	%ebx,%ebx		/* %ebx = 0: NMI taken from kernel */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jnz	nmi_fromuserspace
	/*
	 * We've interrupted the kernel.  Preserve GS.base in %r12.
	 */
	movl	$MSR_GSBASE,%ecx
	rdmsr
	movq	%rax,%r12
	shlq	$32,%rdx
	orq	%rdx,%r12
	/* Retrieve and load the canonical value for GS.base. */
	movq	TF_SIZE(%rsp),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	jmp	nmi_calltrap
nmi_fromuserspace:
	incl	%ebx			/* %ebx = 1: NMI taken from user mode */
	swapgs
/* Note: this label is also used by ddb and gdb: */
nmi_calltrap:
	FAKE_MCOUNT(TF_RIP(%rsp))
	movq	%rsp,%rdi
	call	trap
	MEXITCOUNT
#ifdef HWPMC_HOOKS
	/*
	 * Capture a userspace callchain if needed.
	 *
	 * - Check if the current trap was from user mode.
	 * - Check if the current thread is valid.
	 * - Check if the thread requires a user call chain to be
	 *   captured.
	 *
	 * We are still in NMI mode at this point.
	 */
	testl	%ebx,%ebx
	jz	nocallchain	/* not from userspace */
	movq	PCPU(CURTHREAD),%rax
	orq	%rax,%rax	/* curthread present? */
	jz	nocallchain
	testl	$TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
	jz	nocallchain
	/*
	 * A user callchain is to be captured, so:
	 * - Move execution to the regular kernel stack, to allow for
	 *   nested NMI interrupts.
	 * - Take the processor out of "NMI" mode by faking an "iret".
	 * - Enable interrupts, so that copyin() can work.
	 */
	movq	%rsp,%rsi	/* source stack pointer */
	movq	$TF_SIZE,%rcx
	movq	PCPU(RSP0),%rdx
	subq	%rcx,%rdx
	movq	%rdx,%rdi	/* destination stack pointer */

	shrq	$3,%rcx		/* trap frame size in long words */
	cld
	rep
	movsq			/* copy trapframe */

	movl	%ss,%eax
	pushq	%rax		/* tf_ss */
	pushq	%rdx		/* tf_rsp (on kernel stack) */
	pushfq			/* tf_rflags */
	movl	%cs,%eax
	pushq	%rax		/* tf_cs */
	pushq	$outofnmi	/* tf_rip */
	iretq
outofnmi:
	/*
	 * At this point the processor has exited NMI mode and is running
	 * with interrupts turned off on the normal kernel stack.
	 *
	 * If a pending NMI gets recognized at or after this point, it
	 * will cause a kernel callchain to be traced.
	 *
	 * We turn interrupts back on, and call the user callchain capture hook.
	 */
	movq	pmc_hook,%rax
	orq	%rax,%rax
	jz	nocallchain
	movq	PCPU(CURTHREAD),%rdi		/* thread */
	movq	$PMC_FN_USER_CALLCHAIN,%rsi	/* command */
	movq	%rsp,%rdx			/* frame */
	sti
	call	*%rax
	cli
nocallchain:
#endif
	testl	%ebx,%ebx
	jnz	doreti_exit
nmi_kernelexit:
	/*
	 * Put back the preserved MSR_GSBASE value.
	 */
	movl	$MSR_GSBASE,%ecx
	movq	%r12,%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
nmi_restoreregs:
	movq	TF_RDI(%rsp),%rdi
	movq	TF_RSI(%rsp),%rsi
	movq	TF_RDX(%rsp),%rdx
	movq	TF_RCX(%rsp),%rcx
	movq	TF_R8(%rsp),%r8
	movq	TF_R9(%rsp),%r9
	movq	TF_RAX(%rsp),%rax
	movq	TF_RBX(%rsp),%rbx
	movq	TF_RBP(%rsp),%rbp
	movq	TF_R10(%rsp),%r10
	movq	TF_R11(%rsp),%r11
	movq	TF_R12(%rsp),%r12
	movq	TF_R13(%rsp),%r13
	movq	TF_R14(%rsp),%r14
	movq	TF_R15(%rsp),%r15
	addq	$TF_RIP,%rsp
	jmp	doreti_iret

/*
 * fork_trampoline: call fork_exit(%r12, %rbx, %rsp), i.e. the function,
 * its argument and a pointer to the trapframe on the stack, then leave
 * through doreti.
 */
ENTRY(fork_trampoline)
	movq	%r12,%rdi		/* function */
	movq	%rbx,%rsi		/* arg1 */
	movq	%rsp,%rdx		/* trapframe pointer */
	call	fork_exit
	MEXITCOUNT
	jmp	doreti			/* Handle any ASTs */

/*
 * To efficiently implement classification of trap and interrupt handlers
 * for profiling, there must be only trap handlers between the labels btrap
 * and bintr, and only interrupt handlers between the labels bintr and
 * eintr.  This is implemented (partly) by including files that contain
 * some of the handlers.  Before including the files, set up a normal asm
 * environment so that the included files don't need to know that they are
 * included.
 */

#ifdef COMPAT_FREEBSD32
	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT

#include <amd64/ia32/ia32_exception.S>
#endif

	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT
MCOUNT_LABEL(bintr)

#include <amd64/amd64/apic_vector.S>

#ifdef DEV_ATPIC
	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT

#include <amd64/amd64/atpic_vector.S>
#endif

	.text
MCOUNT_LABEL(eintr)

/*
 * void doreti(struct trapframe)
 *
 * Handle return from interrupts, traps and syscalls.
 */
	.text
	SUPERALIGN_TEXT
	.type	doreti,@function
doreti:
	FAKE_MCOUNT($bintr)		/* init "from" bintr -> doreti */
	/*
	 * Check if ASTs can be handled now.
	 */
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* are we returning to user mode? */
	jz	doreti_exit		/* can't handle ASTs now if not */

doreti_ast:
	/*
	 * Check for ASTs atomically with returning.  Disabling CPU
	 * interrupts provides sufficient locking even in the SMP case,
	 * since we will be informed of any new ASTs by an IPI.
	 */
	cli
	movq	PCPU(CURTHREAD),%rax
	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
	je	doreti_exit
	sti
	movq	%rsp,%rdi	/* pass a pointer to the trapframe */
	call	ast
	jmp	doreti_ast

	/*
	 * doreti_exit:	pop registers, iret.
	 *
	 *	The segment register pop is a special case, since it may
	 *	fault if (for example) a sigreturn specifies bad segment
	 *	registers.  The fault is handled in trap.c.
	 */
doreti_exit:
	MEXITCOUNT
	movq	PCPU(CURPCB),%r8

	/*
	 * Do not reload segment registers for kernel.
	 * Since we do not reload segment registers with sane
	 * values on kernel entry, descriptors referenced by
	 * segment registers might not be valid.  This is fatal
	 * for user mode, but is not a problem for the kernel.
	 */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jz	ld_regs
	testl	$PCB_FULL_IRET,PCB_FLAGS(%r8)
	jz	ld_regs
	testl	$TF_HASSEGS,TF_FLAGS(%rsp)
	je	set_segs

do_segs:
	/* Restore %fs and fsbase */
	movw	TF_FS(%rsp),%ax
	.globl	ld_fs
ld_fs:
	movw	%ax,%fs
	cmpw	$KUF32SEL,%ax
	jne	1f
	movl	$MSR_FSBASE,%ecx
	movl	PCB_FSBASE(%r8),%eax
	movl	PCB_FSBASE+4(%r8),%edx
	.globl	ld_fsbase
ld_fsbase:
	wrmsr
1:
	/* Restore %gs and gsbase */
	movw	TF_GS(%rsp),%si
	pushfq
	cli
	movl	$MSR_GSBASE,%ecx
	/* Save current kernel %gs base into %r12d:%r13d */
	rdmsr
	movl	%eax,%r12d
	movl	%edx,%r13d
	.globl	ld_gs
ld_gs:
	movw	%si,%gs
	/* Save user %gs base into %r14d:%r15d */
	rdmsr
	movl	%eax,%r14d
	movl	%edx,%r15d
	/* Restore kernel %gs base */
	movl	%r12d,%eax
	movl	%r13d,%edx
	wrmsr
	popfq
	/*
	 * Restore user %gs base, either from PCB if used for TLS, or
	 * from the previously saved msr read.
	 */
	movl	$MSR_KGSBASE,%ecx
	cmpw	$KUG32SEL,%si
	jne	1f
	movl	PCB_GSBASE(%r8),%eax
	movl	PCB_GSBASE+4(%r8),%edx
	jmp	ld_gsbase
1:
	movl	%r14d,%eax
	movl	%r15d,%edx
	.globl	ld_gsbase
ld_gsbase:
	wrmsr	/* May trap if non-canonical, but only for TLS. */
	.globl	ld_es
ld_es:
	movw	TF_ES(%rsp),%es
	.globl	ld_ds
ld_ds:
	movw	TF_DS(%rsp),%ds
ld_regs:
	movq	TF_RDI(%rsp),%rdi
	movq	TF_RSI(%rsp),%rsi
	movq	TF_RDX(%rsp),%rdx
	movq	TF_RCX(%rsp),%rcx
	movq	TF_R8(%rsp),%r8
	movq	TF_R9(%rsp),%r9
	movq	TF_RAX(%rsp),%rax
	movq	TF_RBX(%rsp),%rbx
	movq	TF_RBP(%rsp),%rbp
	movq	TF_R10(%rsp),%r10
	movq	TF_R11(%rsp),%r11
	movq	TF_R12(%rsp),%r12
	movq	TF_R13(%rsp),%r13
	movq	TF_R14(%rsp),%r14
	movq	TF_R15(%rsp),%r15
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
	jz	1f			/* keep running with kernel GS.base */
	cli
	swapgs
1:
	addq	$TF_RIP,%rsp		/* skip over tf_err, tf_trapno */
	.globl	doreti_iret
doreti_iret:
	iretq

	/*
	 * set_segs: reached from doreti_exit when TF_HASSEGS is clear in
	 * tf_flags.  Store the KUDSEL/KUF32SEL/KUG32SEL selectors into the
	 * trapframe's segment slots and continue at do_segs.
	 */
set_segs:
	movw	$KUDSEL,%ax
	movw	%ax,TF_DS(%rsp)
	movw	%ax,TF_ES(%rsp)
	movw	$KUF32SEL,TF_FS(%rsp)
	movw	$KUG32SEL,TF_GS(%rsp)
	jmp	do_segs

	/*
	 * doreti_iret_fault.  Alternative return code for
	 * the case where we get a fault in the doreti_exit code
	 * above.  trap() (amd64/amd64/trap.c) catches this specific
	 * case, sends the process a signal and continues in the
	 * corresponding place in the code below.
	 */
	ALIGN_TEXT
	.globl	doreti_iret_fault
doreti_iret_fault:
	subq	$TF_RIP,%rsp		/* space including tf_err, tf_trapno */
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	movq	$0,TF_ERR(%rsp)	/* XXX should be the error code */
	movq	$0,TF_ADDR(%rsp)
	FAKE_MCOUNT(TF_RIP(%rsp))
	jmp	calltrap

	/*
	 * Segment and base-MSR load fault handlers.  Each one marks the
	 * frame as T_PROTFLT and calls trap(), then stores a replacement
	 * value (a fixed selector in the trapframe, or a zero base in the
	 * PCB) and retries the return through doreti.
	 * NOTE(review): presumably trap() redirects here when the matching
	 * ld_* instruction in doreti_exit faults -- confirm against trap.c.
	 */
	ALIGN_TEXT
	.globl	ds_load_fault
ds_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movw	$KUDSEL,TF_DS(%rsp)
	jmp	doreti

	ALIGN_TEXT
	.globl	es_load_fault
es_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movw	$KUDSEL,TF_ES(%rsp)
	jmp	doreti

	ALIGN_TEXT
	.globl	fs_load_fault
fs_load_fault:
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	movq	%rsp,%rdi
	call	trap
	movw	$KUF32SEL,TF_FS(%rsp)
	jmp	doreti

	ALIGN_TEXT
	.globl	gs_load_fault
gs_load_fault:
	popfq				/* undo the pushfq that precedes ld_gs */
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movw	$KUG32SEL,TF_GS(%rsp)
	jmp	doreti

	ALIGN_TEXT
	.globl	fsbase_load_fault
fsbase_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movq	PCPU(CURTHREAD),%r8
	movq	TD_PCB(%r8),%r8
	movq	$0,PCB_FSBASE(%r8)
	jmp	doreti

	ALIGN_TEXT
	.globl	gsbase_load_fault
gsbase_load_fault:
	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	1f
	sti
1:
	movq	%rsp,%rdi
	call	trap
	movq	PCPU(CURTHREAD),%r8
	movq	TD_PCB(%r8),%r8
	movq	$0,PCB_GSBASE(%r8)
	jmp	doreti

#ifdef HWPMC_HOOKS
	ENTRY(end_exceptions)
#endif