1/*- 2 * Copyright (c) 1989, 1990 William F. Jolitz. 3 * Copyright (c) 1990 The Regents of the University of California. 4 * Copyright (c) 2007-2018 The FreeBSD Foundation 5 * All rights reserved. 6 * 7 * Portions of this software were developed by A. Joseph Koshy under 8 * sponsorship from the FreeBSD Foundation and Google, Inc. 9 * 10 * Portions of this software were developed by 11 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from 12 * the FreeBSD Foundation. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_atpic.h"
#include "opt_hwpmc_hooks.h"

#include "assym.inc"

#include <machine/psl.h>
#include <machine/asmacros.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
#include <machine/pmap.h>

#ifdef KDTRACE_HOOKS
	.bss
	/*
	 * Hook pointer patched at runtime; when non-zero, the #BP handler
	 * below jumps here instead of calling trap() (see the fbt dispatch
	 * in alltraps).
	 */
	.globl	dtrace_invop_jump_addr
	.align	8
	.type	dtrace_invop_jump_addr,@object
	.size	dtrace_invop_jump_addr,8
dtrace_invop_jump_addr:
	.zero	8
	/*
	 * Return address handed to the DTrace hook so it can fall back to
	 * the normal trap path; set to 'calltrap' before dispatching.
	 */
	.globl	dtrace_invop_calltrap_addr
	.align	8
	.type	dtrace_invop_calltrap_addr,@object
	.size	dtrace_invop_calltrap_addr,8
dtrace_invop_calltrap_addr:
	.zero	8
#endif
	.text
#ifdef HWPMC_HOOKS
	/* Marks the start of the trap-handler range for hwpmc profiling. */
	ENTRY(start_exceptions)
#endif

/*****************************************************************************/
/* Trap handling */
/*****************************************************************************/
/*
 * Trap and fault vector routines.
 *
 * All traps are 'interrupt gates', SDT_SYSIGT.  An interrupt gate pushes
 * state on the stack but also disables interrupts.  This is important for
 * us for the use of the swapgs instruction.  We cannot be interrupted
 * until the GS.base value is correct.  For most traps, we automatically
 * then enable interrupts if the interrupted context had them enabled.
 * This is equivalent to the i386 port's use of SDT_SYS386TGT.
 *
 * The CPU will push a certain amount of state onto the kernel stack for
 * the current process.  See amd64/include/frame.h.
 * The current RFLAGS (status register, which includes the interrupt
 * disable state prior to the trap), the code segment register, and
 * the return instruction pointer are pushed by the CPU.  The CPU
 * will also push an 'error' code for certain traps.  We push a dummy
 * error code for those traps where the CPU doesn't, in order to maintain
 * a consistent frame.  We also push a contrived 'trap number'.
 *
 * The CPU does not push the general registers, so we must do that, and we
 * must restore them prior to calling 'iret'.  The CPU adjusts %cs and %ss
 * but does not mess with %ds, %es, %gs or %fs.  We swap the %gs base for
 * the kernel mode operation shortly, without changes to the selector
 * loaded.  Since superuser long mode works with any selectors loaded into
 * segment registers other than %cs, which makes them mostly unused in long
 * mode, and the kernel does not reference %fs, leave them alone.  The segment
 * registers are reloaded on return to usermode.
 */

/* Traps that we leave interrupts disabled for.
 */
	/*
	 * Emit the IDT entry points for a trap that keeps interrupts
	 * disabled: PTI kernel/user entries plus the non-PTI X\l entry.
	 * Builds the top of the trapframe (trapno/addr/err) and jumps to
	 * the shared alltraps_noen_k/_u tails.
	 */
	.macro	TRAP_NOEN	l, trapno
	PTI_ENTRY	\l,\l\()_pti_k,\l\()_pti_u
\l\()_pti_k:
	subq	$TF_RIP,%rsp		/* make room down to tf_rip */
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)		/* CPU pushed no error code */
	jmp	alltraps_noen_k
\l\()_pti_u:
	subq	$TF_RIP,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	jmp	alltraps_noen_u

	.globl	X\l
	.type	X\l,@function
X\l:
	subq	$TF_RIP,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* from userland? (RPL != 0) */
	jz	alltraps_noen_k
	swapgs				/* load kernel GS.base */
	lfence				/* stop speculation past swapgs */
	jmp	alltraps_noen_u
	.endm

	TRAP_NOEN	bpt, T_BPTFLT
#ifdef KDTRACE_HOOKS
	TRAP_NOEN	dtrace_ret, T_DTRACE_RET
#endif

/* Regular traps; The cpu does not supply tf_err for these. */
	.macro	TRAP	l, trapno
	PTI_ENTRY	\l,\l\()_pti_k,\l\()_pti_u
\l\()_pti_k:
	subq	$TF_RIP,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	jmp	alltraps_k
\l\()_pti_u:
	subq	$TF_RIP,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	jmp	alltraps_u

	.globl	X\l
	.type	X\l,@function
X\l:
	subq	$TF_RIP,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* from userland? */
	jz	alltraps_k
	swapgs
	lfence
	jmp	alltraps_u
	.endm

	TRAP	div, T_DIVIDE
	TRAP	ofl, T_OFLOW
	TRAP	bnd, T_BOUND
	TRAP	ill, T_PRIVINFLT
	TRAP	dna, T_DNA
	TRAP	fpusegm, T_FPOPFLT
	TRAP	rsvd, T_RESERVED
	TRAP	fpu, T_ARITHTRAP
	TRAP	xmm, T_XMMFLT

/* This group of traps have tf_err already pushed by the cpu.
 */
	/*
	 * Like TRAP, but the CPU already pushed an error code, so the
	 * frame is built down from tf_err rather than tf_rip.
	 */
	.macro	TRAP_ERR	l, trapno
	PTI_ENTRY	\l,\l\()_pti_k,\l\()_pti_u,has_err=1
\l\()_pti_k:
	subq	$TF_ERR,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	jmp	alltraps_k
\l\()_pti_u:
	subq	$TF_ERR,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	jmp	alltraps_u
	.globl	X\l
	.type	X\l,@function
X\l:
	subq	$TF_ERR,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jz	alltraps_k
	swapgs
	lfence
	jmp	alltraps_u
	.endm

	TRAP_ERR	tss, T_TSSFLT
	TRAP_ERR	align, T_ALIGNFLT

	/*
	 * alltraps_u/k entry points.
	 * SWAPGS must be already performed by prologue,
	 * if this is the first time in the kernel from userland.
	 * Reenable interrupts if they were enabled before the trap.
	 * This approximates SDT_SYS386TGT on the i386 port.
	 */
	SUPERALIGN_TEXT
	.globl	alltraps_u
	.type	alltraps_u,@function
alltraps_u:
	movq	%rdi,TF_RDI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)	/* default to fast return */
	call	handle_ibrs_entry		/* spec-ctrl on user entry */
	jmp	alltraps_save_segs
	SUPERALIGN_TEXT
	.globl	alltraps_k
	.type	alltraps_k,@function
alltraps_k:
	lfence			/* fence the mispredicted jz into this path */
	movq	%rdi,TF_RDI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
alltraps_save_segs:
	SAVE_SEGS
	testl	$PSL_I,TF_RFLAGS(%rsp)	/* interrupts enabled before trap? */
	jz	alltraps_pushregs_no_rax
	sti
alltraps_pushregs_no_rax:
	/* Save remaining general registers into the trapframe. */
	movq	%rsi,TF_RSI(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	/* Clear direction and alignment-check flags for kernel C code. */
	pushfq
	andq	$~(PSL_D | PSL_AC),(%rsp)
	popfq
#ifdef KDTRACE_HOOKS
	/*
	 * DTrace Function
	 * Boundary Trace (fbt) probes are triggered
	 * by int3 (0xcc) which causes the #BP (T_BPTFLT) breakpoint
	 * interrupt.  For all other trap types, just handle them in
	 * the usual way.
	 */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)	/* Did we come from kernel? */
	jnz	calltrap		/* ignore userland traps */
	cmpl	$T_BPTFLT,TF_TRAPNO(%rsp)
	jne	calltrap

	/* Check if there is no DTrace hook registered. */
	cmpq	$0,dtrace_invop_jump_addr
	je	calltrap

	/*
	 * Set our jump address for the jump back in the event that
	 * the breakpoint wasn't caused by DTrace at all.
	 */
	movq	$calltrap,dtrace_invop_calltrap_addr(%rip)

	/* Jump to the code hooked in by DTrace. */
	jmpq	*dtrace_invop_jump_addr
#endif
	/*
	 * Common C dispatch: trap_check(frame), then return via doreti.
	 */
	.globl	calltrap
	.type	calltrap,@function
calltrap:
	KMSAN_ENTER
	movq	%rsp, %rdi		/* arg: trapframe pointer */
	call	trap_check
	KMSAN_LEAVE
	jmp	doreti			/* Handle any pending ASTs */

	/*
	 * alltraps_noen_u/k entry points.
	 * Again, SWAPGS must be already performed by prologue, if needed.
	 * Unlike alltraps above, we want to leave the interrupts disabled.
	 * This corresponds to SDT_SYS386IGT on the i386 port.
	 */
	SUPERALIGN_TEXT
	.globl	alltraps_noen_u
	.type	alltraps_noen_u,@function
alltraps_noen_u:
	movq	%rdi,TF_RDI(%rsp)
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
	jmp	alltraps_noen_save_segs
	SUPERALIGN_TEXT
	.globl	alltraps_noen_k
	.type	alltraps_noen_k,@function
alltraps_noen_k:
	lfence				/* fence mispredicted branch */
	movq	%rdi,TF_RDI(%rsp)
alltraps_noen_save_segs:
	SAVE_SEGS
	movq	%rdx,TF_RDX(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jz	alltraps_pushregs_no_rax
	call	handle_ibrs_entry	/* user entry: set up spec-ctrl */
	jmp	alltraps_pushregs_no_rax

/*
 * #DF.  Runs on its own IST stack; the system state is unreliable, so the
 * handler saves everything, restores a sane GS.base and %cr3 by hand, and
 * never returns (halts after dblfault_handler()).
 */
IDTVEC(dblfault)
	subq	$TF_ERR,%rsp
	movl	$T_DOUBLEFLT,TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	SAVE_SEGS
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	pushfq
	andq	$~(PSL_D | PSL_AC),(%rsp)
	popfq
	/*
	 * Load canonical GS.base stored just above the frame on the IST
	 * stack (at TF_SIZE(%rsp)); wrmsr takes %edx:%eax.
	 */
	movq	TF_SIZE(%rsp),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	movl	$MSR_GSBASE,%ecx
	wrmsr
	/* Switch to kernel page table if PTI is active (KCR3 != ~0). */
	movq	%cr3,%rax
	movq	%rax,PCPU(SAVED_UCR3)
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax
	je	2f
	movq	%rax,%cr3
2:	KMSAN_ENTER
	movq	%rsp,%rdi
	call	dblfault_handler
	KMSAN_LEAVE
3:	hlt				/* no recovery from #DF */
	jmp	3b

	ALIGN_TEXT
/* #PF entry used when PTI is enabled: may arrive on the trampoline stack. */
IDTVEC(page_pti)
	testb	$SEL_RPL_MASK,PTI_CS-PTI_ERR(%rsp)
	jz	page_k
	swapgs
	lfence
	pushq	%rax
	movq	%cr3,%rax
	movq	%rax,PCPU(SAVED_UCR3)
	cmpq	$~0,PCPU(UCR3)		/* PTI disabled for this pmap? */
	jne	1f
	popq	%rax
	jmp	page_u
1:	pushq	%rdx
	PTI_UUENTRY has_err=1		/* switch cr3/stack to kernel */
	jmp	page_u
	ALIGN_TEXT
IDTVEC(page)
	testb	$SEL_RPL_MASK,TF_CS-TF_ERR(%rsp)	/* Did we come from
							   kernel? */
	jnz	page_u_swapgs		/* from user: need swapgs first */
page_k:
	lfence
	subq	$TF_ERR,%rsp
	movq	%rdi,TF_RDI(%rsp)	/* free up GP registers */
	movq	%rax,TF_RAX(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	jmp	page_cr2
	ALIGN_TEXT
page_u_swapgs:
	swapgs
	lfence
page_u:
	subq	$TF_ERR,%rsp
	movq	%rdi,TF_RDI(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	PCPU(CURPCB),%rdi
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
	/* Record the user %cr3 captured in page_pti for the fault handler. */
	movq	PCPU(SAVED_UCR3),%rax
	movq	%rax,PCB_SAVED_UCR3(%rdi)
	call	handle_ibrs_entry
page_cr2:
	movq	%cr2,%rdi		/* preserve %cr2 before .. */
	movq	%rdi,TF_ADDR(%rsp)	/* enabling interrupts. */
	SAVE_SEGS
	movl	$T_PAGEFLT,TF_TRAPNO(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	alltraps_pushregs_no_rax
	sti
	jmp	alltraps_pushregs_no_rax

	/*
	 * We have to special-case this one.  If we get a trap in doreti() at
	 * the iretq stage, we'll reenter with the wrong gs state.  We'll have
	 * to do a special swapgs in this case even coming from the kernel.
	 * XXX linux has a trap handler for their equivalent of load_gs().
	 *
	 * On the stack, we have the hardware interrupt frame to return
	 * to usermode (faulted) and another frame with error code, for
	 * fault.  For PTI, copy both frames to the main thread stack.
	 * Handle the potential 16-byte alignment adjustment incurred
	 * during the second fault by copying both frames independently
	 * while unwinding the stack in between.
	 */
	.macro PROTF_ENTRY name,trapno
\name\()_pti_doreti:
	/* Faulted on doreti's iretq with user GS.base loaded: fix up. */
	swapgs
	lfence
	cmpq	$~0,PCPU(UCR3)		/* PTI active for this pmap? */
	je	1f
	pushq	%rax
	pushq	%rdx
	movq	PCPU(KCR3),%rax
	movq	%rax,%cr3
	/* Copy both frames from the trampoline to the thread stack. */
	movq	PCPU(RSP0),%rax
	subq	$2*PTI_SIZE-3*8,%rax	/* no err, %rax, %rdx in faulted frame */
	MOVE_STACKS	(PTI_SIZE / 8)
	addq	$PTI_SIZE,%rax
	movq	PTI_RSP(%rsp),%rsp
	MOVE_STACKS	(PTI_SIZE / 8 - 3)
	subq	$PTI_SIZE,%rax
	movq	%rax,%rsp
	popq	%rdx
	popq	%rax
1:	swapgs				/* undo; X\name redoes the check */
	jmp	X\name
IDTVEC(\name\()_pti)
	cmpq	$doreti_iret,PTI_RIP-2*8(%rsp)
	je	\name\()_pti_doreti
	testb	$SEL_RPL_MASK,PTI_CS-2*8(%rsp)	/* %rax, %rdx not yet pushed */
	jz	X\name			/* lfence is not needed until %gs: use */
	PTI_UENTRY has_err=1
	swapgs	/* fence provided by PTI_UENTRY */
IDTVEC(\name)
	subq	$TF_ERR,%rsp
	movl	$\trapno,TF_TRAPNO(%rsp)
	jmp	prot_addrf
	.endm

	PROTF_ENTRY	missing, T_SEGNPFLT
	PROTF_ENTRY	stk, T_STKFLT
	PROTF_ENTRY	prot, T_PROTFLT

/* Shared tail for #NP/#SS/#GP: finish the frame, sort out GS.base. */
prot_addrf:
	movq	$0,TF_ADDR(%rsp)
	movq	%rdi,TF_RDI(%rsp)	/* free up a GP register */
	movq	%rax,TF_RAX(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	leaq	doreti_iret(%rip),%rdi
	cmpq	%rdi,TF_RIP(%rsp)
	je	5f			/* kernel but with user gsbase!! */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)	/* Did we come from kernel?
						*/
	jz	6f			/* already running with kernel GS.base */
	/*
	 * From user mode.  If FSGSBASE is available and the 32-bit TLS
	 * selectors are loaded, snapshot the user fs/gs base values before
	 * swapgs destroys access to them, then store into the PCB after.
	 */
	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
	jz	2f
	cmpw	$KUF32SEL,TF_FS(%rsp)
	jne	1f
	rdfsbase %rax
1:	cmpw	$KUG32SEL,TF_GS(%rsp)
	jne	2f
	rdgsbase %rdx
2:	swapgs
	lfence
	movq	PCPU(CURPCB),%rdi
	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
	jz	4f
	cmpw	$KUF32SEL,TF_FS(%rsp)
	jne	3f
	movq	%rax,PCB_FSBASE(%rdi)
3:	cmpw	$KUG32SEL,TF_GS(%rsp)
	jne	4f
	movq	%rdx,PCB_GSBASE(%rdi)
	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)	/* full iret from user #gp */
4:	call	handle_ibrs_entry
	movw	%es,TF_ES(%rsp)
	movw	%ds,TF_DS(%rsp)
	testl	$PSL_I,TF_RFLAGS(%rsp)
	jz	alltraps_pushregs_no_rax
	sti
	jmp	alltraps_pushregs_no_rax

5:	swapgs				/* doreti_iret fault: user GS.base */
6:	lfence
	movq	PCPU(CURPCB),%rdi
	jmp	4b

/*
 * Fast syscall entry point.  We enter here with just our new %cs/%ss set,
 * and the new privilege level.  We are still running on the old user stack
 * pointer.  We have to juggle a few things around to find our stack etc.
 * swapgs gives us access to our PCPU space only.
 *
 * We do not support invoking this from a custom segment registers,
 * esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT.
 */
	SUPERALIGN_TEXT
IDTVEC(fast_syscall_pti)
	swapgs
	cmpq	$~0,PCPU(UCR3)		/* PTI disabled for this pmap? */
	je	fast_syscall_common
	movq	%rax,PCPU(SCRATCH_RAX)	/* no stack yet; stash in PCPU */
	movq	PCPU(KCR3),%rax
	movq	%rax,%cr3
	movq	PCPU(SCRATCH_RAX),%rax
	jmp	fast_syscall_common
	SUPERALIGN_TEXT
IDTVEC(fast_syscall)
	swapgs
fast_syscall_common:
	movq	%rsp,PCPU(SCRATCH_RSP)	/* save user stack pointer */
	movq	PCPU(RSP0),%rsp		/* switch to kernel stack */
	/* Now emulate a trapframe. Make the 8 byte alignment odd for call.
	 */
	subq	$TF_SIZE,%rsp
	/* defer TF_RSP till we have a spare register */
	movq	%r11,TF_RFLAGS(%rsp)	/* syscall saved rflags in %r11 */
	movq	%rcx,TF_RIP(%rsp)	/* %rcx original value is in %r10 */
	movq	PCPU(SCRATCH_RSP),%r11	/* %r11 already saved */
	movq	%r11,TF_RSP(%rsp)	/* user stack pointer */
	/*
	 * Save a few arg registers early to free them for use in
	 * handle_ibrs_entry().  %r10 is especially tricky.  It is not an
	 * arg register, but it holds the arg register %rcx.  Profiling
	 * preserves %rcx, but may clobber %r10.  Profiling may also
	 * clobber %r11, but %r11 (original %eflags) has been saved.
	 */
	movq	%rax,TF_RAX(%rsp)	/* syscall number */
	movq	%rdx,TF_RDX(%rsp)	/* arg 3 */
	movq	%r10,TF_RCX(%rsp)	/* arg 4 */
	SAVE_SEGS
	call	handle_ibrs_entry
	movq	PCPU(CURPCB),%r11
	andl	$~PCB_FULL_IRET,PCB_FLAGS(%r11)
	sti				/* frame usable; allow interrupts */
	movq	$KUDSEL,TF_SS(%rsp)
	movq	$KUCSEL,TF_CS(%rsp)
	movq	$2,TF_ERR(%rsp)		/* marks frame as syscall-built */
	movq	%rdi,TF_RDI(%rsp)	/* arg 1 */
	movq	%rsi,TF_RSI(%rsp)	/* arg 2 */
	movq	%r8,TF_R8(%rsp)		/* arg 5 */
	movq	%r9,TF_R9(%rsp)		/* arg 6 */
	movq	%rbx,TF_RBX(%rsp)	/* C preserved */
	movq	%rbp,TF_RBP(%rsp)	/* C preserved */
	movq	%r12,TF_R12(%rsp)	/* C preserved */
	movq	%r13,TF_R13(%rsp)	/* C preserved */
	movq	%r14,TF_R14(%rsp)	/* C preserved */
	movq	%r15,TF_R15(%rsp)	/* C preserved */
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	movq	PCPU(CURTHREAD),%rdi
	movq	%rsp,TD_FRAME(%rdi)
	movl	TF_RFLAGS(%rsp),%esi
	andl	$PSL_T,%esi		/* arg 2: single-step flag */
	call	amd64_syscall
1:	movq	PCPU(CURPCB),%rax
	/* Disable interrupts before testing PCB_FULL_IRET. */
	cli
	testl	$PCB_FULL_IRET,PCB_FLAGS(%rax)
	jnz	4f
	/* Check for and handle AST's on return to userland. */
	movq	PCPU(CURTHREAD),%rax
	cmpl	$0,TD_AST(%rax)
	jne	3f
	call	handle_ibrs_exit
	callq	*mds_handler		/* CPU-specific MDS mitigation */
	/* Restore preserved registers.
	 */
	movq	TF_RDI(%rsp),%rdi	/* bonus; preserve arg 1 */
	movq	TF_RSI(%rsp),%rsi	/* bonus: preserve arg 2 */
	movq	TF_RDX(%rsp),%rdx	/* return value 2 */
	movq	TF_RAX(%rsp),%rax	/* return value 1 */
	movq	TF_RFLAGS(%rsp),%r11	/* original %rflags */
	movq	TF_RIP(%rsp),%rcx	/* original %rip */
	movq	TF_RSP(%rsp),%rsp	/* user stack pointer */
	xorl	%r8d,%r8d		/* zero the rest of GPRs */
	xorl	%r10d,%r10d
	/* Switch back to the user page table when PTI is active. */
	cmpq	$~0,PCPU(UCR3)
	je	2f
	movq	PCPU(UCR3),%r9
	andq	PCPU(UCR3_LOAD_MASK),%r9
	movq	%r9,%cr3
2:	xorl	%r9d,%r9d
	movq	$PMAP_UCR3_NOMASK,PCPU(UCR3_LOAD_MASK)
	swapgs
	sysretq

3:	/* AST scheduled. */
	sti
	movq	%rsp,%rdi
	call	ast
	jmp	1b

4:	/* Requested full context restore, use doreti for that. */
	jmp	doreti

/*
 * Here for CYA insurance, in case a "syscall" instruction gets
 * issued from 32 bit compatibility mode. MSR_CSTAR has to point
 * to *something* if EFER_SCE is enabled.
 */
IDTVEC(fast_syscall32)
	sysret

/*
 * DB# handler is very similar to NM#, because 'mov/pop %ss' delay
 * generation of exception until the next instruction is executed,
 * which might be a kernel entry.  So we must execute the handler
 * on IST stack and be ready for non-kernel GSBASE.
 */
IDTVEC(dbg)
	subq	$TF_RIP,%rsp
	movl	$(T_TRCTRAP),TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	SAVE_SEGS
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	pushfq
	andq	$~(PSL_D | PSL_AC),(%rsp)
	popfq
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jnz	dbg_fromuserspace
	lfence
	/*
	 * We've interrupted the kernel.  See comment in NMI handler about
	 * registers use.
	 */
	movq	%cr2,%r15		/* preserve %cr2: trap() may fault */
	movl	$MSR_GSBASE,%ecx
	rdmsr
	movq	%rax,%r12		/* save current GS.base in %r12 */
	shlq	$32,%rdx
	orq	%rdx,%r12
	/* Retrieve and load the canonical value for GS.base. */
	movq	TF_SIZE(%rsp),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	movq	%cr3,%r13		/* save %cr3; switch to KCR3 if PTI */
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax
	je	1f
	movq	%rax,%cr3
1:	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
	je	2f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	movl	%eax,%r14d		/* preserve SPEC_CTRL low half */
	call	handle_ibrs_entry
2:	movq	%rsp,%rdi
	call	trap
	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
	je	3f
	movl	%r14d,%eax
	xorl	%edx,%edx
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	wrmsr
	/*
	 * Put back the preserved MSR_GSBASE value.
	 */
3:	movl	$MSR_GSBASE,%ecx
	movq	%r12,%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	movq	%r13,%cr3
	movq	%r15,%cr2
	RESTORE_REGS
	addq	$TF_RIP,%rsp
	jmp	doreti_iret
dbg_fromuserspace:
	/*
	 * Switch to kernel GSBASE and kernel page table, and copy frame
	 * from the IST stack to the normal kernel stack, since trap()
	 * re-enables interrupts, and since we might trap on DB# while
	 * in trap().
	 */
	swapgs
	lfence
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax
	je	1f
	movq	%rax,%cr3
1:	movq	PCPU(RSP0),%rax		/* copy frame to thread kstack */
	movl	$TF_SIZE,%ecx
	subq	%rcx,%rax
	movq	%rax,%rdi
	movq	%rsp,%rsi
	rep;movsb
	movq	%rax,%rsp
	call	handle_ibrs_entry
	movq	PCPU(CURPCB),%rdi
	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)	/* force full iretq return */
	/* Save user fs/gs base into PCB when FSGSBASE + 32-bit TLS sels. */
	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
	jz	3f
	cmpw	$KUF32SEL,TF_FS(%rsp)
	jne	2f
	rdfsbase %rax
	movq	%rax,PCB_FSBASE(%rdi)
2:	cmpw	$KUG32SEL,TF_GS(%rsp)
	jne	3f
	movl	$MSR_KGSBASE,%ecx	/* user base now in KGSBASE (post-swapgs) */
	rdmsr
	shlq	$32,%rdx
	orq	%rdx,%rax
	movq	%rax,PCB_GSBASE(%rdi)
3:	jmp	calltrap

/*
 * NMI handling is special.
 *
 * First, NMIs do not respect the state of the processor's RFLAGS.IF
 * bit.  The NMI handler may be entered at any time, including when
 * the processor is in a critical section with RFLAGS.IF == 0.
 * The processor's GS.base value could be invalid on entry to the
 * handler.
 *
 * Second, the processor treats NMIs specially, blocking further NMIs
 * until an 'iretq' instruction is executed.  We thus need to execute
 * the NMI handler with interrupts disabled, to prevent a nested interrupt
 * from executing an 'iretq' instruction and inadvertently taking the
 * processor out of NMI mode.
 *
 * Third, the NMI handler runs on its own stack (tss_ist2).  The canonical
 * GS.base value for the processor is stored just above the bottom of its
 * NMI stack.  For NMIs taken from kernel mode, the current value in
 * the processor's GS.base is saved at entry to C-preserved register %r12,
 * the canonical value for GS.base is then loaded into the processor, and
 * the saved value is restored at exit time.  For NMIs taken from user mode,
 * the cheaper 'SWAPGS' instructions are used for swapping GS.base.
 */

IDTVEC(nmi)
	subq	$TF_RIP,%rsp
	movl	$(T_NMI),TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	SAVE_SEGS
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	pushfq
	andq	$~(PSL_D | PSL_AC),(%rsp)
	popfq
	xorl	%ebx,%ebx		/* %ebx = 1 iff NMI came from user */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jnz	nmi_fromuserspace
	/*
	 * We've interrupted the kernel.  Preserve in callee-saved regs:
	 * GS.base in %r12,
	 * %cr3 in %r13,
	 * possibly lower half of MSR_IA32_SPEC_CTL in %r14d,
	 * %cr2 in %r15.
	 */
	lfence
	movq	%cr2,%r15
	movl	$MSR_GSBASE,%ecx
	rdmsr
	movq	%rax,%r12
	shlq	$32,%rdx
	orq	%rdx,%r12
	/* Retrieve and load the canonical value for GS.base.
	 */
	movq	TF_SIZE(%rsp),%rdx	/* stored just above the IST frame */
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	movq	%cr3,%r13
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax
	je	1f
	movq	%rax,%cr3
1:	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
	je	nmi_calltrap
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	movl	%eax,%r14d		/* preserve SPEC_CTRL for exit */
	call	handle_ibrs_entry
	jmp	nmi_calltrap
nmi_fromuserspace:
	incl	%ebx			/* remember: return to userland */
	swapgs
	lfence
	movq	%cr3,%r13
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax
	je	1f
	movq	%rax,%cr3
1:	call	handle_ibrs_entry
	movq	PCPU(CURPCB),%rdi
	testq	%rdi,%rdi		/* may be NULL very early in boot */
	jz	3f
	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)
	/* Save user fs/gs base into PCB when FSGSBASE + 32-bit TLS sels. */
	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
	jz	3f
	cmpw	$KUF32SEL,TF_FS(%rsp)
	jne	2f
	rdfsbase %rax
	movq	%rax,PCB_FSBASE(%rdi)
2:	cmpw	$KUG32SEL,TF_GS(%rsp)
	jne	3f
	movl	$MSR_KGSBASE,%ecx
	rdmsr
	shlq	$32,%rdx
	orq	%rdx,%rax
	movq	%rax,PCB_GSBASE(%rdi)
3:
/* Note: this label is also used by ddb and gdb: */
nmi_calltrap:
	KMSAN_ENTER
	movq	%rsp,%rdi
	call	trap
	KMSAN_LEAVE
#ifdef HWPMC_HOOKS
	/*
	 * Capture a userspace callchain if needed.
	 *
	 * - Check if the current trap was from user mode.
	 * - Check if the current thread is valid.
	 * - Check if the thread requires a user call chain to be
	 *   captured.
	 *
	 * We are still in NMI mode at this point.
	 */
	testl	%ebx,%ebx
	jz	nocallchain		/* not from userspace */
	movq	PCPU(CURTHREAD),%rax
	orq	%rax,%rax		/* curthread present? */
	jz	nocallchain
	/*
	 * Move execution to the regular kernel stack, because we
	 * committed to return through doreti.
	 */
	movq	%rsp,%rsi		/* source stack pointer */
	movq	$TF_SIZE,%rcx
	movq	PCPU(RSP0),%rdx
	subq	%rcx,%rdx
	movq	%rdx,%rdi		/* destination stack pointer */
	shrq	$3,%rcx			/* trap frame size in long words */
	pushfq
	andq	$~(PSL_D | PSL_AC),(%rsp)	/* ensure movsq goes forward */
	popfq
	rep
	movsq				/* copy trapframe */
	movq	%rdx,%rsp		/* we are on the regular kstack */

	testl	$TDP_CALLCHAIN,TD_PFLAGS(%rax)	/* flagged for capture? */
	jz	nocallchain
	/*
	 * A user callchain is to be captured, so:
	 * - Take the processor out of "NMI" mode by faking an "iret",
	 *   to allow for nested NMI interrupts.
	 * - Enable interrupts, so that copyin() can work.
	 */
	movl	%ss,%eax
	pushq	%rax			/* tf_ss */
	pushq	%rdx			/* tf_rsp (on kernel stack) */
	pushfq				/* tf_rflags */
	movl	%cs,%eax
	pushq	%rax			/* tf_cs */
	pushq	$outofnmi		/* tf_rip */
	iretq				/* exits NMI mode; lands below */
outofnmi:
	/*
	 * At this point the processor has exited NMI mode and is running
	 * with interrupts turned off on the normal kernel stack.
	 *
	 * If a pending NMI gets recognized at or after this point, it
	 * will cause a kernel callchain to be traced.
	 *
	 * We turn interrupts back on, and call the user callchain capture hook.
	 */
	movq	pmc_hook,%rax
	orq	%rax,%rax
	jz	nocallchain
	movq	PCPU(CURTHREAD),%rdi		/* thread */
	movq	$PMC_FN_USER_CALLCHAIN,%rsi	/* command */
	movq	%rsp,%rdx			/* frame */
	sti
	call	*%rax
	cli
nocallchain:
#endif
	testl	%ebx,%ebx		/* %ebx != 0 => return to userland */
	jnz	doreti_exit
	/*
	 * Restore speculation control MSR, if preserved.
	 */
	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
	je	1f
	movl	%r14d,%eax
	xorl	%edx,%edx
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	wrmsr
	/*
	 * Put back the preserved MSR_GSBASE value.
	 */
1:	movl	$MSR_GSBASE,%ecx
	movq	%r12,%rdx		/* GS.base preserved at entry */
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	cmpb	$0, nmi_flush_l1d_sw(%rip)
	je	2f
	call	flush_l1d_sw		/* bhyve L1TF assist */
2:	movq	%r13,%cr3		/* restore preserved %cr3 and %cr2 */
	movq	%r15,%cr2
	RESTORE_REGS
	addq	$TF_RIP,%rsp
	jmp	doreti_iret

/*
 * MC# handling is similar to NMI.
 *
 * As with NMIs, machine check exceptions do not respect RFLAGS.IF and
 * can occur at any time with a GS.base value that does not correspond
 * to the privilege level in CS.
 *
 * Machine checks are not unblocked by iretq, but it is best to run
 * the handler with interrupts disabled since the exception may have
 * interrupted a critical section.
 *
 * The MC# handler runs on its own stack (tss_ist3).  The canonical
 * GS.base value for the processor is stored just above the bottom of
 * its MC# stack.  For exceptions taken from kernel mode, the current
 * value in the processor's GS.base is saved at entry to C-preserved
 * register %r12, the canonical value for GS.base is then loaded into
 * the processor, and the saved value is restored at exit time.  For
 * exceptions taken from user mode, the cheaper 'SWAPGS' instructions
 * are used for swapping GS.base.
 */

IDTVEC(mchk)
	subq	$TF_RIP,%rsp
	movl	$(T_MCHK),TF_TRAPNO(%rsp)
	movq	$0,TF_ADDR(%rsp)
	movq	$0,TF_ERR(%rsp)
	movq	%rdi,TF_RDI(%rsp)
	movq	%rsi,TF_RSI(%rsp)
	movq	%rdx,TF_RDX(%rsp)
	movq	%rcx,TF_RCX(%rsp)
	movq	%r8,TF_R8(%rsp)
	movq	%r9,TF_R9(%rsp)
	movq	%rax,TF_RAX(%rsp)
	movq	%rbx,TF_RBX(%rsp)
	movq	%rbp,TF_RBP(%rsp)
	movq	%r10,TF_R10(%rsp)
	movq	%r11,TF_R11(%rsp)
	movq	%r12,TF_R12(%rsp)
	movq	%r13,TF_R13(%rsp)
	movq	%r14,TF_R14(%rsp)
	movq	%r15,TF_R15(%rsp)
	SAVE_SEGS
	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
	pushfq
	andq	$~(PSL_D | PSL_AC),(%rsp)
	popfq
	xorl	%ebx,%ebx		/* %ebx = 1 iff MC# came from user */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)
	jnz	mchk_fromuserspace
	/*
	 * We've interrupted the kernel.  See comment in NMI handler about
	 * registers use.
	 *
	 * NOTE(review): unlike the NMI/DB# kernel paths there is no lfence
	 * after the branch here — confirm whether that is intentional.
	 */
	movq	%cr2,%r15
	movl	$MSR_GSBASE,%ecx
	rdmsr
	movq	%rax,%r12
	shlq	$32,%rdx
	orq	%rdx,%r12
	/* Retrieve and load the canonical value for GS.base. */
	movq	TF_SIZE(%rsp),%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	movq	%cr3,%r13
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax
	je	1f
	movq	%rax,%cr3
1:	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
	je	mchk_calltrap
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	movl	%eax,%r14d
	call	handle_ibrs_entry
	jmp	mchk_calltrap
mchk_fromuserspace:
	incl	%ebx
	swapgs		/* NOTE(review): no lfence here, unlike nmi_fromuserspace */
	movq	%cr3,%r13
	movq	PCPU(KCR3),%rax
	cmpq	$~0,%rax
	je	1f
	movq	%rax,%cr3
1:	call	handle_ibrs_entry
/* Note: this label is also used by ddb and gdb: */
mchk_calltrap:
	KMSAN_ENTER
	movq	%rsp,%rdi
	call	mca_intr
	KMSAN_LEAVE
	testl	%ebx,%ebx		/* %ebx != 0 => return to userland */
	jnz	doreti_exit
	/*
	 * Restore speculation control MSR, if preserved.
	 */
	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
	je	1f
	movl	%r14d,%eax
	xorl	%edx,%edx
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	wrmsr
	/*
	 * Put back the preserved MSR_GSBASE value.
	 */
1:	movl	$MSR_GSBASE,%ecx
	movq	%r12,%rdx
	movl	%edx,%eax
	shrq	$32,%rdx
	wrmsr
	movq	%r13,%cr3
	movq	%r15,%cr2
	RESTORE_REGS
	addq	$TF_RIP,%rsp
	jmp	doreti_iret

/*
 * First-run entry for a newly forked thread: cpu_switch() arranged
 * %r12 = callout function, %rbx = its argument; the trapframe is on
 * the stack.  fork_exit() never returns here except via doreti.
 */
ENTRY(fork_trampoline)
	movq	%r12,%rdi		/* function */
	movq	%rbx,%rsi		/* arg1 */
	movq	%rsp,%rdx		/* trapframe pointer */
	call	fork_exit
	jmp	doreti			/* Handle any ASTs */

/*
 * To efficiently implement classification of trap and interrupt handlers
 * for profiling, there must be only trap handlers between the labels btrap
 * and bintr, and only interrupt handlers between the labels bintr and
 * eintr.  This is implemented (partly) by including files that contain
 * some of the handlers.  Before including the files, set up a normal asm
 * environment so that the included files don't need to know that they are
 * included.
 */

#ifdef COMPAT_FREEBSD32
	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT

#include <amd64/ia32/ia32_exception.S>
#endif

	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT
#include <amd64/amd64/apic_vector.S>

#ifdef DEV_ATPIC
	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT

#include <amd64/amd64/atpic_vector.S>
#endif

/*
 * void doreti(struct trapframe)
 *
 * Handle return from interrupts, traps and syscalls.
 */
	.text
	SUPERALIGN_TEXT
	.type	doreti,@function
	.globl	doreti
doreti:
	/*
	 * Check if ASTs can be handled now.
	 */
	testb	$SEL_RPL_MASK,TF_CS(%rsp)	/* are we returning to user mode?
*/ 1134 jz doreti_exit /* can't handle ASTs now if not */ 1135 1136doreti_ast: 1137 /* 1138 * Check for ASTs atomically with returning. Disabling CPU 1139 * interrupts provides sufficient locking even in the SMP case, 1140 * since we will be informed of any new ASTs by an IPI. 1141 */ 1142 cli 1143 movq PCPU(CURTHREAD),%rax 1144 cmpl $0,TD_AST(%rax) 1145 je doreti_exit 1146 sti 1147 movq %rsp,%rdi /* pass a pointer to the trapframe */ 1148 call ast 1149 jmp doreti_ast 1150 1151 /* 1152 * doreti_exit: pop registers, iret. 1153 * 1154 * The segment register pop is a special case, since it may 1155 * fault if (for example) a sigreturn specifies bad segment 1156 * registers. The fault is handled in trap.c. 1157 */ 1158doreti_exit: 1159 movq PCPU(CURPCB),%r8 1160 1161 /* 1162 * Do not reload segment registers for kernel. 1163 * Since we do not reload segments registers with sane 1164 * values on kernel entry, descriptors referenced by 1165 * segments registers might be not valid. This is fatal 1166 * for user mode, but is not a problem for the kernel. 
1167 */ 1168 testb $SEL_RPL_MASK,TF_CS(%rsp) 1169 jz ld_regs 1170 testl $PCB_FULL_IRET,PCB_FLAGS(%r8) 1171 jz ld_regs 1172 andl $~PCB_FULL_IRET,PCB_FLAGS(%r8) 1173 testl $TF_HASSEGS,TF_FLAGS(%rsp) 1174 je set_segs 1175 1176do_segs: 1177 /* Restore %fs and fsbase */ 1178 movw TF_FS(%rsp),%ax 1179 .globl ld_fs 1180ld_fs: 1181 movw %ax,%fs 1182 cmpw $KUF32SEL,%ax 1183 jne 1f 1184 movl $MSR_FSBASE,%ecx 1185 movl PCB_FSBASE(%r8),%eax 1186 movl PCB_FSBASE+4(%r8),%edx 1187 .globl ld_fsbase 1188ld_fsbase: 1189 wrmsr 11901: 1191 /* Restore %gs and gsbase */ 1192 movw TF_GS(%rsp),%si 1193 pushfq 1194 cli 1195 movl $MSR_GSBASE,%ecx 1196 /* Save current kernel %gs base into %r12d:%r13d */ 1197 rdmsr 1198 movl %eax,%r12d 1199 movl %edx,%r13d 1200 .globl ld_gs 1201ld_gs: 1202 movw %si,%gs 1203 /* Save user %gs base into %r14d:%r15d */ 1204 rdmsr 1205 movl %eax,%r14d 1206 movl %edx,%r15d 1207 /* Restore kernel %gs base */ 1208 movl %r12d,%eax 1209 movl %r13d,%edx 1210 wrmsr 1211 popfq 1212 /* 1213 * Restore user %gs base, either from PCB if used for TLS, or 1214 * from the previously saved msr read. 1215 */ 1216 movl $MSR_KGSBASE,%ecx 1217 cmpw $KUG32SEL,%si 1218 jne 1f 1219 movl PCB_GSBASE(%r8),%eax 1220 movl PCB_GSBASE+4(%r8),%edx 1221 jmp ld_gsbase 12221: 1223 movl %r14d,%eax 1224 movl %r15d,%edx 1225 .globl ld_gsbase 1226ld_gsbase: 1227 wrmsr /* May trap if non-canonical, but only for TLS. */ 1228 .globl ld_es 1229ld_es: 1230 movw TF_ES(%rsp),%es 1231 .globl ld_ds 1232ld_ds: 1233 movw TF_DS(%rsp),%ds 1234ld_regs: 1235 RESTORE_REGS 1236 testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? 
*/ 1237 jz 2f /* keep running with kernel GS.base */ 1238 cli 1239 call handle_ibrs_exit_rs 1240 callq *mds_handler 1241 cmpq $~0,PCPU(UCR3) 1242 je 1f 1243 pushq %rdx 1244 movq PCPU(PTI_RSP0),%rdx 1245 subq $PTI_SIZE,%rdx 1246 movq %rax,PTI_RAX(%rdx) 1247 popq %rax 1248 movq %rax,PTI_RDX(%rdx) 1249 movq TF_RIP(%rsp),%rax 1250 movq %rax,PTI_RIP(%rdx) 1251 movq TF_CS(%rsp),%rax 1252 movq %rax,PTI_CS(%rdx) 1253 movq TF_RFLAGS(%rsp),%rax 1254 movq %rax,PTI_RFLAGS(%rdx) 1255 movq TF_RSP(%rsp),%rax 1256 movq %rax,PTI_RSP(%rdx) 1257 movq TF_SS(%rsp),%rax 1258 movq %rax,PTI_SS(%rdx) 1259 movq PCPU(UCR3),%rax 1260 andq PCPU(UCR3_LOAD_MASK),%rax 1261 movq $PMAP_UCR3_NOMASK,PCPU(UCR3_LOAD_MASK) 1262 swapgs 1263 movq %rdx,%rsp 1264 movq %rax,%cr3 1265 popq %rdx 1266 popq %rax 1267 addq $8,%rsp 1268 jmp doreti_iret 12691: swapgs 12702: addq $TF_RIP,%rsp 1271 .globl doreti_iret 1272doreti_iret: 1273 iretq 1274 1275set_segs: 1276 movw $KUDSEL,%ax 1277 movw %ax,TF_DS(%rsp) 1278 movw %ax,TF_ES(%rsp) 1279 movw $KUF32SEL,TF_FS(%rsp) 1280 movw $KUG32SEL,TF_GS(%rsp) 1281 jmp do_segs 1282 1283 /* 1284 * doreti_iret_fault. Alternative return code for 1285 * the case where we get a fault in the doreti_exit code 1286 * above. trap() (amd64/amd64/trap.c) catches this specific 1287 * case, sends the process a signal and continues in the 1288 * corresponding place in the code below. 
1289 */ 1290 ALIGN_TEXT 1291 .globl doreti_iret_fault 1292doreti_iret_fault: 1293 subq $TF_RIP,%rsp /* space including tf_err, tf_trapno */ 1294 movq %rax,TF_RAX(%rsp) 1295 movq %rdx,TF_RDX(%rsp) 1296 movq %rcx,TF_RCX(%rsp) 1297 call handle_ibrs_entry 1298 testb $SEL_RPL_MASK,TF_CS(%rsp) 1299 jz 1f 1300 sti 13011: 1302 SAVE_SEGS 1303 movl $TF_HASSEGS,TF_FLAGS(%rsp) 1304 movq %rdi,TF_RDI(%rsp) 1305 movq %rsi,TF_RSI(%rsp) 1306 movq %r8,TF_R8(%rsp) 1307 movq %r9,TF_R9(%rsp) 1308 movq %rbx,TF_RBX(%rsp) 1309 movq %rbp,TF_RBP(%rsp) 1310 movq %r10,TF_R10(%rsp) 1311 movq %r11,TF_R11(%rsp) 1312 movq %r12,TF_R12(%rsp) 1313 movq %r13,TF_R13(%rsp) 1314 movq %r14,TF_R14(%rsp) 1315 movq %r15,TF_R15(%rsp) 1316 movl $T_PROTFLT,TF_TRAPNO(%rsp) 1317 movq $0,TF_ERR(%rsp) /* XXX should be the error code */ 1318 movq $0,TF_ADDR(%rsp) 1319 jmp calltrap 1320 1321 ALIGN_TEXT 1322 .globl ds_load_fault 1323ds_load_fault: 1324 movl $T_PROTFLT,TF_TRAPNO(%rsp) 1325 testb $SEL_RPL_MASK,TF_CS(%rsp) 1326 jz 1f 1327 sti 13281: 1329 movq %rsp,%rdi 1330 call trap 1331 movw $KUDSEL,TF_DS(%rsp) 1332 jmp doreti 1333 1334 ALIGN_TEXT 1335 .globl es_load_fault 1336es_load_fault: 1337 movl $T_PROTFLT,TF_TRAPNO(%rsp) 1338 testl $PSL_I,TF_RFLAGS(%rsp) 1339 jz 1f 1340 sti 13411: 1342 movq %rsp,%rdi 1343 call trap 1344 movw $KUDSEL,TF_ES(%rsp) 1345 jmp doreti 1346 1347 ALIGN_TEXT 1348 .globl fs_load_fault 1349fs_load_fault: 1350 testl $PSL_I,TF_RFLAGS(%rsp) 1351 jz 1f 1352 sti 13531: 1354 movl $T_PROTFLT,TF_TRAPNO(%rsp) 1355 movq %rsp,%rdi 1356 call trap 1357 movw $KUF32SEL,TF_FS(%rsp) 1358 jmp doreti 1359 1360 ALIGN_TEXT 1361 .globl gs_load_fault 1362gs_load_fault: 1363 popfq 1364 movl $T_PROTFLT,TF_TRAPNO(%rsp) 1365 testl $PSL_I,TF_RFLAGS(%rsp) 1366 jz 1f 1367 sti 13681: 1369 movq %rsp,%rdi 1370 call trap 1371 movw $KUG32SEL,TF_GS(%rsp) 1372 jmp doreti 1373 1374 ALIGN_TEXT 1375 .globl fsbase_load_fault 1376fsbase_load_fault: 1377 movl $T_PROTFLT,TF_TRAPNO(%rsp) 1378 testl $PSL_I,TF_RFLAGS(%rsp) 1379 jz 1f 
1380 sti 13811: 1382 movq %rsp,%rdi 1383 call trap 1384 movq PCPU(CURTHREAD),%r8 1385 movq TD_PCB(%r8),%r8 1386 movq $0,PCB_FSBASE(%r8) 1387 jmp doreti 1388 1389 ALIGN_TEXT 1390 .globl gsbase_load_fault 1391gsbase_load_fault: 1392 movl $T_PROTFLT,TF_TRAPNO(%rsp) 1393 testl $PSL_I,TF_RFLAGS(%rsp) 1394 jz 1f 1395 sti 13961: 1397 movq %rsp,%rdi 1398 call trap 1399 movq PCPU(CURTHREAD),%r8 1400 movq TD_PCB(%r8),%r8 1401 movq $0,PCB_GSBASE(%r8) 1402 jmp doreti 1403 1404#ifdef HWPMC_HOOKS 1405 ENTRY(end_exceptions) 1406#endif 1407