1*6651c3e5Sguenther/* $OpenBSD: locore.S,v 1.147 2024/03/17 05:49:41 guenther Exp $ */ 2b5b9857bSart/* $NetBSD: locore.S,v 1.13 2004/03/25 18:33:17 drochner Exp $ */ 3f5df1827Smickey 4f5df1827Smickey/* 5f5df1827Smickey * Copyright-o-rama! 6f5df1827Smickey */ 7f5df1827Smickey 8f5df1827Smickey/* 9f5df1827Smickey * Copyright (c) 2001 Wasabi Systems, Inc. 10f5df1827Smickey * All rights reserved. 11f5df1827Smickey * 12f5df1827Smickey * Written by Frank van der Linden for Wasabi Systems, Inc. 13f5df1827Smickey * 14f5df1827Smickey * Redistribution and use in source and binary forms, with or without 15f5df1827Smickey * modification, are permitted provided that the following conditions 16f5df1827Smickey * are met: 17f5df1827Smickey * 1. Redistributions of source code must retain the above copyright 18f5df1827Smickey * notice, this list of conditions and the following disclaimer. 19f5df1827Smickey * 2. Redistributions in binary form must reproduce the above copyright 20f5df1827Smickey * notice, this list of conditions and the following disclaimer in the 21f5df1827Smickey * documentation and/or other materials provided with the distribution. 22f5df1827Smickey * 3. All advertising materials mentioning features or use of this software 23f5df1827Smickey * must display the following acknowledgement: 24f5df1827Smickey * This product includes software developed for the NetBSD Project by 25f5df1827Smickey * Wasabi Systems, Inc. 26f5df1827Smickey * 4. The name of Wasabi Systems, Inc. may not be used to endorse 27f5df1827Smickey * or promote products derived from this software without specific prior 28f5df1827Smickey * written permission. 29f5df1827Smickey * 30f5df1827Smickey * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 31f5df1827Smickey * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 32f5df1827Smickey * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 33f5df1827Smickey * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL WASABI SYSTEMS, INC 34f5df1827Smickey * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 35f5df1827Smickey * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 36f5df1827Smickey * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 37f5df1827Smickey * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 38f5df1827Smickey * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 39f5df1827Smickey * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 40f5df1827Smickey * POSSIBILITY OF SUCH DAMAGE. 41f5df1827Smickey */ 42f5df1827Smickey 43f5df1827Smickey 44f5df1827Smickey/*- 45f5df1827Smickey * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. 46f5df1827Smickey * All rights reserved. 47f5df1827Smickey * 48f5df1827Smickey * This code is derived from software contributed to The NetBSD Foundation 49f5df1827Smickey * by Charles M. Hannum. 50f5df1827Smickey * 51f5df1827Smickey * Redistribution and use in source and binary forms, with or without 52f5df1827Smickey * modification, are permitted provided that the following conditions 53f5df1827Smickey * are met: 54f5df1827Smickey * 1. Redistributions of source code must retain the above copyright 55f5df1827Smickey * notice, this list of conditions and the following disclaimer. 56f5df1827Smickey * 2. Redistributions in binary form must reproduce the above copyright 57f5df1827Smickey * notice, this list of conditions and the following disclaimer in the 58f5df1827Smickey * documentation and/or other materials provided with the distribution. 59f5df1827Smickey * 60f5df1827Smickey * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 61f5df1827Smickey * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 62f5df1827Smickey * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 63f5df1827Smickey * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 64f5df1827Smickey * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 65f5df1827Smickey * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 66f5df1827Smickey * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 67f5df1827Smickey * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 68f5df1827Smickey * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 69f5df1827Smickey * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 70f5df1827Smickey * POSSIBILITY OF SUCH DAMAGE. 71f5df1827Smickey */ 72f5df1827Smickey 73f5df1827Smickey/*- 74f5df1827Smickey * Copyright (c) 1990 The Regents of the University of California. 75f5df1827Smickey * All rights reserved. 76f5df1827Smickey * 77f5df1827Smickey * This code is derived from software contributed to Berkeley by 78f5df1827Smickey * William Jolitz. 79f5df1827Smickey * 80f5df1827Smickey * Redistribution and use in source and binary forms, with or without 81f5df1827Smickey * modification, are permitted provided that the following conditions 82f5df1827Smickey * are met: 83f5df1827Smickey * 1. Redistributions of source code must retain the above copyright 84f5df1827Smickey * notice, this list of conditions and the following disclaimer. 85f5df1827Smickey * 2. Redistributions in binary form must reproduce the above copyright 86f5df1827Smickey * notice, this list of conditions and the following disclaimer in the 87f5df1827Smickey * documentation and/or other materials provided with the distribution. 88b5b9857bSart * 3. Neither the name of the University nor the names of its contributors 89f5df1827Smickey * may be used to endorse or promote products derived from this software 90f5df1827Smickey * without specific prior written permission. 
91f5df1827Smickey * 92f5df1827Smickey * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 93f5df1827Smickey * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94f5df1827Smickey * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 95f5df1827Smickey * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 96f5df1827Smickey * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 97f5df1827Smickey * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 98f5df1827Smickey * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 99f5df1827Smickey * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 100f5df1827Smickey * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 101f5df1827Smickey * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 102f5df1827Smickey * SUCH DAMAGE. 103f5df1827Smickey * 104f5df1827Smickey * @(#)locore.s 7.3 (Berkeley) 5/13/91 105f5df1827Smickey */ 106f5df1827Smickey 107f5df1827Smickey#include "assym.h" 108f3c5c958Skettenis#include "efi.h" 109f5df1827Smickey#include "lapic.h" 110f5df1827Smickey#include "ksyms.h" 111d8213a49Smikeb#include "xen.h" 112218ead0bSmikeb#include "hyperv.h" 113f5df1827Smickey 114f5df1827Smickey#include <sys/syscall.h> 115f5df1827Smickey 116f5df1827Smickey#include <machine/param.h> 117c9de630fSguenther#include <machine/codepatch.h> 118b767b017Sguenther#include <machine/psl.h> 119f5df1827Smickey#include <machine/segments.h> 120f5df1827Smickey#include <machine/specialreg.h> 121c9de630fSguenther#include <machine/trap.h> /* T_PROTFLT */ 122f5df1827Smickey#include <machine/frameasm.h> 123f5df1827Smickey 124f5df1827Smickey#if NLAPIC > 0 125f5df1827Smickey#include <machine/i82489reg.h> 126f5df1827Smickey#endif 127f5df1827Smickey 128f5df1827Smickey/* 129f5df1827Smickey * override user-land alignment before including asm.h 
 */
/* pad data with 0xcc (int3) rather than zeros */
#define ALIGN_DATA	.align	8,0xcc

#include <machine/asm.h>

/*
 * Make 'proc' the current proc on 'cpu': mark the per-CPU curproc
 * pointer and point the proc back at its cpu.  'cpu' is used as a
 * scratch register and holds curcpu() afterwards.
 */
#define SET_CURPROC(proc,cpu)		\
	movq	CPUVAR(SELF),cpu	;	\
	movq	proc,CPUVAR(CURPROC)	;	\
	movq	cpu,P_CPU(proc)

#define GET_CURPCB(reg)			movq	CPUVAR(CURPCB),reg
#define SET_CURPCB(reg)			movq	reg,CPUVAR(CURPCB)


/*
 * Initialization
 */
	.data

#if NLAPIC > 0
	/*
	 * Page-sized window for the local APIC registers; the named
	 * labels sit at the architectural register offsets (ID, TPR,
	 * PPR, ISR) within the page.
	 */
	.align	NBPG, 0xcc
	.globl	local_apic, lapic_id, lapic_tpr
local_apic:
	.space	LAPIC_ID
lapic_id:
	.long	0x00000000
	.space	LAPIC_TPRI-(LAPIC_ID+4)
lapic_tpr:
	.space	LAPIC_PPRI-LAPIC_TPRI
lapic_ppr:
	.space	LAPIC_ISR-LAPIC_PPRI
lapic_isr:
	.space	NBPG-LAPIC_ISR
#endif

/*****************************************************************************/

/*
 * Signal trampoline; copied to a page mapped into userspace.
 * gdb's backtrace logic matches against the instructions in this,
 * so the exact instruction sequence matters.  The exported labels
 * (sigcodecall, sigcoderet) let the kernel locate the syscall
 * instruction and its return point within the copied page.
 */
	.section .rodata
	.globl	sigcode
sigcode:
	endbr64
	call	1f
	movq	%rsp,%rdi
	pushq	%rdi			/* fake return address */
	movq	$SYS_sigreturn,%rax
	.globl	sigcodecall
sigcodecall:
	syscall
	.globl	sigcoderet
sigcoderet:
	int3
1:	JMP_RETPOLINE(rax)
	.globl	esigcode
esigcode:
	/* fill pattern used when padding out the sigcode page */
	.globl	sigfill
sigfill:
	int3
esigfill:
	.globl	sigfillsiz
sigfillsiz:
	.long	esigfill - sigfill

	.text
/*
 * void lgdt(struct region_descriptor *rdp);
 * Change the global descriptor table.
 */
NENTRY(lgdt)
	RETGUARD_SETUP(lgdt, r11)
	/* Reload the descriptor table. */
	movq	%rdi,%rax
	lgdt	(%rax)
	/* Flush the prefetch q. */
	jmp	1f
	nop
1:	/* Reload "stale" selectors. */
	movl	$GSEL(GDATA_SEL, SEL_KPL),%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%ss
	/* Reload code selector by doing intersegment return. */
	popq	%rax
	pushq	$GSEL(GCODE_SEL, SEL_KPL)
	pushq	%rax
	RETGUARD_CHECK(lgdt, r11)
	lretq
END(lgdt)

#if defined(DDB) || NEFI > 0
/*
 * int setjmp(label_t *);
 * Save the callee-saved register context plus the return %rip into
 * the buffer (layout: 0=%rbx 8=%rsp 16=%rbp 24..48=%r12-%r15 56=%rip)
 * and return 0.
 */
ENTRY(setjmp)
	RETGUARD_SETUP(setjmp, r11)
	/*
	 * Only save registers that must be preserved across function
	 * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15)
	 * and %rip.
	 */
	movq	%rdi,%rax
	movq	%rbx,(%rax)
	movq	%rsp,8(%rax)
	movq	%rbp,16(%rax)
	movq	%r12,24(%rax)
	movq	%r13,32(%rax)
	movq	%r14,40(%rax)
	movq	%r15,48(%rax)
	movq	(%rsp),%rdx		/* return address ... */
	movq	%rdx,56(%rax)		/* ... saved as the resume %rip */
	xorl	%eax,%eax		/* direct invocation returns 0 */
	RETGUARD_CHECK(setjmp, r11)
	ret
	lfence				/* block speculation past the ret */
END(setjmp)

/*
 * void longjmp(label_t *);
 * Restore the context saved by setjmp and resume there, making
 * the original setjmp call appear to return 1.
 */
ENTRY(longjmp)
	movq	%rdi,%rax
	movq	8(%rax),%rsp		/* switch to the saved stack first */
	movq	56(%rax),%rdx
	movq	%rdx,(%rsp)		/* re-plant the saved return %rip */
	RETGUARD_SETUP(longjmp, r11)
	movq	(%rax),%rbx
	movq	16(%rax),%rbp
	movq	24(%rax),%r12
	movq	32(%rax),%r13
	movq	40(%rax),%r14
	movq	48(%rax),%r15
	xorl	%eax,%eax
	incl	%eax			/* setjmp "returns" 1 this time */
	RETGUARD_CHECK(longjmp, r11)
	ret
	lfence				/* block speculation past the ret */
END(longjmp)
#endif /* DDB || NEFI > 0 */

/*****************************************************************************/

268f5df1827Smickey/* 26945053f4aSart * int cpu_switchto(struct proc *old, struct proc *new) 27045053f4aSart * Switch from "old" proc to "new". 271f5df1827Smickey */ 27245053f4aSartENTRY(cpu_switchto) 273f5df1827Smickey pushq %rbx 274f5df1827Smickey pushq %rbp 275f5df1827Smickey pushq %r12 276f5df1827Smickey pushq %r13 277f5df1827Smickey pushq %r14 278f5df1827Smickey pushq %r15 279f5df1827Smickey 280fbe53cacSkrw movq %rdi, %r13 281fbe53cacSkrw movq %rsi, %r12 282fbe53cacSkrw 28360854cb9Sguenther /* Record new proc. */ 284fbe53cacSkrw movb $SONPROC,P_STAT(%r12) # p->p_stat = SONPROC 285fbe53cacSkrw SET_CURPROC(%r12,%rcx) 28660854cb9Sguenther 287c9de630fSguenther movl CPUVAR(CPUID),%r9d 288c9de630fSguenther 289c9de630fSguenther /* for the FPU/"extended CPU state" handling below */ 290c9de630fSguenther movq xsave_mask(%rip),%rdx 291c9de630fSguenther movl %edx,%eax 292c9de630fSguenther shrq $32,%rdx 293fd94711fSguenther 294fbe53cacSkrw /* If old proc exited, don't bother. */ 2959f1181d5Sguenther xorl %ecx,%ecx 296fbe53cacSkrw testq %r13,%r13 297f5df1827Smickey jz switch_exited 298f5df1827Smickey 299fbe53cacSkrw /* 300fbe53cacSkrw * Save old context. 301fbe53cacSkrw * 302fbe53cacSkrw * Registers: 3039f1181d5Sguenther * %rax - scratch 304fbe53cacSkrw * %r13 - old proc, then old pcb 3059f1181d5Sguenther * %rcx - old pmap if not P_SYSTEM 306fbe53cacSkrw * %r12 - new proc 307c9de630fSguenther * %r9d - cpuid 308fbe53cacSkrw */ 309fbe53cacSkrw 3109f1181d5Sguenther /* remember the pmap if not P_SYSTEM */ 3119f1181d5Sguenther testl $P_SYSTEM,P_FLAG(%r13) 312fbe53cacSkrw movq P_ADDR(%r13),%r13 3139f1181d5Sguenther jnz 0f 314fd94711fSguenther movq PCB_PMAP(%r13),%rcx 3159f1181d5Sguenther0: 316fd94711fSguenther 317f5df1827Smickey /* Save stack pointers. 
*/ 318f5df1827Smickey movq %rsp,PCB_RSP(%r13) 319f5df1827Smickey movq %rbp,PCB_RBP(%r13) 320fbe53cacSkrw 321c9de630fSguenther /* 322c9de630fSguenther * If the old proc ran in userspace then save the 323c9de630fSguenther * floating-point/"extended state" registers 324c9de630fSguenther */ 3250403d5bcSguenther testl $CPUPF_USERXSTATE,CPUVAR(PFLAGS) 326c9de630fSguenther jz .Lxstate_reset 327c9de630fSguenther 328c9de630fSguenther movq %r13, %rdi 329c9de630fSguenther#if PCB_SAVEFPU != 0 330c9de630fSguenther addq $PCB_SAVEFPU,%rdi 331c9de630fSguenther#endif 332c9de630fSguenther CODEPATCH_START 333ae97d4fcSguenther fxsave64 (%rdi) 334c9de630fSguenther CODEPATCH_END(CPTAG_XSAVE) 335c9de630fSguenther 336f5df1827Smickeyswitch_exited: 337c9de630fSguenther /* now clear the xstate */ 338c9de630fSguenther movq proc0paddr(%rip),%rdi 339c9de630fSguenther#if PCB_SAVEFPU != 0 340c9de630fSguenther addq $PCB_SAVEFPU,%rdi 341c9de630fSguenther#endif 342c9de630fSguenther CODEPATCH_START 343ae97d4fcSguenther fxrstor64 (%rdi) 34455fdb5faSguenther CODEPATCH_END(CPTAG_XRSTORS) 3450403d5bcSguenther andl $~CPUPF_USERXSTATE,CPUVAR(PFLAGS) 346c9de630fSguenther 347c9de630fSguenther.Lxstate_reset: 348c9de630fSguenther /* 349c9de630fSguenther * If the segment registers haven't been reset since the old proc 350c9de630fSguenther * ran in userspace then reset them now 351c9de630fSguenther */ 3520403d5bcSguenther testl $CPUPF_USERSEGS,CPUVAR(PFLAGS) 353c9de630fSguenther jz restore_saved 3540403d5bcSguenther andl $~CPUPF_USERSEGS,CPUVAR(PFLAGS) 355b13138f2Sguenther 35699c80879Sguenther /* set %ds, %es, %fs, and %gs to expected value to prevent info leak */ 357b13138f2Sguenther movw $(GSEL(GUDATA_SEL, SEL_UPL)),%ax 358b13138f2Sguenther movw %ax,%ds 359b13138f2Sguenther movw %ax,%es 360b13138f2Sguenther movw %ax,%fs 36199c80879Sguenther cli /* block interrupts when on user GS.base */ 36299c80879Sguenther swapgs /* switch from kernel to user GS.base */ 36399c80879Sguenther movw %ax,%gs /* set %gs to UDATA 
and GS.base to 0 */ 36499c80879Sguenther swapgs /* back to kernel GS.base */ 365b13138f2Sguenther 366b13138f2Sguentherrestore_saved: 367f5df1827Smickey /* 36845053f4aSart * Restore saved context. 369f5df1827Smickey * 370f5df1827Smickey * Registers: 3719f1181d5Sguenther * %rax, %rdx - scratch 3729f1181d5Sguenther * %rcx - old pmap if not P_SYSTEM 373fbe53cacSkrw * %r12 - new process 3749f1181d5Sguenther * %r13 - new pcb 3759f1181d5Sguenther * %rbx - new pmap if not P_SYSTEM 376f5df1827Smickey */ 377f5df1827Smickey 3789f1181d5Sguenther movq P_ADDR(%r12),%r13 3799f1181d5Sguenther 3809f1181d5Sguenther /* remember the pmap if not P_SYSTEM */ 3819f1181d5Sguenther xorl %ebx,%ebx 3829f1181d5Sguenther testl $P_SYSTEM,P_FLAG(%r12) 3839f1181d5Sguenther jnz 1f 3849f1181d5Sguenther movq PCB_PMAP(%r13),%rbx 3859f1181d5Sguenther1: 3869f1181d5Sguenther 387fbe53cacSkrw /* No interrupts while loading new state. */ 388fbe53cacSkrw cli 389fbe53cacSkrw 390f5df1827Smickey /* Restore stack pointers. */ 391f5df1827Smickey movq PCB_RSP(%r13),%rsp 392f5df1827Smickey movq PCB_RBP(%r13),%rbp 393f5df1827Smickey 394db0a8dc5Smortimer /* Stack pivot done, setup RETGUARD */ 395db0a8dc5Smortimer RETGUARD_SETUP_OFF(cpu_switchto, r11, 6*8) 396db0a8dc5Smortimer 39700ac7aa2Sguenther /* don't switch cr3 to the same thing it already was */ 39824056ac0Sguenther movq PCB_CR3(%r13),%rax 39924056ac0Sguenther movq %cr3,%rdi 40024056ac0Sguenther xorq %rax,%rdi 40124056ac0Sguenther btrq $63,%rdi /* ignore CR3_REUSE_PCID */ 40224056ac0Sguenther testq %rdi,%rdi 40300ac7aa2Sguenther jz .Lsame_cr3 40400ac7aa2Sguenther 4059f1181d5Sguenther#ifdef DIAGNOSTIC 40624056ac0Sguenther /* verify ci_proc_pmap had been updated properly */ 40724056ac0Sguenther cmpq %rcx,CPUVAR(PROC_PMAP) 40824056ac0Sguenther jnz .Lbogus_proc_pmap 4099f1181d5Sguenther#endif 41024056ac0Sguenther /* record which pmap this CPU should get IPIs for */ 41124056ac0Sguenther movq %rbx,CPUVAR(PROC_PMAP) 4129f1181d5Sguenther 41324056ac0Sguenther.Lset_cr3: 
414b767b017Sguenther movq %rax,%cr3 /* %rax used below too */ 415fd94711fSguenther 41600ac7aa2Sguenther.Lsame_cr3: 417a1fa3538Sguenther /* 418a1fa3538Sguenther * If we switched from a userland thread with a shallow call stack 419a1fa3538Sguenther * (e.g interrupt->ast->mi_ast->prempt->mi_switch->cpu_switchto) 420a1fa3538Sguenther * then the RSB may have attacker controlled entries when we switch 421a1fa3538Sguenther * to a deeper call stack in the new thread. Refill the RSB with 422a1fa3538Sguenther * entries safe to speculate into/through. 423a1fa3538Sguenther */ 424a1fa3538Sguenther RET_STACK_REFILL_WITH_RCX 425a1fa3538Sguenther 426fbe53cacSkrw /* Don't bother with the rest if switching to a system process. */ 4279f1181d5Sguenther testq %rbx,%rbx 4289f1181d5Sguenther jz switch_restored 429fd94711fSguenther 430b767b017Sguenther /* record the bits needed for future U-->K transition */ 431b767b017Sguenther movq PCB_KSTACK(%r13),%rdx 432b767b017Sguenther subq $FRAMESIZE,%rdx 433b767b017Sguenther movq %rdx,CPUVAR(KERN_RSP) 434b767b017Sguenther 4351fc8fad1Sguenther CODEPATCH_START 436b767b017Sguenther /* 437b767b017Sguenther * Meltdown: iff we're doing separate U+K and U-K page tables, 438b767b017Sguenther * then record them in cpu_info for easy access in syscall and 4391fc8fad1Sguenther * interrupt trampolines. 440b767b017Sguenther */ 4419f1181d5Sguenther movq PM_PDIRPA_INTEL(%rbx),%rdx 442f95e373fSguenther orq cr3_reuse_pcid,%rax 443f95e373fSguenther orq cr3_pcid_proc_intel,%rdx 444b767b017Sguenther movq %rax,CPUVAR(KERN_CR3) 445b767b017Sguenther movq %rdx,CPUVAR(USER_CR3) 4461fc8fad1Sguenther CODEPATCH_END(CPTAG_MELTDOWN_NOP) 447b767b017Sguenther 448fbe53cacSkrwswitch_restored: 449fbe53cacSkrw SET_CURPCB(%r13) 450fbe53cacSkrw 451f5df1827Smickey /* Interrupts are okay again. 
*/ 452f5df1827Smickey sti 453f5df1827Smickey popq %r15 454f5df1827Smickey popq %r14 455f5df1827Smickey popq %r13 456f5df1827Smickey popq %r12 457f5df1827Smickey popq %rbp 458f5df1827Smickey popq %rbx 459db0a8dc5Smortimer RETGUARD_CHECK(cpu_switchto, r11) 460f5df1827Smickey ret 4613dd0809fSbluhm lfence 46224056ac0Sguenther 46324056ac0Sguenther#ifdef DIAGNOSTIC 46424056ac0Sguenther.Lbogus_proc_pmap: 46524056ac0Sguenther leaq bogus_proc_pmap,%rdi 4664ce05526Sguenther call panic 46724056ac0Sguenther int3 /* NOTREACHED */ 46824056ac0Sguenther .pushsection .rodata 46924056ac0Sguentherbogus_proc_pmap: 47024056ac0Sguenther .asciz "curcpu->ci_proc_pmap didn't point to previous pmap" 47124056ac0Sguenther .popsection 47224056ac0Sguenther#endif /* DIAGNOSTIC */ 473a324dee9SguentherEND(cpu_switchto) 474f5df1827Smickey 475421775b1SguentherNENTRY(retpoline_rax) 4761538f8cbSguenther CODEPATCH_START 477421775b1Sguenther JMP_RETPOLINE(rax) 4781538f8cbSguenther CODEPATCH_END(CPTAG_RETPOLINE_RAX) 4791538f8cbSguentherEND(retpoline_rax) 4801538f8cbSguenther 4811538f8cbSguentherNENTRY(__x86_indirect_thunk_r11) 4821538f8cbSguenther CODEPATCH_START 4831538f8cbSguenther JMP_RETPOLINE(r11) 4841538f8cbSguenther CODEPATCH_END(CPTAG_RETPOLINE_R11) 4851538f8cbSguentherEND(__x86_indirect_thunk_r11) 486421775b1Sguenther 487d8c6becdSguentherENTRY(cpu_idle_cycle_hlt) 488d8c6becdSguenther RETGUARD_SETUP(cpu_idle_cycle_hlt, r11) 489421775b1Sguenther sti 490421775b1Sguenther hlt 491d8c6becdSguenther RETGUARD_CHECK(cpu_idle_cycle_hlt, r11) 49245053f4aSart ret 4933dd0809fSbluhm lfence 494d8c6becdSguentherEND(cpu_idle_cycle_hlt) 495f5df1827Smickey 496f5df1827Smickey/* 497f5df1827Smickey * savectx(struct pcb *pcb); 498f5df1827Smickey * Update pcb, saving current processor state. 499f5df1827Smickey */ 500f5df1827SmickeyENTRY(savectx) 501db0a8dc5Smortimer RETGUARD_SETUP(savectx, r11) 502f5df1827Smickey /* Save stack pointers. 
*/ 503f5df1827Smickey movq %rsp,PCB_RSP(%rdi) 504f5df1827Smickey movq %rbp,PCB_RBP(%rdi) 505db0a8dc5Smortimer RETGUARD_CHECK(savectx, r11) 506f5df1827Smickey ret 5073dd0809fSbluhm lfence 508a324dee9SguentherEND(savectx) 509f5df1827Smickey 510f5df1827Smickey/* 511b767b017Sguenther * syscall insn entry. 51274ebaa6aSguenther * Enter here with interrupts blocked; %rcx contains the caller's 51374ebaa6aSguenther * %rip and the original rflags has been copied to %r11. %cs and 51474ebaa6aSguenther * %ss have been updated to the kernel segments, but %rsp is still 51574ebaa6aSguenther * the user-space value. 516c9de630fSguenther * First order of business is to swap to the kernel GS.base so that 5171fc8fad1Sguenther * we can access our struct cpu_info. After possibly mucking with 5181fc8fad1Sguenther * pagetables, we switch to our kernel stack. Once that's in place 519bb386764Sguenther * we can save the rest of the syscall frame and unblock interrupts. 5201fc8fad1Sguenther */ 5211fc8fad1SguentherKUTEXT_PAGE_START 5221fc8fad1Sguenther .align NBPG, 0xcc 5231fc8fad1SguentherXUsyscall_meltdown: 5241fc8fad1Sguenther /* 5251fc8fad1Sguenther * This is the real Xsyscall_meltdown page, which is mapped into 5261fc8fad1Sguenther * the U-K page tables at the same location as Xsyscall_meltdown 5271fc8fad1Sguenther * below. For this, the Meltdown case, we use the scratch space 5281fc8fad1Sguenther * in cpu_info so we can switch to the kernel page tables 5291fc8fad1Sguenther * (thank you, Intel), at which point we'll continue at the 530bb386764Sguenther * "SYSCALL_ENTRY" after Xsyscall below. 5311fc8fad1Sguenther * In case the CPU speculates past the mov to cr3, we put a 5325c3fa5a3Sguenther * retpoline-style pause-lfence-jmp-to-pause loop. 
53374ebaa6aSguenther */ 5340e2deb64Sderaadt endbr64 535f5df1827Smickey swapgs 5361fc8fad1Sguenther movq %rax,CPUVAR(SCRATCH) 5371fc8fad1Sguenther movq CPUVAR(KERN_CR3),%rax 5381fc8fad1Sguenther movq %rax,%cr3 5391fc8fad1Sguenther0: pause 540a1fa3538Sguenther lfence 5411fc8fad1Sguenther jmp 0b 5421fc8fad1SguentherKUTEXT_PAGE_END 543b767b017Sguenther 5441fc8fad1SguentherKTEXT_PAGE_START 5451fc8fad1Sguenther .align NBPG, 0xcc 5460e2deb64SderaadtGENTRY(Xsyscall_meltdown) 5471fc8fad1Sguenther /* pad to match real Xsyscall_meltdown positioning above */ 5481fc8fad1Sguenther movq CPUVAR(KERN_CR3),%rax 5491fc8fad1Sguenther movq %rax,%cr3 5500e2deb64SderaadtGENTRY(Xsyscall) 5510e2deb64Sderaadt endbr64 5521fc8fad1Sguenther swapgs 5531fc8fad1Sguenther movq %rax,CPUVAR(SCRATCH) 554bb386764Sguenther SYSCALL_ENTRY /* create trapframe */ 555f5df1827Smickey sti 556f5df1827Smickey 557f5df1827Smickey movq CPUVAR(CURPROC),%r14 558f5df1827Smickey movq %rsp,P_MD_REGS(%r14) # save pointer to frame 559f5df1827Smickey andl $~MDP_IRET,P_MD_FLAGS(%r14) 560b5b9857bSart movq %rsp,%rdi 5614ce05526Sguenther call syscall 562c9ad316fSguenther 563c9ad316fSguenther.Lsyscall_check_asts: 564c9ad316fSguenther /* Check for ASTs on exit to user mode. */ 565f5df1827Smickey cli 566f5df1827Smickey CHECK_ASTPENDING(%r11) 567f5df1827Smickey je 2f 568f5df1827Smickey CLEAR_ASTPENDING(%r11) 569f5df1827Smickey sti 570b5b9857bSart movq %rsp,%rdi 5714ce05526Sguenther call ast 572c9ad316fSguenther jmp .Lsyscall_check_asts 573c9ad316fSguenther 574f5df1827Smickey2: 575f5df1827Smickey#ifdef DIAGNOSTIC 576b5b9857bSart cmpl $IPL_NONE,CPUVAR(ILEVEL) 577c9ad316fSguenther jne .Lsyscall_spl_not_lowered 5781396572dSguenther#endif /* DIAGNOSTIC */ 5791396572dSguenther 580c9ad316fSguenther /* Could registers have been changed that require an iretq? 
*/ 581c9ad316fSguenther testl $MDP_IRET, P_MD_FLAGS(%r14) 58231b8ac92Sguenther jne intr_user_exit_post_ast 583c9ad316fSguenther 584c9de630fSguenther /* Restore FPU/"extended CPU state" if it's not already in the CPU */ 5850403d5bcSguenther testl $CPUPF_USERXSTATE,CPUVAR(PFLAGS) 586c9de630fSguenther jz .Lsyscall_restore_xstate 587c9de630fSguenther 588c9de630fSguenther /* Restore FS.base if it's not already in the CPU */ 5890403d5bcSguenther testl $CPUPF_USERSEGS,CPUVAR(PFLAGS) 590c9de630fSguenther jz .Lsyscall_restore_fsbase 591c9de630fSguenther 592c9de630fSguenther.Lsyscall_restore_registers: 5936cbac32fSguenther /* 5946cbac32fSguenther * If the pmap we're now on isn't the same as the one we 5956cbac32fSguenther * were on last time we were in userspace, then use IBPB 5966cbac32fSguenther * to prevent cross-process branch-target injection. 5976cbac32fSguenther */ 5986cbac32fSguenther CODEPATCH_START 5996cbac32fSguenther movq CPUVAR(PROC_PMAP),%rbx 6006cbac32fSguenther cmpq CPUVAR(USER_PMAP),%rbx 6016cbac32fSguenther je 1f 6026cbac32fSguenther xorl %edx,%edx 6036cbac32fSguenther movl $PRED_CMD_IBPB,%eax 6046cbac32fSguenther movl $MSR_PRED_CMD,%ecx 6056cbac32fSguenther wrmsr 6066cbac32fSguenther movq %rbx,CPUVAR(USER_PMAP) 6076cbac32fSguenther1: 6086cbac32fSguenther CODEPATCH_END(CPTAG_IBPB_NOP) 609e9e0c464Sderaadt call pku_xonly 610a4858df8Sguenther RET_STACK_REFILL_WITH_RCX 611a4858df8Sguenther 6121396572dSguenther movq TF_R8(%rsp),%r8 6131396572dSguenther movq TF_R9(%rsp),%r9 6141396572dSguenther movq TF_R10(%rsp),%r10 6151396572dSguenther movq TF_R12(%rsp),%r12 6161396572dSguenther movq TF_R13(%rsp),%r13 6171396572dSguenther movq TF_R14(%rsp),%r14 6181396572dSguenther movq TF_R15(%rsp),%r15 619*6651c3e5Sguenther movq TF_RBX(%rsp),%rbx 620*6651c3e5Sguenther movq TF_RDX(%rsp),%rdx 621a0dcb178Sguenther 622a0dcb178Sguenther CODEPATCH_START 623*6651c3e5Sguenther xorl %edi,%edi 624*6651c3e5Sguenther xorl %esi,%esi 625*6651c3e5Sguenther xorl %r11d,%r11d 
626*6651c3e5Sguenther xorl %eax,%eax 627*6651c3e5Sguenther xorl %ecx,%ecx 628a0dcb178Sguenther movw %ds,TF_R8(%rsp) 629a0dcb178Sguenther verw TF_R8(%rsp) 630a0dcb178Sguenther CODEPATCH_END(CPTAG_MDS) 631a0dcb178Sguenther 632a0dcb178Sguenther movq TF_RDI(%rsp),%rdi 633a0dcb178Sguenther movq TF_RSI(%rsp),%rsi 6341396572dSguenther movq TF_RBP(%rsp),%rbp 6351396572dSguenther 636b767b017Sguenther /* 637b767b017Sguenther * We need to finish reading from the trapframe, then switch 638b767b017Sguenther * to the user page tables, swapgs, and return. We need 639b767b017Sguenther * to get the final value for the register that was used 640b767b017Sguenther * for the mov to %cr3 from somewhere accessible on the 641b767b017Sguenther * user page tables, so save it in CPUVAR(SCRATCH) across 642b767b017Sguenther * the switch. 643b767b017Sguenther */ 6441396572dSguenther movq TF_RAX(%rsp),%rax 6451396572dSguenther movq TF_RIP(%rsp),%rcx 6461396572dSguenther movq TF_RFLAGS(%rsp),%r11 6471396572dSguenther movq TF_RSP(%rsp),%rsp 6481fc8fad1Sguenther CODEPATCH_START 6491fc8fad1Sguenther movq %rax,CPUVAR(SCRATCH) 6501fc8fad1Sguenther movq CPUVAR(USER_CR3),%rax 651f95e373fSguenther PCID_SET_REUSE_NOP 652b767b017Sguenther movq %rax,%cr3 6531fc8fad1SguentherXsyscall_trampback: 6541fc8fad1Sguenther0: pause 655a1fa3538Sguenther lfence 6561fc8fad1Sguenther jmp 0b 6571fc8fad1Sguenther CODEPATCH_END(CPTAG_MELTDOWN_NOP) 658b767b017Sguenther swapgs 659f5df1827Smickey sysretq 660a324dee9SguentherEND(Xsyscall) 661a324dee9SguentherEND(Xsyscall_meltdown) 6621fc8fad1SguentherKTEXT_PAGE_END 6631fc8fad1Sguenther 6641fc8fad1SguentherKUTEXT_PAGE_START 6651fc8fad1Sguenther .space (Xsyscall_trampback - Xsyscall_meltdown) - \ 6661fc8fad1Sguenther (. 
- XUsyscall_meltdown), 0xcc 6671fc8fad1Sguenther movq %rax,%cr3 6681fc8fad1Sguenther movq CPUVAR(SCRATCH),%rax 6691fc8fad1Sguenther swapgs 6701fc8fad1Sguenther sysretq 6711fc8fad1SguentherKUTEXT_PAGE_END 672f5df1827Smickey 673b767b017Sguenther .text 674b433e1a0Sguenther _ALIGN_TRAPS 675c9de630fSguenther /* in this case, need FS.base but not xstate, rarely happens */ 676c9de630fSguenther.Lsyscall_restore_fsbase: /* CPU doesn't have curproc's FS.base */ 6770403d5bcSguenther orl $CPUPF_USERSEGS,CPUVAR(PFLAGS) 678c9de630fSguenther movq CPUVAR(CURPCB),%rdi 679c9de630fSguenther jmp .Lsyscall_restore_fsbase_real 680c9de630fSguenther 681b433e1a0Sguenther _ALIGN_TRAPS 682c9de630fSguenther.Lsyscall_restore_xstate: /* CPU doesn't have curproc's xstate */ 6830403d5bcSguenther orl $(CPUPF_USERXSTATE|CPUPF_USERSEGS),CPUVAR(PFLAGS) 684c9de630fSguenther movq CPUVAR(CURPCB),%rdi 685c9de630fSguenther movq xsave_mask(%rip),%rdx 686c9de630fSguenther movl %edx,%eax 687c9de630fSguenther shrq $32,%rdx 688c9de630fSguenther#if PCB_SAVEFPU != 0 689c9de630fSguenther addq $PCB_SAVEFPU,%rdi 690c9de630fSguenther#endif 691c9de630fSguenther /* untouched state so can't fault */ 692c9de630fSguenther CODEPATCH_START 693ae97d4fcSguenther fxrstor64 (%rdi) 69455fdb5faSguenther CODEPATCH_END(CPTAG_XRSTORS) 695c9de630fSguenther#if PCB_SAVEFPU != 0 696c9de630fSguenther subq $PCB_SAVEFPU,%rdi 697c9de630fSguenther#endif 698c9de630fSguenther.Lsyscall_restore_fsbase_real: 699c9de630fSguenther movq PCB_FSBASE(%rdi),%rdx 700c9de630fSguenther movl %edx,%eax 701c9de630fSguenther shrq $32,%rdx 702c9de630fSguenther movl $MSR_FSBASE,%ecx 703c9de630fSguenther wrmsr 704c9de630fSguenther jmp .Lsyscall_restore_registers 705b767b017Sguenther 706f5df1827Smickey#ifdef DIAGNOSTIC 707c9ad316fSguenther.Lsyscall_spl_not_lowered: 708be97ab8cSguenther leaq spl_lowered(%rip), %rdi 709c4495499Sguenther movl TF_ERR(%rsp),%esi /* syscall # stashed above */ 710f5df1827Smickey movl TF_RDI(%rsp),%edx 711f5df1827Smickey movl %ebx,%ecx 
712b5b9857bSart movl CPUVAR(ILEVEL),%r8d
	/* variadic call with no FP args: %al must be 0 (SysV ABI) */
713f5df1827Smickey xorq %rax,%rax
7144ce05526Sguenther call printf
715f5df1827Smickey#ifdef DDB
716f5df1827Smickey int $3
717f5df1827Smickey#endif /* DDB */
718f5df1827Smickey movl $IPL_NONE,CPUVAR(ILEVEL)
719c9ad316fSguenther jmp .Lsyscall_check_asts
720f5df1827Smickey
72132d5845fSderaadt .section .rodata
72232d5845fSderaadtspl_lowered:
72332d5845fSderaadt .asciz "WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n"
72432d5845fSderaadt .text
72532d5845fSderaadt#endif
726f5df1827Smickey
/*
 * proc_trampoline: first entry into a newly created process/thread.
 * Run the MI trampoline hook, then call the function in %r12 (through
 * retpoline_rax) with %r13 as its argument, and finally join the
 * common syscall-return path at .Lsyscall_check_asts.
 */
727f5df1827SmickeyNENTRY(proc_trampoline)
728bb00e811Sclaudio call proc_trampoline_mi
729f5df1827Smickey movq %r13,%rdi
730421775b1Sguenther movq %r12,%rax
731421775b1Sguenther call retpoline_rax
732c9ad316fSguenther movq CPUVAR(CURPROC),%r14
733c9ad316fSguenther jmp .Lsyscall_check_asts
734a324dee9SguentherEND(proc_trampoline)
735f5df1827Smickey
736f5df1827Smickey
737f5df1827Smickey/*
73831b8ac92Sguenther * Returning to userspace via iretq. We do things in this order:
73931b8ac92Sguenther * - check for ASTs
740c9de630fSguenther * - restore FPU/"extended CPU state" if it's not already in the CPU
74131b8ac92Sguenther * - DIAGNOSTIC: no more C calls after this, so check the SPL
74231b8ac92Sguenther * - restore FS.base if it's not already in the CPU
743c9de630fSguenther * - restore most registers
74431b8ac92Sguenther * - update the iret frame from the trapframe
74531b8ac92Sguenther * - finish reading from the trapframe
74631b8ac92Sguenther * - switch to the trampoline stack \
74731b8ac92Sguenther * - jump to the .kutext segment |-- Meltdown workaround
74831b8ac92Sguenther * - switch to the user page tables /
74931b8ac92Sguenther * - swapgs
75031b8ac92Sguenther * - iretq
7511396572dSguenther */
7521fc8fad1SguentherKTEXT_PAGE_START
7531fc8fad1Sguenther _ALIGN_TRAPS
7541fc8fad1SguentherGENTRY(intr_user_exit)
755b767b017Sguenther#ifdef DIAGNOSTIC
	/* interrupts must be disabled here; warn if PSL_I is still set */
756b767b017Sguenther pushfq
757b767b017Sguenther popq %rdx
758b767b017Sguenther testq $PSL_I,%rdx
75931b8ac92Sguenther jnz .Lintr_user_exit_not_blocked
760b767b017Sguenther#endif /* DIAGNOSTIC */
76131b8ac92Sguenther
76231b8ac92Sguenther /* Check for ASTs */
76331b8ac92Sguenther CHECK_ASTPENDING(%r11)
76431b8ac92Sguenther je intr_user_exit_post_ast
76531b8ac92Sguenther CLEAR_ASTPENDING(%r11)
76631b8ac92Sguenther sti
76731b8ac92Sguenther movq %rsp,%rdi
7684ce05526Sguenther call ast
76931b8ac92Sguenther cli
	/* loop: ast() may have raised new ASTs, so re-run the full check */
77031b8ac92Sguenther jmp intr_user_exit
77131b8ac92Sguenther
77231b8ac92Sguentherintr_user_exit_post_ast:
773c9de630fSguenther /* Restore FPU/"extended CPU state" if it's not already in the CPU */
7740403d5bcSguenther testl $CPUPF_USERXSTATE,CPUVAR(PFLAGS)
775c9de630fSguenther jz .Lintr_restore_xstate
776c9de630fSguenther
7776cbac32fSguenther /* Restore FS.base if it's not already in the CPU */
7786cbac32fSguenther testl $CPUPF_USERSEGS,CPUVAR(PFLAGS)
7796cbac32fSguenther jz .Lintr_restore_fsbase
7806cbac32fSguenther
7816cbac32fSguenther.Lintr_restore_registers:
78231b8ac92Sguenther#ifdef DIAGNOSTIC
78331b8ac92Sguenther /* no more C calls after this, so check the SPL */
78431b8ac92Sguenther cmpl $0,CPUVAR(ILEVEL)
78531b8ac92Sguenther jne .Luser_spl_not_lowered
78631b8ac92Sguenther#endif /* DIAGNOSTIC */
78731b8ac92Sguenther
7886cbac32fSguenther /*
7896cbac32fSguenther * If the pmap we're now on isn't the same as the one we
7906cbac32fSguenther * were on last time we were in userspace, then use IBPB
7916cbac32fSguenther * to prevent cross-process branch-target injection.
7926cbac32fSguenther */
7936cbac32fSguenther CODEPATCH_START
7946cbac32fSguenther movq CPUVAR(PROC_PMAP),%rbx
7956cbac32fSguenther cmpq CPUVAR(USER_PMAP),%rbx
7966cbac32fSguenther je 1f
	/* write PRED_CMD.IBPB: wrmsr takes %edx:%eax with %ecx = MSR number */
7976cbac32fSguenther xorl %edx,%edx
7986cbac32fSguenther movl $PRED_CMD_IBPB,%eax
7996cbac32fSguenther movl $MSR_PRED_CMD,%ecx
8006cbac32fSguenther wrmsr
8016cbac32fSguenther movq %rbx,CPUVAR(USER_PMAP)
8026cbac32fSguenther1:
8036cbac32fSguenther CODEPATCH_END(CPTAG_IBPB_NOP)
804e9e0c464Sderaadt call pku_xonly
805a4858df8Sguenther RET_STACK_REFILL_WITH_RCX
806a4858df8Sguenther
8071396572dSguenther movq TF_R8(%rsp),%r8
8081396572dSguenther movq TF_R9(%rsp),%r9
8091396572dSguenther movq TF_R10(%rsp),%r10
8101396572dSguenther movq TF_R12(%rsp),%r12
8111396572dSguenther movq TF_R13(%rsp),%r13
8121396572dSguenther movq TF_R14(%rsp),%r14
8131396572dSguenther movq TF_R15(%rsp),%r15
814*6651c3e5Sguenther movq TF_RBX(%rsp),%rbx
815a0dcb178Sguenther
	/*
	 * MDS-style buffer scrub (code-patched region, tag CPTAG_MDS):
	 * zero the scratch registers, then verw to flush CPU buffers.
	 * The zeroed regs are reloaded from the trapframe below.
	 */
816a0dcb178Sguenther CODEPATCH_START
817*6651c3e5Sguenther xorl %edi,%edi
818*6651c3e5Sguenther xorl %esi,%esi
819*6651c3e5Sguenther xorl %r11d,%r11d
820*6651c3e5Sguenther xorl %eax,%eax
821*6651c3e5Sguenther xorl %edx,%edx
822*6651c3e5Sguenther xorl %ecx,%ecx
823a0dcb178Sguenther movw %ds,TF_R8(%rsp)
824a0dcb178Sguenther verw TF_R8(%rsp)
825a0dcb178Sguenther CODEPATCH_END(CPTAG_MDS)
826a0dcb178Sguenther
827a0dcb178Sguenther movq TF_RDI(%rsp),%rdi
828a0dcb178Sguenther movq TF_RSI(%rsp),%rsi
8291396572dSguenther movq TF_RBP(%rsp),%rbp
8301396572dSguenther
831b767b017Sguenther /*
832b767b017Sguenther * To get the final value for the register that was used
833b767b017Sguenther * for the mov to %cr3, we need access to somewhere accessible
834b767b017Sguenther * on the user page tables, so we save it in CPUVAR(SCRATCH)
835b767b017Sguenther * across the switch.
836b767b017Sguenther */
837b767b017Sguenther /* update iret frame */
838b767b017Sguenther movq CPUVAR(INTR_RSP),%rdx
839b767b017Sguenther movq $(GSEL(GUCODE_SEL,SEL_UPL)),IRETQ_CS(%rdx)
840b767b017Sguenther movq TF_RIP(%rsp),%rax
841b767b017Sguenther movq %rax,IRETQ_RIP(%rdx)
842b767b017Sguenther movq TF_RFLAGS(%rsp),%rax
843b767b017Sguenther movq %rax,IRETQ_RFLAGS(%rdx)
844b767b017Sguenther movq TF_RSP(%rsp),%rax
845b767b017Sguenther movq %rax,IRETQ_RSP(%rdx)
846b767b017Sguenther movq $(GSEL(GUDATA_SEL,SEL_UPL)),IRETQ_SS(%rdx)
847b767b017Sguenther /* finish with the trap frame */
848b767b017Sguenther movq TF_RAX(%rsp),%rax
849b767b017Sguenther movq TF_RCX(%rsp),%rcx
850b767b017Sguenther movq TF_R11(%rsp),%r11
851b767b017Sguenther /* switch to the trampoline stack */
852b767b017Sguenther xchgq %rdx,%rsp
853b767b017Sguenther movq TF_RDX(%rdx),%rdx
8541fc8fad1Sguenther CODEPATCH_START
8551fc8fad1Sguenther movq %rax,CPUVAR(SCRATCH)
8561fc8fad1Sguenther movq CPUVAR(USER_CR3),%rax
857f95e373fSguenther PCID_SET_REUSE_NOP
858b767b017Sguenther movq %rax,%cr3
8591fc8fad1SguentherXiretq_trampback:
8601a7819b7SguentherKTEXT_PAGE_END
8611a7819b7Sguenther/* the movq %cr3 switches to this "KUTEXT" page */
8621a7819b7SguentherKUTEXT_PAGE_START
	/* pad so this code sits at the same page offset as the KTEXT copy */
8631a7819b7Sguenther .space (Xiretq_trampback - Xsyscall_meltdown) - \
8641a7819b7Sguenther (. - XUsyscall_meltdown), 0xcc
8651a7819b7Sguenther movq CPUVAR(SCRATCH),%rax
8661a7819b7Sguenther.Liretq_swapgs:
8671a7819b7Sguenther swapgs
8681a7819b7Sguentherdoreti_iret_meltdown:
8691a7819b7Sguenther iretq
8701a7819b7SguentherKUTEXT_PAGE_END
8711a7819b7Sguenther/*
8721a7819b7Sguenther * Back to the "KTEXT" page to fill in the speculation trap and the
8731a7819b7Sguenther * swapgs+iretq used for non-Meltdown kernels.
 This switching back
8741a7819b7Sguenther * and forth between segments is so that we can do the .space
8751a7819b7Sguenther * calculation below to guarantee the iretq's above and below line
8761a7819b7Sguenther * up, so the 'doreti_iret' label lines up with the iretq whether
8771a7819b7Sguenther * the CPU is affected by Meltdown or not.
8781a7819b7Sguenther */
8791a7819b7SguentherKTEXT_PAGE_START
	/* speculation trap: spin harmlessly if speculated into */
8801fc8fad1Sguenther0: pause
881a1fa3538Sguenther lfence
8821fc8fad1Sguenther jmp 0b
8831a7819b7Sguenther .space (.Liretq_swapgs - XUsyscall_meltdown) - \
8841a7819b7Sguenther (. - Xsyscall_meltdown), 0xcc
8851fc8fad1Sguenther CODEPATCH_END(CPTAG_MELTDOWN_NOP)
886b767b017Sguenther swapgs
887b767b017Sguenther
8884ce05526Sguenther .globl doreti_iret
8894ce05526Sguentherdoreti_iret:
890b767b017Sguenther iretq
8911fc8fad1SguentherKTEXT_PAGE_END
8921fc8fad1Sguenther
89331b8ac92Sguenther .text
894b433e1a0Sguenther _ALIGN_TRAPS
895c9de630fSguenther.Lintr_restore_xstate: /* CPU doesn't have curproc's xstate */
8960403d5bcSguenther orl $CPUPF_USERXSTATE,CPUVAR(PFLAGS)
897c9de630fSguenther movq CPUVAR(CURPCB),%rdi
898c9de630fSguenther#if PCB_SAVEFPU != 0
899c9de630fSguenther addq $PCB_SAVEFPU,%rdi
900c9de630fSguenther#endif
	/* split xsave_mask into %edx:%eax for the x*rstor below */
90155fdb5faSguenther movq xsave_mask(%rip),%rdx
90255fdb5faSguenther movl %edx,%eax
90355fdb5faSguenther shrq $32, %rdx
90455fdb5faSguenther CODEPATCH_START
90555fdb5faSguenther fxrstor64 (%rdi)
90655fdb5faSguenther CODEPATCH_END(CPTAG_XRSTORS)
90755fdb5faSguenther //testl %eax,%eax
90855fdb5faSguenther //jnz .Lintr_xrstor_faulted
909c9de630fSguenther.Lintr_restore_fsbase: /* CPU doesn't have curproc's FS.base */
9100403d5bcSguenther orl $CPUPF_USERSEGS,CPUVAR(PFLAGS)
911c9de630fSguenther movq CPUVAR(CURPCB),%rdx
912c9de630fSguenther movq PCB_FSBASE(%rdx),%rdx
913c9de630fSguenther movl %edx,%eax
914c9de630fSguenther shrq $32,%rdx
915c9de630fSguenther movl $MSR_FSBASE,%ecx
916c9de630fSguenther wrmsr
917c9de630fSguenther jmp .Lintr_restore_registers
918c9de630fSguenther
919c9de630fSguenther.Lintr_xrstor_faulted:
920c9de630fSguenther /*
921c9de630fSguenther * xrstor faulted; we need to reset the FPU state and call trap()
922c9de630fSguenther * to post a signal, which requires interrupts be enabled.
923c9de630fSguenther */
924c9de630fSguenther sti
	/* reload known-good initial state from proc0's PCB */
925c9de630fSguenther movq proc0paddr(%rip),%rdi
926c9de630fSguenther#if PCB_SAVEFPU != 0
927c9de630fSguenther addq $PCB_SAVEFPU,%rdi
928c9de630fSguenther#endif
929c9de630fSguenther CODEPATCH_START
930ae97d4fcSguenther fxrstor64 (%rdi)
93155fdb5faSguenther CODEPATCH_END(CPTAG_XRSTORS)
932c9de630fSguenther movq $T_PROTFLT,TF_TRAPNO(%rsp)
933c9de630fSguenther jmp recall_trap
934c9de630fSguenther
935c9de630fSguenther#ifdef DIAGNOSTIC
	/* warn (once) that interrupts were enabled on the exit path */
93631b8ac92Sguenther.Lintr_user_exit_not_blocked:
93731b8ac92Sguenther movl warn_once(%rip),%edi
93831b8ac92Sguenther testl %edi,%edi
93931b8ac92Sguenther jnz 1f
94031b8ac92Sguenther incl %edi
94131b8ac92Sguenther movl %edi,warn_once(%rip)
94231b8ac92Sguenther leaq .Lnot_blocked(%rip),%rdi
9434ce05526Sguenther call printf
94431b8ac92Sguenther#ifdef DDB
94531b8ac92Sguenther int $3
94631b8ac92Sguenther#endif /* DDB */
94731b8ac92Sguenther1: cli
94831b8ac92Sguenther jmp intr_user_exit
94931b8ac92Sguenther
95031b8ac92Sguenther.Luser_spl_not_lowered:
95131b8ac92Sguenther sti
95231b8ac92Sguenther leaq intr_spl_lowered(%rip),%rdi
95331b8ac92Sguenther movl CPUVAR(ILEVEL),%esi
95431b8ac92Sguenther xorl %edx,%edx /* always SPL zero for userspace */
95531b8ac92Sguenther xorl %eax,%eax
9564ce05526Sguenther call printf
95731b8ac92Sguenther#ifdef DDB
95831b8ac92Sguenther int $3
95931b8ac92Sguenther#endif /* DDB */
96031b8ac92Sguenther movl $0,CPUVAR(ILEVEL)
96131b8ac92Sguenther cli
96231b8ac92Sguenther jmp intr_user_exit
96331b8ac92Sguenther
96431b8ac92Sguenther .section .rodata
96531b8ac92Sguentherintr_spl_lowered:
96631b8ac92Sguenther .asciz "WARNING: SPL NOT LOWERED ON TRAP EXIT %x %x\n"
96731b8ac92Sguenther .text
96831b8ac92Sguenther#endif /* DIAGNOSTIC
*/
969a324dee9SguentherEND(Xintr_user_exit)
97031b8ac92Sguenther
97131b8ac92Sguenther
97231b8ac92Sguenther/*
97331b8ac92Sguenther * Return to supervisor mode from trap or interrupt
97431b8ac92Sguenther */
97531b8ac92SguentherNENTRY(intr_fast_exit)
97631b8ac92Sguenther#ifdef DIAGNOSTIC
	/* interrupts must be disabled on this path; warn if PSL_I is set */
97731b8ac92Sguenther pushfq
97831b8ac92Sguenther popq %rdx
97931b8ac92Sguenther testq $PSL_I,%rdx
98031b8ac92Sguenther jnz .Lintr_exit_not_blocked
98131b8ac92Sguenther#endif /* DIAGNOSTIC */
	/*
	 * Restore the full register set from the trapframe and iretq.
	 * No swapgs or %cr3 switch here: we stay in supervisor mode.
	 */
98231b8ac92Sguenther movq TF_RDI(%rsp),%rdi
98331b8ac92Sguenther movq TF_RSI(%rsp),%rsi
98431b8ac92Sguenther movq TF_R8(%rsp),%r8
98531b8ac92Sguenther movq TF_R9(%rsp),%r9
98631b8ac92Sguenther movq TF_R10(%rsp),%r10
98731b8ac92Sguenther movq TF_R12(%rsp),%r12
98831b8ac92Sguenther movq TF_R13(%rsp),%r13
98931b8ac92Sguenther movq TF_R14(%rsp),%r14
99031b8ac92Sguenther movq TF_R15(%rsp),%r15
99131b8ac92Sguenther movq TF_RBP(%rsp),%rbp
99231b8ac92Sguenther movq TF_RBX(%rsp),%rbx
993b767b017Sguenther movq TF_RDX(%rsp),%rdx
9941396572dSguenther movq TF_RCX(%rsp),%rcx
9951396572dSguenther movq TF_R11(%rsp),%r11
9961396572dSguenther movq TF_RAX(%rsp),%rax
	/* pop the trapframe up to the hardware iret frame */
9971396572dSguenther addq $TF_RIP,%rsp
998c6853312Sguenther iretq
999c6853312Sguenther
1000b767b017Sguenther#ifdef DIAGNOSTIC
1001b767b017Sguenther.Lintr_exit_not_blocked:
1002b767b017Sguenther movl warn_once(%rip),%edi
1003b767b017Sguenther testl %edi,%edi
1004b767b017Sguenther jnz 1f
1005b767b017Sguenther incl %edi
1006b767b017Sguenther movl %edi,warn_once(%rip)
1007b767b017Sguenther leaq .Lnot_blocked(%rip),%rdi
10084ce05526Sguenther call printf
1009b767b017Sguenther#ifdef DDB
1010b767b017Sguenther int $3
1011b767b017Sguenther#endif /* DDB */
1012b767b017Sguenther1: cli
1013b767b017Sguenther jmp intr_fast_exit
1014b767b017Sguenther
1015b767b017Sguenther .data
1016b767b017Sguenther.global warn_once
1017b767b017Sguentherwarn_once:
1018b767b017Sguenther .long 0
1019b767b017Sguenther .section .rodata
1020b767b017Sguenther.Lnot_blocked:
1021b767b017Sguenther .asciz "WARNING: INTERRUPTS NOT BLOCKED ON INTERRUPT RETURN: 0x%x 0x%x\n"
1022b767b017Sguenther .text
1023b767b017Sguenther#endif
1024a324dee9SguentherEND(intr_fast_exit)
10256950c8e2Smpi
1026c9de630fSguenther/*
1027c9de630fSguenther * FPU/"extended CPU state" handling
102855fdb5faSguenther * void xrstor_kern(sfp, mask)
102955fdb5faSguenther * using first of xrstors/xrstor/fxrstor, load given state
103055fdb5faSguenther * which is assumed to be trusted: i.e., unaltered from
103155fdb5faSguenther * xsaves/xsaveopt/xsave/fxsave by kernel
1032c9de630fSguenther * int xrstor_user(sfp, mask)
103355fdb5faSguenther * using first of xrstor/fxrstor, load given state which might
103455fdb5faSguenther * not be trustable: #GP faults will be caught; returns 0/1 if
103555fdb5faSguenther * okay/it trapped.
1036c9de630fSguenther * void fpusave(sfp)
1037c9de630fSguenther * save current state, but retain it in the FPU
1038c9de630fSguenther * void fpusavereset(sfp)
1039c9de630fSguenther * save current state and reset FPU to initial/kernel state
1040c4fce443Sguenther * int xsetbv_user(reg, mask)
10414039a24bSjsg * load specified %xcr# register, returns 0/1 if okay/it trapped
1042c9de630fSguenther */
1043c9de630fSguenther
104455fdb5faSguentherENTRY(xrstor_kern)
104555fdb5faSguenther RETGUARD_SETUP(xrstor_kern, r11)
	/* split the 64-bit mask argument (%rsi) into %edx:%eax */
104655fdb5faSguenther movq %rsi, %rdx
104755fdb5faSguenther movl %esi, %eax
104855fdb5faSguenther shrq $32, %rdx
104955fdb5faSguenther CODEPATCH_START
105055fdb5faSguenther fxrstor64 (%rdi)
105155fdb5faSguenther CODEPATCH_END(CPTAG_XRSTORS)
105255fdb5faSguenther RETGUARD_CHECK(xrstor_kern, r11)
105355fdb5faSguenther ret
105455fdb5faSguenther lfence
105555fdb5faSguentherEND(xrstor_kern)
105655fdb5faSguenther
1057b1cdcaf5SguentherENTRY(xrstor_user)
1058db0a8dc5Smortimer RETGUARD_SETUP(xrstor_user, r11)
1059b1cdcaf5Sguenther movq %rsi, %rdx
1060b1cdcaf5Sguenther movl %esi, %eax
1061b1cdcaf5Sguenther shrq $32, %rdx
1062b1cdcaf5Sguenther .globl xrstor_fault
1063b1cdcaf5Sguentherxrstor_fault:
1064c9de630fSguenther CODEPATCH_START
1065ae97d4fcSguenther fxrstor64 (%rdi)
1066c9de630fSguenther CODEPATCH_END(CPTAG_XRSTOR)
	/* no fault: return 0 (okay) */
1067198d2c0bSguenther xorl %eax, %eax
1068db0a8dc5Smortimer RETGUARD_CHECK(xrstor_user, r11)
1069198d2c0bSguenther ret
10703dd0809fSbluhm lfence
	/* fault continuation: the restore at xrstor_fault trapped; return 1 */
1071be97ab8cSguentherNENTRY(xrstor_resume)
1072198d2c0bSguenther movl $1, %eax
1073db0a8dc5Smortimer RETGUARD_CHECK(xrstor_user, r11)
1074b1cdcaf5Sguenther ret
10753dd0809fSbluhm lfence
1076c9de630fSguentherEND(xrstor_user)
1077c9de630fSguenther
1078c9de630fSguentherENTRY(fpusave)
1079db0a8dc5Smortimer RETGUARD_SETUP(fpusave, r11)
	/* split xsave_mask into %edx:%eax for the x*save below */
1080c9de630fSguenther movq xsave_mask(%rip),%rdx
1081c9de630fSguenther movl %edx,%eax
1082c9de630fSguenther shrq $32,%rdx
1083c9de630fSguenther CODEPATCH_START
1084ae97d4fcSguenther fxsave64 (%rdi)
1085c9de630fSguenther CODEPATCH_END(CPTAG_XSAVE)
1086db0a8dc5Smortimer RETGUARD_CHECK(fpusave, r11)
1087c9de630fSguenther ret
10883dd0809fSbluhm lfence
1089c9de630fSguentherEND(fpusave)
1090c9de630fSguenther
1091c9de630fSguentherENTRY(fpusavereset)
1092db0a8dc5Smortimer RETGUARD_SETUP(fpusavereset, r11)
1093c9de630fSguenther movq xsave_mask(%rip),%rdx
1094c9de630fSguenther movl %edx,%eax
1095c9de630fSguenther shrq $32,%rdx
1096c9de630fSguenther CODEPATCH_START
1097ae97d4fcSguenther fxsave64 (%rdi)
1098c9de630fSguenther CODEPATCH_END(CPTAG_XSAVE)
	/* reset: reload the initial FPU state from proc0's PCB */
1099c9de630fSguenther movq proc0paddr(%rip),%rdi
1100c9de630fSguenther#if PCB_SAVEFPU != 0
1101c9de630fSguenther addq $PCB_SAVEFPU,%rdi
1102c9de630fSguenther#endif
1103c9de630fSguenther CODEPATCH_START
1104ae97d4fcSguenther fxrstor64 (%rdi)
110555fdb5faSguenther CODEPATCH_END(CPTAG_XRSTORS)
1106db0a8dc5Smortimer RETGUARD_CHECK(fpusavereset, r11)
1107c9de630fSguenther ret
11083dd0809fSbluhm lfence
1109c9de630fSguentherEND(fpusavereset)
1110c9de630fSguenther
1111c4fce443SguentherENTRY(xsetbv_user)
1112c4fce443Sguenther RETGUARD_SETUP(xsetbv_user, r11)
	/* xsetbv takes %ecx = XCR number, %edx:%eax = 64-bit value */
1113c4fce443Sguenther movl %edi, %ecx
1114c4fce443Sguenther movq %rsi, %rdx
1115c4fce443Sguenther movl %esi, %eax
1116c4fce443Sguenther shrq $32, %rdx
1117c4fce443Sguenther .globl xsetbv_fault
1118c4fce443Sguentherxsetbv_fault:
1119c4fce443Sguenther xsetbv
1120c4fce443Sguenther xorl %eax, %eax
1121c4fce443Sguenther RETGUARD_CHECK(xsetbv_user, r11)
1122c4fce443Sguenther ret
11233dd0809fSbluhm lfence
	/* fault continuation: xsetbv trapped; return 1 */
1124c4fce443SguentherNENTRY(xsetbv_resume)
1125c4fce443Sguenther movl $1, %eax
1126c4fce443Sguenther RETGUARD_CHECK(xsetbv_user, r11)
1127c4fce443Sguenther ret
11283dd0809fSbluhm lfence
1129c4fce443SguentherEND(xsetbv_user)
1130c4fce443Sguenther
/* replacement instruction sequences used by the codepatch machinery */
113140ce500bSguentherCODEPATCH_CODE(_xrstor, xrstor64 (%rdi))
113240ce500bSguentherCODEPATCH_CODE(_xrstors, xrstors64 (%rdi))
113340ce500bSguentherCODEPATCH_CODE(_xsave, xsave64 (%rdi))
113440ce500bSguentherCODEPATCH_CODE(_xsaves, xsaves64 (%rdi))
113540ce500bSguentherCODEPATCH_CODE(_xsaveopt, xsaveopt64 (%rdi))
113640ce500bSguentherCODEPATCH_CODE(_pcid_set_reuse,
113740ce500bSguenther orl $(CR3_REUSE_PCID >> 32),CPUVAR(USER_CR3 + 4))
113840ce500bSguentherCODEPATCH_CODE_LEN(_jmprax, jmp *%rax; int3)
113940ce500bSguentherCODEPATCH_CODE_LEN(_jmpr11, jmp *%r11; int3)
114040ce500bSguentherCODEPATCH_CODE_LEN(_jmpr13, jmp *%r13; int3)
1141f95e373fSguenther
/* zero one page (%rdi) with non-temporal stores */
11423a36161cSartENTRY(pagezero)
1143db0a8dc5Smortimer RETGUARD_SETUP(pagezero, r11)
	/* %rdx counts up from -PAGE_SIZE to 0; %rdi points past the page */
11443a36161cSart movq $-PAGE_SIZE,%rdx
11453a36161cSart subq %rdx,%rdi
11463a36161cSart xorq %rax,%rax
11473a36161cSart1:
	/* 32 bytes per iteration, bypassing the cache via movnti */
11483a36161cSart movnti %rax,(%rdi,%rdx)
11493a36161cSart movnti %rax,8(%rdi,%rdx)
11503a36161cSart movnti %rax,16(%rdi,%rdx)
11513a36161cSart movnti %rax,24(%rdi,%rdx)
11523a36161cSart addq $32,%rdx
11533a36161cSart jne 1b
	/* order the non-temporal stores before returning */
11543a36161cSart sfence
1155db0a8dc5Smortimer RETGUARD_CHECK(pagezero, r11)
11563a36161cSart ret
11573dd0809fSbluhm lfence
1158a324dee9SguentherEND(pagezero)
11593c8478a6Sgwk
1160e9e0c464Sderaadt/* void pku_xonly(void) */
1161e9e0c464SderaadtENTRY(pku_xonly)
1162e9e0c464Sderaadt movq pg_xo,%rax /*
have PKU support? */
1163e9e0c464Sderaadt cmpq $0,%rax
1164e9e0c464Sderaadt je 1f
	/* wrpkru takes %eax = PKRU value, %ecx and %edx must be 0 */
1165e9e0c464Sderaadt movl $0,%ecx /* force PKRU for xonly restriction */
1166e9e0c464Sderaadt movl $0,%edx
1167e9e0c464Sderaadt movl $PGK_VALUE,%eax /* key0 normal, key1 is exec without read */
1168e9e0c464Sderaadt wrpkru
1169e9e0c464Sderaadt1: ret
1170e9e0c464Sderaadt lfence
1171e9e0c464SderaadtEND(pku_xonly)
1172e9e0c464Sderaadt
11736f4c4614Smlarkin/* int rdmsr_safe(u_int msr, uint64_t *data) */
11746f4c4614SmlarkinENTRY(rdmsr_safe)
11753fc877c6Smortimer RETGUARD_SETUP(rdmsr_safe, r10)
11766f4c4614Smlarkin
11776f4c4614Smlarkin movl %edi, %ecx /* u_int msr */
11786f4c4614Smlarkin .globl rdmsr_safe_fault
11796f4c4614Smlarkinrdmsr_safe_fault:
11806f4c4614Smlarkin rdmsr
	/* combine %edx:%eax into one 64-bit value (movl %eax,%eax zero-extends) */
11816f4c4614Smlarkin salq $32, %rdx
11826f4c4614Smlarkin movl %eax, %eax
11836f4c4614Smlarkin orq %rdx, %rax
11846f4c4614Smlarkin movq %rax, (%rsi) /* *data */
	/* return 0: success */
11856f4c4614Smlarkin xorq %rax, %rax
11866f4c4614Smlarkin
11873fc877c6Smortimer RETGUARD_CHECK(rdmsr_safe, r10)
11886f4c4614Smlarkin ret
11893dd0809fSbluhm lfence
11906f4c4614Smlarkin
	/* fault continuation: the rdmsr trapped; return 1 */
11916f4c4614SmlarkinNENTRY(rdmsr_resume)
11926f4c4614Smlarkin movl $0x1, %eax
11933fc877c6Smortimer RETGUARD_CHECK(rdmsr_safe, r10)
11946f4c4614Smlarkin ret
11953dd0809fSbluhm lfence
1196a324dee9SguentherEND(rdmsr_safe)
11976f4c4614Smlarkin
1198bc3c2f61Santon#if NHYPERV > 0
1199bc3c2f61Santon/* uint64_t hv_hypercall_trampoline(uint64_t control, paddr_t input, paddr_t output) */
1200bc3c2f61SantonNENTRY(hv_hypercall_trampoline)
1201bc3c2f61Santon endbr64
	/*
	 * Shuffle the SysV arguments (%rdi,%rsi,%rdx) into %rcx,%rdx,%r8,
	 * the registers the Hyper-V hypercall page expects, then tail-jump
	 * into the page.
	 */
1202bc3c2f61Santon mov %rdx, %r8
1203bc3c2f61Santon mov %rsi, %rdx
1204bc3c2f61Santon mov %rdi, %rcx
1205bc3c2f61Santon jmp hv_hypercall_page
1206bc3c2f61SantonEND(hv_hypercall_trampoline)
1207bc3c2f61Santon /* Hypercall page needs to be page aligned */
1208bc3c2f61Santon .text
1209bc3c2f61Santon .align NBPG, 0xcc
1210bc3c2f61Santon .globl hv_hypercall_page
1211bc3c2f61Santonhv_hypercall_page:
	/* page-sized placeholder, filled in at runtime (0xcc = int3 fill) */
1212bc3c2f61Santon .skip 0x1000, 0xcc
1213bc3c2f61Santon#endif /* NHYPERV > 0 */
1214bc3c2f61Santon
1215d8213a49Smikeb#if NXEN > 0
1216d8213a49Smikeb /* Hypercall page needs to be page aligned */
1217d8213a49Smikeb .text
12180175496dSderaadt .align NBPG, 0xcc
12194ce05526Sguenther .globl xen_hypercall_page
12204ce05526Sguentherxen_hypercall_page:
	/* page-sized placeholder, filled in at runtime (0xcc = int3 fill) */
12210175496dSderaadt .skip 0x1000, 0xcc
1222d8213a49Smikeb#endif /* NXEN > 0 */