xref: /openbsd/sys/arch/amd64/amd64/locore.S (revision a1fa3538)
1*a1fa3538Sguenther/*	$OpenBSD: locore.S,v 1.107 2018/07/24 02:42:25 guenther Exp $	*/
2b5b9857bSart/*	$NetBSD: locore.S,v 1.13 2004/03/25 18:33:17 drochner Exp $	*/
3f5df1827Smickey
4f5df1827Smickey/*
5f5df1827Smickey * Copyright-o-rama!
6f5df1827Smickey */
7f5df1827Smickey
8f5df1827Smickey/*
9f5df1827Smickey * Copyright (c) 2001 Wasabi Systems, Inc.
10f5df1827Smickey * All rights reserved.
11f5df1827Smickey *
12f5df1827Smickey * Written by Frank van der Linden for Wasabi Systems, Inc.
13f5df1827Smickey *
14f5df1827Smickey * Redistribution and use in source and binary forms, with or without
15f5df1827Smickey * modification, are permitted provided that the following conditions
16f5df1827Smickey * are met:
17f5df1827Smickey * 1. Redistributions of source code must retain the above copyright
18f5df1827Smickey *    notice, this list of conditions and the following disclaimer.
19f5df1827Smickey * 2. Redistributions in binary form must reproduce the above copyright
20f5df1827Smickey *    notice, this list of conditions and the following disclaimer in the
21f5df1827Smickey *    documentation and/or other materials provided with the distribution.
22f5df1827Smickey * 3. All advertising materials mentioning features or use of this software
23f5df1827Smickey *    must display the following acknowledgement:
24f5df1827Smickey *      This product includes software developed for the NetBSD Project by
25f5df1827Smickey *      Wasabi Systems, Inc.
26f5df1827Smickey * 4. The name of Wasabi Systems, Inc. may not be used to endorse
27f5df1827Smickey *    or promote products derived from this software without specific prior
28f5df1827Smickey *    written permission.
29f5df1827Smickey *
30f5df1827Smickey * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
31f5df1827Smickey * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
32f5df1827Smickey * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
33f5df1827Smickey * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
34f5df1827Smickey * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35f5df1827Smickey * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36f5df1827Smickey * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
37f5df1827Smickey * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
38f5df1827Smickey * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39f5df1827Smickey * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40f5df1827Smickey * POSSIBILITY OF SUCH DAMAGE.
41f5df1827Smickey */
42f5df1827Smickey
43f5df1827Smickey
44f5df1827Smickey/*-
45f5df1827Smickey * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
46f5df1827Smickey * All rights reserved.
47f5df1827Smickey *
48f5df1827Smickey * This code is derived from software contributed to The NetBSD Foundation
49f5df1827Smickey * by Charles M. Hannum.
50f5df1827Smickey *
51f5df1827Smickey * Redistribution and use in source and binary forms, with or without
52f5df1827Smickey * modification, are permitted provided that the following conditions
53f5df1827Smickey * are met:
54f5df1827Smickey * 1. Redistributions of source code must retain the above copyright
55f5df1827Smickey *    notice, this list of conditions and the following disclaimer.
56f5df1827Smickey * 2. Redistributions in binary form must reproduce the above copyright
57f5df1827Smickey *    notice, this list of conditions and the following disclaimer in the
58f5df1827Smickey *    documentation and/or other materials provided with the distribution.
59f5df1827Smickey *
60f5df1827Smickey * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
61f5df1827Smickey * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
62f5df1827Smickey * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
63f5df1827Smickey * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
64f5df1827Smickey * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
65f5df1827Smickey * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
66f5df1827Smickey * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
67f5df1827Smickey * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
68f5df1827Smickey * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
69f5df1827Smickey * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
70f5df1827Smickey * POSSIBILITY OF SUCH DAMAGE.
71f5df1827Smickey */
72f5df1827Smickey
73f5df1827Smickey/*-
74f5df1827Smickey * Copyright (c) 1990 The Regents of the University of California.
75f5df1827Smickey * All rights reserved.
76f5df1827Smickey *
77f5df1827Smickey * This code is derived from software contributed to Berkeley by
78f5df1827Smickey * William Jolitz.
79f5df1827Smickey *
80f5df1827Smickey * Redistribution and use in source and binary forms, with or without
81f5df1827Smickey * modification, are permitted provided that the following conditions
82f5df1827Smickey * are met:
83f5df1827Smickey * 1. Redistributions of source code must retain the above copyright
84f5df1827Smickey *    notice, this list of conditions and the following disclaimer.
85f5df1827Smickey * 2. Redistributions in binary form must reproduce the above copyright
86f5df1827Smickey *    notice, this list of conditions and the following disclaimer in the
87f5df1827Smickey *    documentation and/or other materials provided with the distribution.
88b5b9857bSart * 3. Neither the name of the University nor the names of its contributors
89f5df1827Smickey *    may be used to endorse or promote products derived from this software
90f5df1827Smickey *    without specific prior written permission.
91f5df1827Smickey *
92f5df1827Smickey * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
93f5df1827Smickey * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94f5df1827Smickey * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
95f5df1827Smickey * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
96f5df1827Smickey * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
97f5df1827Smickey * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
98f5df1827Smickey * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99f5df1827Smickey * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
100f5df1827Smickey * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
101f5df1827Smickey * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
102f5df1827Smickey * SUCH DAMAGE.
103f5df1827Smickey *
104f5df1827Smickey *	@(#)locore.s	7.3 (Berkeley) 5/13/91
105f5df1827Smickey */
106f5df1827Smickey
107f5df1827Smickey#include "assym.h"
108f5df1827Smickey#include "lapic.h"
109f5df1827Smickey#include "ksyms.h"
110d8213a49Smikeb#include "xen.h"
111218ead0bSmikeb#include "hyperv.h"
112f5df1827Smickey
113f5df1827Smickey#include <sys/syscall.h>
114f5df1827Smickey
115f5df1827Smickey#include <machine/param.h>
116c9de630fSguenther#include <machine/codepatch.h>
117b767b017Sguenther#include <machine/psl.h>
118f5df1827Smickey#include <machine/segments.h>
119f5df1827Smickey#include <machine/specialreg.h>
120c9de630fSguenther#include <machine/trap.h>			/* T_PROTFLT */
121f5df1827Smickey#include <machine/frameasm.h>
122f5df1827Smickey
123f5df1827Smickey#if NLAPIC > 0
124f5df1827Smickey#include <machine/i82489reg.h>
125f5df1827Smickey#endif
126f5df1827Smickey
127f5df1827Smickey/*
128f5df1827Smickey * override user-land alignment before including asm.h
129f5df1827Smickey */
130cb5172cdSderaadt#define	ALIGN_DATA	.align	8,0xcc
131f5df1827Smickey
132f5df1827Smickey#include <machine/asm.h>
133f5df1827Smickey
/*
 * SET_CURPROC(proc,cpu): record "proc" as this CPU's current proc:
 * cpu = curcpu()'s self pointer, ci_curproc = proc, and point
 * proc->p_cpu back at this cpu_info.  Clobbers the "cpu" register.
 */
134fbe53cacSkrw#define SET_CURPROC(proc,cpu)			\
135fbe53cacSkrw	movq	CPUVAR(SELF),cpu	;	\
136fbe53cacSkrw	movq	proc,CPUVAR(CURPROC)      ;	\
137fbe53cacSkrw	movq	cpu,P_CPU(proc)
138fbe53cacSkrw
/* Fetch/store curcpu()'s current-PCB pointer */
139fbe53cacSkrw#define GET_CURPCB(reg)			movq	CPUVAR(CURPCB),reg
140fbe53cacSkrw#define SET_CURPCB(reg)			movq	reg,CPUVAR(CURPCB)
141fbe53cacSkrw
142fbe53cacSkrw
142fbe53cacSkrw
143f5df1827Smickey/*
144f5df1827Smickey * Initialization
145f5df1827Smickey */
146f5df1827Smickey	.data
147f5df1827Smickey
148f5df1827Smickey#if NLAPIC > 0
/*
 * Page-aligned, NBPG-sized block whose interior labels sit at the
 * LAPIC_* byte offsets of the corresponding local APIC registers;
 * NOTE(review): presumably this page gets remapped onto the local
 * APIC's MMIO window at runtime -- confirm against the lapic driver.
 */
1490175496dSderaadt	.align  NBPG, 0xcc
150f5df1827Smickey	.globl _C_LABEL(local_apic), _C_LABEL(lapic_id), _C_LABEL(lapic_tpr)
151f5df1827Smickey_C_LABEL(local_apic):
152f5df1827Smickey	.space  LAPIC_ID
153f5df1827Smickey_C_LABEL(lapic_id):
154f5df1827Smickey	.long   0x00000000
155f5df1827Smickey	.space  LAPIC_TPRI-(LAPIC_ID+4)
156f5df1827Smickey_C_LABEL(lapic_tpr):
157f5df1827Smickey	.space  LAPIC_PPRI-LAPIC_TPRI
158f5df1827Smickey_C_LABEL(lapic_ppr):
159f5df1827Smickey	.space  LAPIC_ISR-LAPIC_PPRI
160f5df1827Smickey_C_LABEL(lapic_isr):
161f5df1827Smickey	.space  NBPG-LAPIC_ISR
162f5df1827Smickey#endif
163f5df1827Smickey
163f5df1827Smickey
/*
 * Globals shared with C code: results of the `cpuid' instruction,
 * values handed in by the bootloader, and early pmap/security state.
 * NOTE(review): presumably filled in by the early boot path -- not all
 * writers are visible in this file.
 */
164576d2332Smlarkin	.globl	_C_LABEL(cpu_id),_C_LABEL(cpu_vendor)
165f5df1827Smickey	.globl	_C_LABEL(cpuid_level),_C_LABEL(cpu_feature)
1667196220cSmlarkin	.globl	_C_LABEL(cpu_ebxfeature)
1676995b18fShaesbaert	.globl	_C_LABEL(cpu_ecxfeature),_C_LABEL(ecpu_ecxfeature)
16807166672Smglocker	.globl	_C_LABEL(cpu_perf_eax)
16907166672Smglocker	.globl	_C_LABEL(cpu_perf_ebx)
17007166672Smglocker	.globl	_C_LABEL(cpu_perf_edx)
17107166672Smglocker	.globl	_C_LABEL(cpu_apmi_edx)
172886c356bSmlarkin	.globl	_C_LABEL(ssym),_C_LABEL(esym),_C_LABEL(boothowto)
173886c356bSmlarkin	.globl	_C_LABEL(bootdev)
1746483bf47Sderaadt	.globl	_C_LABEL(bootinfo), _C_LABEL(bootinfo_size), _C_LABEL(atdevbase)
175f5df1827Smickey	.globl	_C_LABEL(proc0paddr),_C_LABEL(PTDpaddr)
1769b36c22fSmlarkin	.globl	_C_LABEL(biosbasemem)
1776483bf47Sderaadt	.globl	_C_LABEL(bootapiver)
17836414dbbSmlarkin	.globl	_C_LABEL(pg_nx)
179ca88a4fbSmlarkin	.globl	_C_LABEL(pg_g_kern)
180b767b017Sguenther	.globl	_C_LABEL(cpu_meltdown)
181f5df1827Smickey_C_LABEL(cpu_id):	.long	0	# saved from `cpuid' instruction
182f5df1827Smickey_C_LABEL(cpu_feature):	.long	0	# feature flags from 'cpuid'
183f5df1827Smickey					#   instruction
1847196220cSmlarkin_C_LABEL(cpu_ebxfeature):.long	0	# ext. ebx feature flags from 'cpuid'
1857196220cSmlarkin_C_LABEL(cpu_ecxfeature):.long	0	# ext. ecx feature flags from 'cpuid'
1866995b18fShaesbaert_C_LABEL(ecpu_ecxfeature):.long	0	# extended ecx feature flags
18707166672Smglocker_C_LABEL(cpu_perf_eax):	.long	0	# arch. perf. mon. flags from 'cpuid'
18807166672Smglocker_C_LABEL(cpu_perf_ebx):	.long	0	# arch. perf. mon. flags from 'cpuid'
18907166672Smglocker_C_LABEL(cpu_perf_edx):	.long	0	# arch. perf. mon. flags from 'cpuid'
19007166672Smglocker_C_LABEL(cpu_apmi_edx):	.long	0	# adv. power mgmt. info. from 'cpuid'
191f5df1827Smickey_C_LABEL(cpuid_level):	.long	-1	# max. level accepted by 'cpuid'
192f5df1827Smickey					#   instruction
193f5df1827Smickey_C_LABEL(cpu_vendor):	.space	16	# vendor string returned by `cpuid'
194f5df1827Smickey					#   instruction
195c7636a68Smlarkin_C_LABEL(ssym):		.quad	0	# ptr to start of syms
196f5df1827Smickey_C_LABEL(esym):		.quad	0	# ptr to end of syms
197f5df1827Smickey_C_LABEL(atdevbase):	.quad	0	# location of start of iomem in virtual
198e9dacf7aStom_C_LABEL(bootapiver):	.long	0	# /boot API version
199f431e893Smillert_C_LABEL(bootdev):	.long	0	# device we booted from
200f5df1827Smickey_C_LABEL(proc0paddr):	.quad	0	# proc0 PCB address (xstate reset
					#   source in cpu_switchto)
201f5df1827Smickey_C_LABEL(PTDpaddr):	.quad	0	# paddr of PTD, for libkvm
202f5df1827Smickey#ifndef REALBASEMEM
203f5df1827Smickey_C_LABEL(biosbasemem):	.long	0	# base memory reported by BIOS
204f5df1827Smickey#else
205f5df1827Smickey_C_LABEL(biosbasemem):	.long	REALBASEMEM
206f5df1827Smickey#endif
207f5df1827Smickey#ifndef REALEXTMEM
208f5df1827Smickey_C_LABEL(biosextmem):	.long	0	# extended memory reported by BIOS
209f5df1827Smickey#else
210f5df1827Smickey_C_LABEL(biosextmem):	.long	REALEXTMEM
211f5df1827Smickey#endif
21236414dbbSmlarkin_C_LABEL(pg_nx):	.quad	0	# NX PTE bit (if CPU supports)
213ca88a4fbSmlarkin_C_LABEL(pg_g_kern):	.quad	0	# 0x100 if global pages should be used
214ca88a4fbSmlarkin					# in kernel mappings, 0 otherwise (for
215b767b017Sguenther					# insecure CPUs)
216b767b017Sguenther_C_LABEL(cpu_meltdown):	.long	0	# 1 if this CPU has Meltdown
217f5df1827Smickey
/*
 * _RELOC(x): translate a kernel virtual address to the physical
 * address it was loaded at (paging not yet enabled) by subtracting
 * KERNBASE.
 */
218f5df1827Smickey#define	_RELOC(x)	((x) - KERNBASE)
219f5df1827Smickey#define	RELOC(x)	_RELOC(_C_LABEL(x))
220f5df1827Smickey
221f5df1827Smickey	.globl	gdt64
222f5df1827Smickey
/*
 * Bootstrap GDT pseudo-descriptor in lgdt format: 16-bit limit
 * followed by the table's pre-paging (KERNBASE-relative) address.
 */
223f5df1827Smickeygdt64:
224029cc5b9Smikeb	.word	gdt64_end-gdt64_start-1
225f5df1827Smickey	.quad	_RELOC(gdt64_start)
2260175496dSderaadt.align 64, 0xcc
227f5df1827Smickey
228f5df1827Smickeygdt64_start:
229f5df1827Smickey	.quad 0x0000000000000000	/* always empty */
230f5df1827Smickey	.quad 0x00af9a000000ffff	/* kernel CS */
231f5df1827Smickey	.quad 0x00cf92000000ffff	/* kernel DS */
232f5df1827Smickeygdt64_end:
233f5df1827Smickey
234f5df1827Smickey/*
235f5df1827Smickey * Some hackage to deal with 64bit symbols in 32 bit mode.
236e9dacf7aStom * This may not be needed if things are cleaned up a little.
237f5df1827Smickey */
238f5df1827Smickey
239f5df1827Smickey/*****************************************************************************/
240f5df1827Smickey
241f5df1827Smickey/*
242b767b017Sguenther * Signal trampoline; copied to a page mapped into userspace.
243aa7a0a27Sguenther * gdb's backtrace logic matches against the instructions in this.
244f5df1827Smickey */
245b983598cSderaadt	.section .rodata
246b983598cSderaadt	.globl	_C_LABEL(sigcode)
247b983598cSderaadt_C_LABEL(sigcode):
	/*
	 * Call the signal handler through the retpoline at 1: below.
	 * NOTE(review): the handler address is presumably in %rax,
	 * placed there by sendsig() -- confirm.  The pushed return
	 * address brings the handler back to the sigreturn sequence
	 * that follows.
	 */
248421775b1Sguenther	call	1f
	/* handler returned: %rsp points at the context to hand to sigreturn(2) */
249f5df1827Smickey	movq	%rsp,%rdi
250f5df1827Smickey	pushq	%rdi			/* fake return address */
251f5df1827Smickey	movq	$SYS_sigreturn,%rax
2521396572dSguenther	syscall
2537730d1d9Sderaadt	.globl	_C_LABEL(sigcoderet)
2547730d1d9Sderaadt_C_LABEL(sigcoderet):
	/* only reached if sigreturn(2) itself returns: give up and exit */
255f5df1827Smickey	movq	$SYS_exit,%rax
256f5df1827Smickey	syscall
257b433e1a0Sguenther	_ALIGN_TRAPS
258421775b1Sguenther1:	JMP_RETPOLINE(rax)
259f5df1827Smickey	.globl	_C_LABEL(esigcode)
260f5df1827Smickey_C_LABEL(esigcode):
261f5df1827Smickey
/* single int3: the fill pattern (and its size) for the sigcode page */
262b983598cSderaadt	.globl	_C_LABEL(sigfill)
263b983598cSderaadt_C_LABEL(sigfill):
264b983598cSderaadt	int3
265b983598cSderaadt_C_LABEL(esigfill):
266b983598cSderaadt	.globl	_C_LABEL(sigfillsiz)
267b983598cSderaadt_C_LABEL(sigfillsiz):
268b983598cSderaadt	.long	_C_LABEL(esigfill) - _C_LABEL(sigfill)
269b983598cSderaadt
270b983598cSderaadt	.text
271f5df1827Smickey/*
272f5df1827Smickey * void lgdt(struct region_descriptor *rdp);
273f5df1827Smickey * Change the global descriptor table.
274f5df1827Smickey */
275f5df1827SmickeyNENTRY(lgdt)
276db0a8dc5Smortimer	RETGUARD_SETUP(lgdt, r11)
277f5df1827Smickey	/* Reload the descriptor table. */
278f5df1827Smickey	movq	%rdi,%rax
279f5df1827Smickey	lgdt	(%rax)
280f5df1827Smickey	/* Flush the prefetch q. */
281f5df1827Smickey	jmp	1f
282f5df1827Smickey	nop
283f5df1827Smickey1:	/* Reload "stale" selectors. */
284f5df1827Smickey	movl	$GSEL(GDATA_SEL, SEL_KPL),%eax
285f5df1827Smickey	movl	%eax,%ds
286f5df1827Smickey	movl	%eax,%es
287f5df1827Smickey	movl	%eax,%ss
288f5df1827Smickey	/* Reload code selector by doing intersegment return. */
289f5df1827Smickey	popq	%rax		/* %rax = caller's return address */
290f5df1827Smickey	pushq	$GSEL(GCODE_SEL, SEL_KPL)
291f5df1827Smickey	pushq	%rax
292db0a8dc5Smortimer	RETGUARD_CHECK(lgdt, r11)
293f5df1827Smickey	lretq			/* far return: reloads %cs, returns to caller */
294f5df1827Smickey
295f5df1827SmickeyENTRY(setjmp)
296f5df1827Smickey	/*
297f5df1827Smickey	 * Only save registers that must be preserved across function
298f5df1827Smickey	 * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15)
299f5df1827Smickey	 * and %rip.
300f5df1827Smickey	 */
301f5df1827Smickey	movq	%rdi,%rax
302f5df1827Smickey	movq	%rbx,(%rax)
303f5df1827Smickey	movq	%rsp,8(%rax)
304f5df1827Smickey	movq	%rbp,16(%rax)
305f5df1827Smickey	movq	%r12,24(%rax)
306f5df1827Smickey	movq	%r13,32(%rax)
307f5df1827Smickey	movq	%r14,40(%rax)
308f5df1827Smickey	movq	%r15,48(%rax)
309f5df1827Smickey	movq	(%rsp),%rdx	/* save our return address as the %rip slot */
310f5df1827Smickey	movq	%rdx,56(%rax)
311f5df1827Smickey	xorl	%eax,%eax	/* direct invocation returns 0 */
312f5df1827Smickey	ret
313f5df1827Smickey
314f5df1827SmickeyENTRY(longjmp)
	/* Reload the context saved by setjmp() above (same slot layout). */
315f5df1827Smickey	movq	%rdi,%rax
316f5df1827Smickey	movq	(%rax),%rbx
317f5df1827Smickey	movq	8(%rax),%rsp
318f5df1827Smickey	movq	16(%rax),%rbp
319f5df1827Smickey	movq	24(%rax),%r12
320f5df1827Smickey	movq	32(%rax),%r13
321f5df1827Smickey	movq	40(%rax),%r14
322f5df1827Smickey	movq	48(%rax),%r15
323f5df1827Smickey	movq	56(%rax),%rdx	/* saved %rip */
324f5df1827Smickey	movq	%rdx,(%rsp)	/* make ret go to the setjmp call site */
325f5df1827Smickey	xorl	%eax,%eax	/* "return" 1 at the setjmp site */
326f5df1827Smickey	incl	%eax
327f5df1827Smickey	ret
328f5df1827Smickey
329f5df1827Smickey/*****************************************************************************/
330f5df1827Smickey
331f5df1827Smickey/*
33245053f4aSart * int cpu_switchto(struct proc *old, struct proc *new)
33345053f4aSart * Switch from "old" proc to "new".
334f5df1827Smickey */
33545053f4aSartENTRY(cpu_switchto)
	/* Build the switchframe of callee-saved registers on the old stack. */
336f5df1827Smickey	pushq	%rbx
337f5df1827Smickey	pushq	%rbp
338f5df1827Smickey	pushq	%r12
339f5df1827Smickey	pushq	%r13
340f5df1827Smickey	pushq	%r14
341f5df1827Smickey	pushq	%r15
342f5df1827Smickey
343fbe53cacSkrw	movq	%rdi, %r13	# %r13 = old proc
344fbe53cacSkrw	movq	%rsi, %r12	# %r12 = new proc
345fbe53cacSkrw
34660854cb9Sguenther	/* Record new proc. */
347fbe53cacSkrw	movb	$SONPROC,P_STAT(%r12)	# p->p_stat = SONPROC
348fbe53cacSkrw	SET_CURPROC(%r12,%rcx)
34960854cb9Sguenther
	/* %r9d = this CPU's number, used for the PM_CPUS bit ops below */
350c9de630fSguenther	movl	CPUVAR(CPUID),%r9d
351c9de630fSguenther
352c9de630fSguenther	/* for the FPU/"extended CPU state" handling below */
353c9de630fSguenther	movq	xsave_mask(%rip),%rdx
354c9de630fSguenther	movl	%edx,%eax
355c9de630fSguenther	shrq	$32,%rdx
	/* (mask now split into %edx:%eax -- presumably for the xsave/xrstor
	 * variants the codepatch tags below swap in; confirm in codepatch.c) */
356fd94711fSguenther
357fbe53cacSkrw	/* If old proc exited, don't bother. */
358fbe53cacSkrw	testq	%r13,%r13
359f5df1827Smickey	jz	switch_exited
360f5df1827Smickey
361fbe53cacSkrw	/*
362fbe53cacSkrw	 * Save old context.
363fbe53cacSkrw	 *
364fbe53cacSkrw	 * Registers:
365fbe53cacSkrw	 *   %rax, %rcx - scratch
366fbe53cacSkrw	 *   %r13 - old proc, then old pcb
367fbe53cacSkrw	 *   %r12 - new proc
368c9de630fSguenther	 *   %r9d - cpuid
369fbe53cacSkrw	 */
370fbe53cacSkrw
371fbe53cacSkrw	movq	P_ADDR(%r13),%r13
372fbe53cacSkrw
373fd94711fSguenther	/* clear the old pmap's bit for the cpu */
374fd94711fSguenther	movq	PCB_PMAP(%r13),%rcx
375fd94711fSguenther	lock
376c9de630fSguenther	btrq	%r9,PM_CPUS(%rcx)
377fd94711fSguenther
378f5df1827Smickey	/* Save stack pointers. */
379f5df1827Smickey	movq	%rsp,PCB_RSP(%r13)
380f5df1827Smickey	movq	%rbp,PCB_RBP(%r13)
381fbe53cacSkrw
382c9de630fSguenther	/*
383c9de630fSguenther	 * If the old proc ran in userspace then save the
384c9de630fSguenther	 * floating-point/"extended state" registers
385c9de630fSguenther	 */
386c9de630fSguenther	testl	$CPUF_USERXSTATE,CPUVAR(FLAGS)
387c9de630fSguenther	jz	.Lxstate_reset
388c9de630fSguenther
389c9de630fSguenther	movq	%r13, %rdi
390c9de630fSguenther#if PCB_SAVEFPU != 0
391c9de630fSguenther	addq	$PCB_SAVEFPU,%rdi
392c9de630fSguenther#endif
393c9de630fSguenther	CODEPATCH_START
394c9de630fSguenther	.byte 0x48; fxsave	(%rdi)		/* really fxsave64 */
395c9de630fSguenther	CODEPATCH_END(CPTAG_XSAVE)
396c9de630fSguenther
397f5df1827Smickeyswitch_exited:
	/* reload proc0's saved (clean) state into the FPU */
398c9de630fSguenther	/* now clear the xstate */
399c9de630fSguenther	movq	proc0paddr(%rip),%rdi
400c9de630fSguenther#if PCB_SAVEFPU != 0
401c9de630fSguenther	addq	$PCB_SAVEFPU,%rdi
402c9de630fSguenther#endif
403c9de630fSguenther	CODEPATCH_START
404c9de630fSguenther	.byte 0x48; fxrstor	(%rdi)		/* really fxrstor64 */
405c9de630fSguenther	CODEPATCH_END(CPTAG_XRSTOR)
406c9de630fSguenther	andl	$~CPUF_USERXSTATE,CPUVAR(FLAGS)
407c9de630fSguenther
408c9de630fSguenther.Lxstate_reset:
409c9de630fSguenther	/*
410c9de630fSguenther	 * If the segment registers haven't been reset since the old proc
411c9de630fSguenther	 * ran in userspace then reset them now
412c9de630fSguenther	 */
413c9de630fSguenther	testl	$CPUF_USERSEGS,CPUVAR(FLAGS)
414c9de630fSguenther	jz	restore_saved
415c9de630fSguenther	andl	$~CPUF_USERSEGS,CPUVAR(FLAGS)
416b13138f2Sguenther
41799c80879Sguenther	/* set %ds, %es, %fs, and %gs to expected value to prevent info leak */
418b13138f2Sguenther	movw	$(GSEL(GUDATA_SEL, SEL_UPL)),%ax
419b13138f2Sguenther	movw	%ax,%ds
420b13138f2Sguenther	movw	%ax,%es
421b13138f2Sguenther	movw	%ax,%fs
42299c80879Sguenther	cli			/* block interrupts when on user GS.base */
42399c80879Sguenther	swapgs			/* switch from kernel to user GS.base */
42499c80879Sguenther	movw	%ax,%gs		/* set %gs to UDATA and GS.base to 0 */
42599c80879Sguenther	swapgs			/* back to kernel GS.base */
426b13138f2Sguenther
427b13138f2Sguentherrestore_saved:
428f5df1827Smickey	/*
42945053f4aSart	 * Restore saved context.
430f5df1827Smickey	 *
431f5df1827Smickey	 * Registers:
432f5df1827Smickey	 *   %rax, %rcx, %rdx - scratch
433f5df1827Smickey	 *   %r13 - new pcb
434fbe53cacSkrw	 *   %r12 - new process
435f5df1827Smickey	 */
436f5df1827Smickey
437fbe53cacSkrw	/* No interrupts while loading new state. */
438fbe53cacSkrw	cli
439fbe53cacSkrw	movq	P_ADDR(%r12),%r13
440fbe53cacSkrw
441f5df1827Smickey	/* Restore stack pointers. */
442f5df1827Smickey	movq	PCB_RSP(%r13),%rsp
443f5df1827Smickey	movq	PCB_RBP(%r13),%rbp
444f5df1827Smickey
445db0a8dc5Smortimer	/* Stack pivot done, setup RETGUARD */
446db0a8dc5Smortimer	RETGUARD_SETUP_OFF(cpu_switchto, r11, 6*8)
447db0a8dc5Smortimer
	/* switch address spaces to the new proc's page tables */
448fd94711fSguenther	movq	PCB_CR3(%r13),%rax
449b767b017Sguenther	movq	%rax,%cr3			/* %rax used below too */
450fd94711fSguenther
451*a1fa3538Sguenther	/*
452*a1fa3538Sguenther	 * If we switched from a userland thread with a shallow call stack
453*a1fa3538Sguenther	 * (e.g interrupt->ast->mi_ast->prempt->mi_switch->cpu_switchto)
454*a1fa3538Sguenther	 * then the RSB may have attacker controlled entries when we switch
455*a1fa3538Sguenther	 * to a deeper call stack in the new thread.  Refill the RSB with
456*a1fa3538Sguenther	 * entries safe to speculate into/through.
457*a1fa3538Sguenther	 */
458*a1fa3538Sguenther	RET_STACK_REFILL_WITH_RCX
459*a1fa3538Sguenther
460fbe53cacSkrw	/* Don't bother with the rest if switching to a system process. */
461fbe53cacSkrw	testl	$P_SYSTEM,P_FLAG(%r12)
462fbe53cacSkrw	jnz	switch_restored
463fd94711fSguenther
464b767b017Sguenther	/* record the bits needed for future U-->K transition */
465b767b017Sguenther	movq	PCB_KSTACK(%r13),%rdx
466b767b017Sguenther	subq	$FRAMESIZE,%rdx
467b767b017Sguenther	movq	%rdx,CPUVAR(KERN_RSP)
468b767b017Sguenther	movq	PCB_PMAP(%r13),%rcx
469b767b017Sguenther
4701fc8fad1Sguenther	CODEPATCH_START
471b767b017Sguenther	/*
472b767b017Sguenther	 * Meltdown: iff we're doing separate U+K and U-K page tables,
473b767b017Sguenther	 * then record them in cpu_info for easy access in syscall and
4741fc8fad1Sguenther	 * interrupt trampolines.
475b767b017Sguenther	 */
476b767b017Sguenther	movq	PM_PDIRPA_INTEL(%rcx),%rdx
477b767b017Sguenther	movq	%rax,CPUVAR(KERN_CR3)
478b767b017Sguenther	movq	%rdx,CPUVAR(USER_CR3)
4791fc8fad1Sguenther	CODEPATCH_END(CPTAG_MELTDOWN_NOP)
480b767b017Sguenther
481fd94711fSguenther	/* set the new pmap's bit for the cpu */
482fd94711fSguenther	lock
483c9de630fSguenther	btsq	%r9,PM_CPUS(%rcx)
484fd94711fSguenther#ifdef DIAGNOSTIC
485fd94711fSguenther	jc	_C_LABEL(switch_pmcpu_set)	/* bit was already set: bug */
486fbe53cacSkrw#endif
487f5df1827Smickey
488fbe53cacSkrwswitch_restored:
489fbe53cacSkrw	SET_CURPCB(%r13)
490fbe53cacSkrw
491f5df1827Smickey	/* Interrupts are okay again. */
492f5df1827Smickey	sti
	/* unwind the new proc's switchframe and resume it */
493f5df1827Smickey	popq	%r15
494f5df1827Smickey	popq	%r14
495f5df1827Smickey	popq	%r13
496f5df1827Smickey	popq	%r12
497f5df1827Smickey	popq	%rbp
498f5df1827Smickey	popq	%rbx
499db0a8dc5Smortimer	RETGUARD_CHECK(cpu_switchto, r11)
500f5df1827Smickey	ret
501f5df1827Smickey
50245053f4aSartENTRY(cpu_idle_enter)
	/* tail-call the machine-specific idle-enter hook (via retpoline)
	 * if one is installed; otherwise nothing to do */
5032692ace4Sjordan	movq	_C_LABEL(cpu_idle_enter_fcn),%rax
5042692ace4Sjordan	cmpq	$0,%rax
505421775b1Sguenther	jne	retpoline_rax
50645053f4aSart	ret
507f5df1827Smickey
50845053f4aSartENTRY(cpu_idle_leave)
	/* tail-call the machine-specific idle-leave hook (via retpoline)
	 * if one is installed; otherwise nothing to do */
5092692ace4Sjordan	movq	_C_LABEL(cpu_idle_leave_fcn),%rax
5102692ace4Sjordan	cmpq	$0,%rax
511421775b1Sguenther	jne	retpoline_rax
512421775b1Sguenther	ret
513421775b1Sguenther
514421775b1Sguenther/* placed here for correct static branch prediction in cpu_idle_* */
515421775b1SguentherNENTRY(retpoline_rax)
	/* speculation-safe indirect jump to *%rax */
516421775b1Sguenther	JMP_RETPOLINE(rax)
517421775b1Sguenther
518421775b1SguentherENTRY(cpu_idle_cycle)
	/* use the machine-specific idle-cycle hook if installed,
	 * else default to waiting for the next interrupt */
519421775b1Sguenther	movq	_C_LABEL(cpu_idle_cycle_fcn),%rax
520421775b1Sguenther	cmpq	$0,%rax
521421775b1Sguenther	jne	retpoline_rax
522421775b1Sguenther	sti
523421775b1Sguenther	hlt	/* sti takes effect after hlt starts: no lost-wakeup window */
52445053f4aSart	ret
525f5df1827Smickey
526da4ea94cSart	.globl	_C_LABEL(panic)
527da4ea94cSart
528da4ea94cSart#ifdef DIAGNOSTIC
/*
 * Jumped to from cpu_switchto when the new pmap's pm_cpus bit for this
 * CPU was already set (btsq set CF): that should be impossible, so
 * panic with a diagnostic message.  Does not return.
 */
529fd94711fSguentherNENTRY(switch_pmcpu_set)
530be97ab8cSguenther	leaq	switch_active(%rip),%rdi
531fd94711fSguenther	call	_C_LABEL(panic)
532fd94711fSguenther	/* NOTREACHED */
533f5df1827Smickey
53432d5845fSderaadt	.section .rodata
53532d5845fSderaadtswitch_active:
53632d5845fSderaadt	.asciz	"activate already active pmap"
53732d5845fSderaadt	.text
53832d5845fSderaadt#endif /* DIAGNOSTIC */
539f5df1827Smickey/*
540f5df1827Smickey * savectx(struct pcb *pcb);
541f5df1827Smickey * Update pcb, saving current processor state.
542f5df1827Smickey */
543f5df1827SmickeyENTRY(savectx)
544db0a8dc5Smortimer	RETGUARD_SETUP(savectx, r11)
545f5df1827Smickey	/* Save stack pointers. */
546f5df1827Smickey	movq	%rsp,PCB_RSP(%rdi)
547f5df1827Smickey	movq	%rbp,PCB_RBP(%rdi)	/* only %rsp/%rbp are recorded here */
548db0a8dc5Smortimer	RETGUARD_CHECK(savectx, r11)
549f5df1827Smickey	ret
550f5df1827Smickey
550f5df1827SmickeyIDTVEC(syscall32)
	/* 32-bit `syscall' instruction is not supported: immediately
	 * bounce back to userspace without doing anything */
551f5df1827Smickey	sysret		/* go away please */
553f5df1827Smickey
554f5df1827Smickey/*
555b767b017Sguenther * syscall insn entry.
55674ebaa6aSguenther * Enter here with interrupts blocked; %rcx contains the caller's
55774ebaa6aSguenther * %rip and the original rflags has been copied to %r11.  %cs and
55874ebaa6aSguenther * %ss have been updated to the kernel segments, but %rsp is still
55974ebaa6aSguenther * the user-space value.
560c9de630fSguenther * First order of business is to swap to the kernel GS.base so that
5611fc8fad1Sguenther * we can access our struct cpu_info.  After possibly mucking with
5621fc8fad1Sguenther * pagetables, we switch to our kernel stack.  Once that's in place
5631fc8fad1Sguenther * we can unblock interrupts and save the rest of the syscall frame.
5641fc8fad1Sguenther */
5651fc8fad1SguentherKUTEXT_PAGE_START
5661fc8fad1Sguenther 	.align	NBPG, 0xcc
5671fc8fad1SguentherXUsyscall_meltdown:
5681fc8fad1Sguenther	/*
5691fc8fad1Sguenther	 * This is the real Xsyscall_meltdown page, which is mapped into
5701fc8fad1Sguenther	 * the U-K page tables at the same location as Xsyscall_meltdown
5711fc8fad1Sguenther	 * below.  For this, the Meltdown case, we use the scratch space
5721fc8fad1Sguenther	 * in cpu_info so we can switch to the kernel page tables
5731fc8fad1Sguenther	 * (thank you, Intel), at which point we'll continue at the
5741fc8fad1Sguenther	 * "movq CPUVAR(KERN_RSP),%rax" after Xsyscall below.
5751fc8fad1Sguenther	 * In case the CPU speculates past the mov to cr3, we put a
5761fc8fad1Sguenther	 * retpoline-style pause-jmp-to-pause loop.
57774ebaa6aSguenther	 */
578f5df1827Smickey	swapgs
5791fc8fad1Sguenther	movq	%rax,CPUVAR(SCRATCH)
5801fc8fad1Sguenther	movq	CPUVAR(KERN_CR3),%rax
5811fc8fad1Sguenther	movq	%rax,%cr3
5821fc8fad1Sguenther0:	pause
583*a1fa3538Sguenther	lfence
5841fc8fad1Sguenther	jmp	0b
5851fc8fad1SguentherKUTEXT_PAGE_END
586b767b017Sguenther
5871fc8fad1SguentherKTEXT_PAGE_START
5881fc8fad1Sguenther	.align	NBPG, 0xcc
5891fc8fad1SguentherIDTVEC_NOALIGN(syscall_meltdown)
5901fc8fad1Sguenther	/* pad to match real Xsyscall_meltdown positioning above */
5911fc8fad1Sguenther	movq	CPUVAR(KERN_CR3),%rax
5921fc8fad1Sguenther	movq	%rax,%cr3
5931fc8fad1SguentherIDTVEC_NOALIGN(syscall)
5941fc8fad1Sguenther	swapgs
5951fc8fad1Sguenther	movq	%rax,CPUVAR(SCRATCH)
5961fc8fad1Sguenther	movq	CPUVAR(KERN_RSP),%rax
5971fc8fad1Sguenther	xchgq	%rax,%rsp
598a4858df8Sguenther	movq	%rcx,TF_RCX(%rsp)
599a4858df8Sguenther	movq	%rcx,TF_RIP(%rsp)
600a4858df8Sguenther	RET_STACK_REFILL_WITH_RCX
601f5df1827Smickey	sti
602f5df1827Smickey
603f5df1827Smickey	/*
604f5df1827Smickey	 * XXX don't need this whole frame, split of the
605f5df1827Smickey	 * syscall frame and trapframe is needed.
606f5df1827Smickey	 * First, leave some room for the trapno, error,
607f5df1827Smickey	 * ss:rsp, etc, so that all GP registers can be
608f5df1827Smickey	 * saved. Then, fill in the rest.
609f5df1827Smickey	 */
610b767b017Sguenther	movq	$(GSEL(GUDATA_SEL, SEL_UPL)),TF_SS(%rsp)
6111fc8fad1Sguenther	movq	%rax,TF_RSP(%rsp)
6121fc8fad1Sguenther	movq	CPUVAR(SCRATCH),%rax
613b767b017Sguenther	INTR_SAVE_MOST_GPRS_NO_ADJ
614f5df1827Smickey	movq	%r11, TF_RFLAGS(%rsp)	/* old rflags from syscall insn */
6151f7e6433Sguenther	movq	$(GSEL(GUCODE_SEL, SEL_UPL)), TF_CS(%rsp)
616c4495499Sguenther	movq	%rax,TF_ERR(%rsp)	/* stash syscall # for SPL check */
617f0f07b0bSguenther	INTR_CLEAR_GPRS
618f5df1827Smickey
619f5df1827Smickey	movq	CPUVAR(CURPROC),%r14
620f5df1827Smickey	movq	%rsp,P_MD_REGS(%r14)	# save pointer to frame
621f5df1827Smickey	andl	$~MDP_IRET,P_MD_FLAGS(%r14)
622b5b9857bSart	movq	%rsp,%rdi
6234e1a77ceSsturm	call	_C_LABEL(syscall)
624c9ad316fSguenther
625c9ad316fSguenther.Lsyscall_check_asts:
626c9ad316fSguenther	/* Check for ASTs on exit to user mode. */
627f5df1827Smickey	cli
628f5df1827Smickey	CHECK_ASTPENDING(%r11)
629f5df1827Smickey	je	2f
630f5df1827Smickey	CLEAR_ASTPENDING(%r11)
631f5df1827Smickey	sti
632b5b9857bSart	movq	%rsp,%rdi
633c9ad316fSguenther	call	_C_LABEL(ast)
634c9ad316fSguenther	jmp	.Lsyscall_check_asts
635c9ad316fSguenther
636f5df1827Smickey2:
637f5df1827Smickey#ifdef DIAGNOSTIC
638b5b9857bSart	cmpl	$IPL_NONE,CPUVAR(ILEVEL)
639c9ad316fSguenther	jne	.Lsyscall_spl_not_lowered
6401396572dSguenther#endif /* DIAGNOSTIC */
6411396572dSguenther
642c9ad316fSguenther	/* Could registers have been changed that require an iretq? */
643c9ad316fSguenther	testl	$MDP_IRET, P_MD_FLAGS(%r14)
64431b8ac92Sguenther	jne	intr_user_exit_post_ast
645c9ad316fSguenther
646c9de630fSguenther	/* Restore FPU/"extended CPU state" if it's not already in the CPU */
647c9de630fSguenther	testl	$CPUF_USERXSTATE,CPUVAR(FLAGS)
648c9de630fSguenther	jz	.Lsyscall_restore_xstate
649c9de630fSguenther
650c9de630fSguenther	/* Restore FS.base if it's not already in the CPU */
651c9de630fSguenther	testl	$CPUF_USERSEGS,CPUVAR(FLAGS)
652c9de630fSguenther	jz	.Lsyscall_restore_fsbase
653c9de630fSguenther
654c9de630fSguenther.Lsyscall_restore_registers:
655a4858df8Sguenther	RET_STACK_REFILL_WITH_RCX
656a4858df8Sguenther
6571396572dSguenther	movq	TF_RDI(%rsp),%rdi
6581396572dSguenther	movq	TF_RSI(%rsp),%rsi
6591396572dSguenther	movq	TF_R8(%rsp),%r8
6601396572dSguenther	movq	TF_R9(%rsp),%r9
6611396572dSguenther	movq	TF_R10(%rsp),%r10
6621396572dSguenther	movq	TF_R12(%rsp),%r12
6631396572dSguenther	movq	TF_R13(%rsp),%r13
6641396572dSguenther	movq	TF_R14(%rsp),%r14
6651396572dSguenther	movq	TF_R15(%rsp),%r15
6661396572dSguenther	movq	TF_RBP(%rsp),%rbp
6671396572dSguenther	movq	TF_RBX(%rsp),%rbx
6681396572dSguenther
669b767b017Sguenther	/*
670b767b017Sguenther	 * We need to finish reading from the trapframe, then switch
671b767b017Sguenther	 * to the user page tables, swapgs, and return.  We need
672b767b017Sguenther	 * to get the final value for the register that was used
673b767b017Sguenther	 * for the mov to %cr3 from somewhere accessible on the
674b767b017Sguenther	 * user page tables, so save it in CPUVAR(SCRATCH) across
675b767b017Sguenther	 * the switch.
676b767b017Sguenther	 */
6771396572dSguenther	movq	TF_RDX(%rsp),%rdx
6781396572dSguenther	movq	TF_RAX(%rsp),%rax
6791396572dSguenther	movq	TF_RIP(%rsp),%rcx
6801396572dSguenther	movq	TF_RFLAGS(%rsp),%r11
6811396572dSguenther	movq	TF_RSP(%rsp),%rsp
6821fc8fad1Sguenther	CODEPATCH_START
6831fc8fad1Sguenther	movq	%rax,CPUVAR(SCRATCH)
6841fc8fad1Sguenther	movq	CPUVAR(USER_CR3),%rax
685b767b017Sguenther	movq	%rax,%cr3
6861fc8fad1SguentherXsyscall_trampback:
6871fc8fad1Sguenther0:	pause
688*a1fa3538Sguenther	lfence
6891fc8fad1Sguenther	jmp	0b
6901fc8fad1Sguenther	CODEPATCH_END(CPTAG_MELTDOWN_NOP)
691b767b017Sguenther	swapgs
692f5df1827Smickey	sysretq
6931fc8fad1SguentherKTEXT_PAGE_END
6941fc8fad1Sguenther
6951fc8fad1SguentherKUTEXT_PAGE_START
6961fc8fad1Sguenther	.space	(Xsyscall_trampback - Xsyscall_meltdown) - \
6971fc8fad1Sguenther		(. - XUsyscall_meltdown), 0xcc
6981fc8fad1Sguenther	movq	%rax,%cr3
6991fc8fad1Sguenther	movq	CPUVAR(SCRATCH),%rax
7001fc8fad1Sguenther	swapgs
7011fc8fad1Sguenther	sysretq
7021fc8fad1SguentherKUTEXT_PAGE_END
703f5df1827Smickey
704b767b017Sguenther	.text
705b433e1a0Sguenther	_ALIGN_TRAPS
706c9de630fSguenther	/* in this case, need FS.base but not xstate, rarely happens */
707c9de630fSguenther.Lsyscall_restore_fsbase:	/* CPU doesn't have curproc's FS.base */
708c9de630fSguenther	orl	$CPUF_USERSEGS,CPUVAR(FLAGS)
709c9de630fSguenther	movq	CPUVAR(CURPCB),%rdi
710c9de630fSguenther	jmp	.Lsyscall_restore_fsbase_real
711c9de630fSguenther
712b433e1a0Sguenther	_ALIGN_TRAPS
713c9de630fSguenther.Lsyscall_restore_xstate:	/* CPU doesn't have curproc's xstate */
714c9de630fSguenther	orl	$(CPUF_USERXSTATE|CPUF_USERSEGS),CPUVAR(FLAGS)
715c9de630fSguenther	movq	CPUVAR(CURPCB),%rdi
716c9de630fSguenther	movq	xsave_mask(%rip),%rdx
717c9de630fSguenther	movl	%edx,%eax
718c9de630fSguenther	shrq	$32,%rdx
719c9de630fSguenther#if PCB_SAVEFPU != 0
720c9de630fSguenther	addq	$PCB_SAVEFPU,%rdi
721c9de630fSguenther#endif
722c9de630fSguenther	/* untouched state so can't fault */
723c9de630fSguenther	CODEPATCH_START
724c9de630fSguenther	.byte 0x48; fxrstor	(%rdi)		/* really fxrstor64 */
725c9de630fSguenther	CODEPATCH_END(CPTAG_XRSTOR)
726c9de630fSguenther#if PCB_SAVEFPU != 0
727c9de630fSguenther	subq	$PCB_SAVEFPU,%rdi
728c9de630fSguenther#endif
729c9de630fSguenther.Lsyscall_restore_fsbase_real:
730c9de630fSguenther	movq	PCB_FSBASE(%rdi),%rdx
731c9de630fSguenther	movl	%edx,%eax
732c9de630fSguenther	shrq	$32,%rdx
733c9de630fSguenther	movl	$MSR_FSBASE,%ecx
734c9de630fSguenther	wrmsr
735c9de630fSguenther	jmp	.Lsyscall_restore_registers
736b767b017Sguenther
737f5df1827Smickey#ifdef DIAGNOSTIC
738c9ad316fSguenther.Lsyscall_spl_not_lowered:
739be97ab8cSguenther	leaq	spl_lowered(%rip), %rdi
740c4495499Sguenther	movl	TF_ERR(%rsp),%esi	/* syscall # stashed above */
741f5df1827Smickey	movl	TF_RDI(%rsp),%edx
742f5df1827Smickey	movl	%ebx,%ecx
743b5b9857bSart	movl	CPUVAR(ILEVEL),%r8d
744f5df1827Smickey	xorq	%rax,%rax
745f5df1827Smickey	call	_C_LABEL(printf)
746f5df1827Smickey#ifdef DDB
747f5df1827Smickey	int	$3
748f5df1827Smickey#endif /* DDB */
749f5df1827Smickey	movl	$IPL_NONE,CPUVAR(ILEVEL)
750c9ad316fSguenther	jmp	.Lsyscall_check_asts
751f5df1827Smickey
75232d5845fSderaadt	.section .rodata
75332d5845fSderaadtspl_lowered:
75432d5845fSderaadt	.asciz	"WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n"
75532d5845fSderaadt	.text
75632d5845fSderaadt#endif
757f5df1827Smickey
758f5df1827SmickeyNENTRY(proc_trampoline)
759f5df1827Smickey#ifdef MULTIPROCESSOR
760f5df1827Smickey	call	_C_LABEL(proc_trampoline_mp)
761f5df1827Smickey#endif
762f5df1827Smickey	movl	$IPL_NONE,CPUVAR(ILEVEL)
763f5df1827Smickey	movq	%r13,%rdi
764421775b1Sguenther	movq	%r12,%rax
765421775b1Sguenther	call	retpoline_rax
766c9ad316fSguenther	movq	CPUVAR(CURPROC),%r14
767c9ad316fSguenther	jmp	.Lsyscall_check_asts
768f5df1827Smickey
769f5df1827Smickey
770f5df1827Smickey/*
77131b8ac92Sguenther * Returning to userspace via iretq.  We do things in this order:
77231b8ac92Sguenther *  - check for ASTs
773c9de630fSguenther *  - restore FPU/"extended CPU state" if it's not already in the CPU
77431b8ac92Sguenther *  - DIAGNOSTIC: no more C calls after this, so check the SPL
77531b8ac92Sguenther *  - restore FS.base if it's not already in the CPU
776c9de630fSguenther *  - restore most registers
77731b8ac92Sguenther *  - update the iret frame from the trapframe
77831b8ac92Sguenther *  - finish reading from the trapframe
77931b8ac92Sguenther *  - switch to the trampoline stack	\
78031b8ac92Sguenther *  - jump to the .kutext segment	|-- Meltdown workaround
78131b8ac92Sguenther *  - switch to the user page tables	/
78231b8ac92Sguenther *  - swapgs
78331b8ac92Sguenther *  - iretq
7841396572dSguenther */
7851fc8fad1SguentherKTEXT_PAGE_START
7861fc8fad1Sguenther        _ALIGN_TRAPS
7871fc8fad1SguentherGENTRY(intr_user_exit)
788b767b017Sguenther#ifdef DIAGNOSTIC
789b767b017Sguenther	pushfq
790b767b017Sguenther	popq	%rdx
791b767b017Sguenther	testq	$PSL_I,%rdx
79231b8ac92Sguenther	jnz	.Lintr_user_exit_not_blocked
793b767b017Sguenther#endif /* DIAGNOSTIC */
79431b8ac92Sguenther
79531b8ac92Sguenther	/* Check for ASTs */
79631b8ac92Sguenther	CHECK_ASTPENDING(%r11)
79731b8ac92Sguenther	je	intr_user_exit_post_ast
79831b8ac92Sguenther	CLEAR_ASTPENDING(%r11)
79931b8ac92Sguenther	sti
80031b8ac92Sguenther	movq	%rsp,%rdi
80131b8ac92Sguenther	call	_C_LABEL(ast)
80231b8ac92Sguenther	cli
80331b8ac92Sguenther	jmp	intr_user_exit
80431b8ac92Sguenther
80531b8ac92Sguentherintr_user_exit_post_ast:
806c9de630fSguenther	/* Restore FPU/"extended CPU state" if it's not already in the CPU */
807c9de630fSguenther	testl	$CPUF_USERXSTATE,CPUVAR(FLAGS)
808c9de630fSguenther	jz	.Lintr_restore_xstate
809c9de630fSguenther
81031b8ac92Sguenther#ifdef DIAGNOSTIC
81131b8ac92Sguenther	/* no more C calls after this, so check the SPL */
81231b8ac92Sguenther	cmpl	$0,CPUVAR(ILEVEL)
81331b8ac92Sguenther	jne	.Luser_spl_not_lowered
81431b8ac92Sguenther#endif /* DIAGNOSTIC */
81531b8ac92Sguenther
816c9de630fSguenther	/* Restore FS.base if it's not already in the CPU */
817c9de630fSguenther	testl	$CPUF_USERSEGS,CPUVAR(FLAGS)
818c9de630fSguenther	jz	.Lintr_restore_fsbase
819c9de630fSguenther
820c9de630fSguenther.Lintr_restore_registers:
821a4858df8Sguenther	RET_STACK_REFILL_WITH_RCX
822a4858df8Sguenther
8231396572dSguenther	movq	TF_RDI(%rsp),%rdi
8241396572dSguenther	movq	TF_RSI(%rsp),%rsi
8251396572dSguenther	movq	TF_R8(%rsp),%r8
8261396572dSguenther	movq	TF_R9(%rsp),%r9
8271396572dSguenther	movq	TF_R10(%rsp),%r10
8281396572dSguenther	movq	TF_R12(%rsp),%r12
8291396572dSguenther	movq	TF_R13(%rsp),%r13
8301396572dSguenther	movq	TF_R14(%rsp),%r14
8311396572dSguenther	movq	TF_R15(%rsp),%r15
8321396572dSguenther	movq	TF_RBP(%rsp),%rbp
8331396572dSguenther	movq	TF_RBX(%rsp),%rbx
8341396572dSguenther
835b767b017Sguenther	/*
836b767b017Sguenther	 * To get the final value for the register that was used
837b767b017Sguenther	 * for the mov to %cr3, we need access to somewhere accessible
838b767b017Sguenther	 * on the user page tables, so we save it in CPUVAR(SCRATCH)
839b767b017Sguenther	 * across the switch.
840b767b017Sguenther	 */
841b767b017Sguenther	/* update iret frame */
842b767b017Sguenther	movq	CPUVAR(INTR_RSP),%rdx
843b767b017Sguenther	movq	$(GSEL(GUCODE_SEL,SEL_UPL)),IRETQ_CS(%rdx)
844b767b017Sguenther	movq	TF_RIP(%rsp),%rax
845b767b017Sguenther	movq	%rax,IRETQ_RIP(%rdx)
846b767b017Sguenther	movq	TF_RFLAGS(%rsp),%rax
847b767b017Sguenther	movq	%rax,IRETQ_RFLAGS(%rdx)
848b767b017Sguenther	movq	TF_RSP(%rsp),%rax
849b767b017Sguenther	movq	%rax,IRETQ_RSP(%rdx)
850b767b017Sguenther	movq	$(GSEL(GUDATA_SEL,SEL_UPL)),IRETQ_SS(%rdx)
851b767b017Sguenther	/* finish with the trap frame */
852b767b017Sguenther	movq	TF_RAX(%rsp),%rax
853b767b017Sguenther	movq	TF_RCX(%rsp),%rcx
854b767b017Sguenther	movq	TF_R11(%rsp),%r11
855b767b017Sguenther	/* switch to the trampoline stack */
856b767b017Sguenther	xchgq	%rdx,%rsp
857b767b017Sguenther	movq	TF_RDX(%rdx),%rdx
8581fc8fad1Sguenther	CODEPATCH_START
8591fc8fad1Sguenther	movq	%rax,CPUVAR(SCRATCH)
860b767b017Sguenther	movq	CPUVAR(USER_CR3),%rax
861b767b017Sguenther	movq	%rax,%cr3
8621fc8fad1SguentherXiretq_trampback:
8631fc8fad1Sguenther0:	pause
864*a1fa3538Sguenther	lfence
8651fc8fad1Sguenther	jmp	0b
8661fc8fad1Sguenther	.space	5,0xcc		/* pad to match "movq CPUVAR(SCRATCH),%rax" */
8671fc8fad1Sguenther	CODEPATCH_END(CPTAG_MELTDOWN_NOP)
868b767b017Sguenther	swapgs
869b767b017Sguenther
870b767b017Sguenther	.globl	_C_LABEL(doreti_iret)
871b767b017Sguenther_C_LABEL(doreti_iret):
872b767b017Sguenther	iretq
8731fc8fad1SguentherKTEXT_PAGE_END
8741fc8fad1Sguenther
8751fc8fad1SguentherKUTEXT_PAGE_START
8761fc8fad1Sguenther	.space	(Xiretq_trampback - Xsyscall_meltdown) - \
8771fc8fad1Sguenther		(. - XUsyscall_meltdown), 0xcc
8781fc8fad1Sguenther	movq	CPUVAR(SCRATCH),%rax
8791fc8fad1Sguenther	swapgs
8801fc8fad1Sguenther	iretq
8811fc8fad1SguentherKUTEXT_PAGE_END
882b767b017Sguenther
88331b8ac92Sguenther	.text
884b433e1a0Sguenther	_ALIGN_TRAPS
885c9de630fSguenther.Lintr_restore_xstate:		/* CPU doesn't have curproc's xstate */
886c9de630fSguenther	orl	$CPUF_USERXSTATE,CPUVAR(FLAGS)
887c9de630fSguenther	movq	CPUVAR(CURPCB),%rdi
888c9de630fSguenther#if PCB_SAVEFPU != 0
889c9de630fSguenther	addq	$PCB_SAVEFPU,%rdi
890c9de630fSguenther#endif
891c9de630fSguenther	movq	xsave_mask(%rip),%rsi
892c9de630fSguenther	call	xrstor_user
893c9de630fSguenther	testl	%eax,%eax
894c9de630fSguenther	jnz	.Lintr_xrstor_faulted
895c9de630fSguenther.Lintr_restore_fsbase:		/* CPU doesn't have curproc's FS.base */
896c9de630fSguenther	orl	$CPUF_USERSEGS,CPUVAR(FLAGS)
897c9de630fSguenther	movq	CPUVAR(CURPCB),%rdx
898c9de630fSguenther	movq	PCB_FSBASE(%rdx),%rdx
899c9de630fSguenther	movl	%edx,%eax
900c9de630fSguenther	shrq	$32,%rdx
901c9de630fSguenther	movl	$MSR_FSBASE,%ecx
902c9de630fSguenther	wrmsr
903c9de630fSguenther	jmp	.Lintr_restore_registers
904c9de630fSguenther
905c9de630fSguenther.Lintr_xrstor_faulted:
906c9de630fSguenther	/*
907c9de630fSguenther	 * xrstor faulted; we need to reset the FPU state and call trap()
908c9de630fSguenther	 * to post a signal, which requires interrupts be enabled.
909c9de630fSguenther	 */
910c9de630fSguenther	sti
911c9de630fSguenther	movq	proc0paddr(%rip),%rdi
912c9de630fSguenther#if PCB_SAVEFPU != 0
913c9de630fSguenther	addq	$PCB_SAVEFPU,%rdi
914c9de630fSguenther#endif
915c9de630fSguenther	CODEPATCH_START
916c9de630fSguenther	.byte 0x48; fxrstor	(%rdi)		/* really fxrstor64 */
917c9de630fSguenther	CODEPATCH_END(CPTAG_XRSTOR)
918c9de630fSguenther	movq	$T_PROTFLT,TF_TRAPNO(%rsp)
919c9de630fSguenther	jmp	recall_trap
920c9de630fSguenther
921c9de630fSguenther#ifdef DIAGNOSTIC
92231b8ac92Sguenther.Lintr_user_exit_not_blocked:
92331b8ac92Sguenther	movl	warn_once(%rip),%edi
92431b8ac92Sguenther	testl	%edi,%edi
92531b8ac92Sguenther	jnz	1f
92631b8ac92Sguenther	incl	%edi
92731b8ac92Sguenther	movl	%edi,warn_once(%rip)
92831b8ac92Sguenther	leaq	.Lnot_blocked(%rip),%rdi
92931b8ac92Sguenther	call	_C_LABEL(printf)
93031b8ac92Sguenther#ifdef DDB
93131b8ac92Sguenther	int	$3
93231b8ac92Sguenther#endif /* DDB */
93331b8ac92Sguenther1:	cli
93431b8ac92Sguenther	jmp	intr_user_exit
93531b8ac92Sguenther
93631b8ac92Sguenther.Luser_spl_not_lowered:
93731b8ac92Sguenther	sti
93831b8ac92Sguenther	leaq	intr_spl_lowered(%rip),%rdi
93931b8ac92Sguenther	movl	CPUVAR(ILEVEL),%esi
94031b8ac92Sguenther	xorl	%edx,%edx		/* always SPL zero for userspace */
94131b8ac92Sguenther	xorl	%eax,%eax
94231b8ac92Sguenther	call	_C_LABEL(printf)
94331b8ac92Sguenther#ifdef DDB
94431b8ac92Sguenther	int	$3
94531b8ac92Sguenther#endif /* DDB */
94631b8ac92Sguenther	movl	$0,CPUVAR(ILEVEL)
94731b8ac92Sguenther	cli
94831b8ac92Sguenther	jmp	intr_user_exit
94931b8ac92Sguenther
95031b8ac92Sguenther	.section .rodata
95131b8ac92Sguentherintr_spl_lowered:
95231b8ac92Sguenther	.asciz	"WARNING: SPL NOT LOWERED ON TRAP EXIT %x %x\n"
95331b8ac92Sguenther	.text
95431b8ac92Sguenther#endif /* DIAGNOSTIC */
95531b8ac92Sguenther
95631b8ac92Sguenther
95731b8ac92Sguenther/*
95831b8ac92Sguenther * Return to supervisor mode from trap or interrupt
95931b8ac92Sguenther */
96031b8ac92SguentherNENTRY(intr_fast_exit)
96131b8ac92Sguenther#ifdef DIAGNOSTIC
96231b8ac92Sguenther	pushfq
96331b8ac92Sguenther	popq	%rdx
96431b8ac92Sguenther	testq	$PSL_I,%rdx
96531b8ac92Sguenther	jnz	.Lintr_exit_not_blocked
96631b8ac92Sguenther#endif /* DIAGNOSTIC */
96731b8ac92Sguenther	movq	TF_RDI(%rsp),%rdi
96831b8ac92Sguenther	movq	TF_RSI(%rsp),%rsi
96931b8ac92Sguenther	movq	TF_R8(%rsp),%r8
97031b8ac92Sguenther	movq	TF_R9(%rsp),%r9
97131b8ac92Sguenther	movq	TF_R10(%rsp),%r10
97231b8ac92Sguenther	movq	TF_R12(%rsp),%r12
97331b8ac92Sguenther	movq	TF_R13(%rsp),%r13
97431b8ac92Sguenther	movq	TF_R14(%rsp),%r14
97531b8ac92Sguenther	movq	TF_R15(%rsp),%r15
97631b8ac92Sguenther	movq	TF_RBP(%rsp),%rbp
97731b8ac92Sguenther	movq	TF_RBX(%rsp),%rbx
978b767b017Sguenther	movq	TF_RDX(%rsp),%rdx
9791396572dSguenther	movq	TF_RCX(%rsp),%rcx
9801396572dSguenther	movq	TF_R11(%rsp),%r11
9811396572dSguenther	movq	TF_RAX(%rsp),%rax
9821396572dSguenther	addq	$TF_RIP,%rsp
983c6853312Sguenther	iretq
984c6853312Sguenther
985b767b017Sguenther#ifdef DIAGNOSTIC
986b767b017Sguenther.Lintr_exit_not_blocked:
987b767b017Sguenther	movl	warn_once(%rip),%edi
988b767b017Sguenther	testl	%edi,%edi
989b767b017Sguenther	jnz	1f
990b767b017Sguenther	incl	%edi
991b767b017Sguenther	movl	%edi,warn_once(%rip)
992b767b017Sguenther	leaq	.Lnot_blocked(%rip),%rdi
993b767b017Sguenther	call	_C_LABEL(printf)
994b767b017Sguenther#ifdef DDB
995b767b017Sguenther	int	$3
996b767b017Sguenther#endif /* DDB */
997b767b017Sguenther1:	cli
998b767b017Sguenther	jmp	intr_fast_exit
999b767b017Sguenther
1000b767b017Sguenther	.data
1001b767b017Sguenther.global warn_once
1002b767b017Sguentherwarn_once:
1003b767b017Sguenther	.long	0
1004b767b017Sguenther	.section .rodata
1005b767b017Sguenther.Lnot_blocked:
1006b767b017Sguenther	.asciz	"WARNING: INTERRUPTS NOT BLOCKED ON INTERRUPT RETURN: 0x%x 0x%x\n"
1007b767b017Sguenther	.text
1008b767b017Sguenther#endif
10096950c8e2Smpi
1010c9de630fSguenther/*
1011c9de630fSguenther * FPU/"extended CPU state" handling
1012c9de630fSguenther * 	int xrstor_user(sfp, mask)
1013c9de630fSguenther *		load given state, returns 0/1 if okay/it trapped
1014c9de630fSguenther *	void fpusave(sfp)
1015c9de630fSguenther *		save current state, but retain it in the FPU
1016c9de630fSguenther *	void fpusavereset(sfp)
1017c9de630fSguenther *		save current state and reset FPU to initial/kernel state
1018c9de630fSguenther */
1019c9de630fSguenther
1020b1cdcaf5SguentherENTRY(xrstor_user)
1021db0a8dc5Smortimer	RETGUARD_SETUP(xrstor_user, r11)
1022b1cdcaf5Sguenther	movq	%rsi, %rdx
1023b1cdcaf5Sguenther	movl	%esi, %eax
1024b1cdcaf5Sguenther	shrq	$32, %rdx
1025b1cdcaf5Sguenther	.globl	xrstor_fault
1026b1cdcaf5Sguentherxrstor_fault:
1027c9de630fSguenther	CODEPATCH_START
1028c9de630fSguenther	.byte 0x48; fxrstor	(%rdi)		/* really fxrstor64 */
1029c9de630fSguenther	CODEPATCH_END(CPTAG_XRSTOR)
1030198d2c0bSguenther	xorl	%eax, %eax
1031db0a8dc5Smortimer	RETGUARD_CHECK(xrstor_user, r11)
1032198d2c0bSguenther	ret
1033be97ab8cSguentherNENTRY(xrstor_resume)
1034198d2c0bSguenther	movl	$1, %eax
1035db0a8dc5Smortimer	RETGUARD_CHECK(xrstor_user, r11)
1036b1cdcaf5Sguenther	ret
1037c9de630fSguentherEND(xrstor_user)
1038c9de630fSguenther
1039c9de630fSguentherENTRY(fpusave)
1040db0a8dc5Smortimer	RETGUARD_SETUP(fpusave, r11)
1041c9de630fSguenther	movq	xsave_mask(%rip),%rdx
1042c9de630fSguenther	movl	%edx,%eax
1043c9de630fSguenther	shrq	$32,%rdx
1044c9de630fSguenther	CODEPATCH_START
1045c9de630fSguenther	.byte 0x48; fxsave	(%rdi)		/* really fxsave64 */
1046c9de630fSguenther	CODEPATCH_END(CPTAG_XSAVE)
1047db0a8dc5Smortimer	RETGUARD_CHECK(fpusave, r11)
1048c9de630fSguenther	ret
1049c9de630fSguentherEND(fpusave)
1050c9de630fSguenther
1051c9de630fSguentherENTRY(fpusavereset)
1052db0a8dc5Smortimer	RETGUARD_SETUP(fpusavereset, r11)
1053c9de630fSguenther	movq	xsave_mask(%rip),%rdx
1054c9de630fSguenther	movl	%edx,%eax
1055c9de630fSguenther	shrq	$32,%rdx
1056c9de630fSguenther	CODEPATCH_START
1057c9de630fSguenther	.byte 0x48; fxsave	(%rdi)		/* really fxsave64 */
1058c9de630fSguenther	CODEPATCH_END(CPTAG_XSAVE)
1059c9de630fSguenther	movq	proc0paddr(%rip),%rdi
1060c9de630fSguenther#if PCB_SAVEFPU != 0
1061c9de630fSguenther	addq	$PCB_SAVEFPU,%rdi
1062c9de630fSguenther#endif
1063c9de630fSguenther	CODEPATCH_START
1064c9de630fSguenther	.byte 0x48; fxrstor	(%rdi)		/* really fxrstor64 */
1065c9de630fSguenther	CODEPATCH_END(CPTAG_XRSTOR)
1066db0a8dc5Smortimer	RETGUARD_CHECK(fpusavereset, r11)
1067c9de630fSguenther	ret
1068c9de630fSguentherEND(fpusavereset)
1069c9de630fSguenther
1070c9de630fSguenther	.section .rodata
1071c9de630fSguenther	.globl	_C_LABEL(_xrstor)
1072c9de630fSguenther_C_LABEL(_xrstor):
1073c9de630fSguenther	.byte 0x48; xrstor	(%rdi)		/* really xrstor64 */
1074c9de630fSguenther
1075c9de630fSguenther	.globl	_C_LABEL(_xsave)
1076c9de630fSguenther_C_LABEL(_xsave):
1077c9de630fSguenther	.byte 0x48; xsave	(%rdi)		/* really xsave64 */
1078c9de630fSguenther
1079c9de630fSguenther	.globl	_C_LABEL(_xsaveopt)
1080c9de630fSguenther_C_LABEL(_xsaveopt):
1081c9de630fSguenther	.byte 0x48; xsaveopt	(%rdi)		/* really xsaveopt64 */
1082b1cdcaf5Sguenther
10833a36161cSartENTRY(pagezero)
1084db0a8dc5Smortimer	RETGUARD_SETUP(pagezero, r11)
10853a36161cSart	movq    $-PAGE_SIZE,%rdx
10863a36161cSart	subq    %rdx,%rdi
10873a36161cSart	xorq    %rax,%rax
10883a36161cSart1:
10893a36161cSart	movnti  %rax,(%rdi,%rdx)
10903a36161cSart	movnti  %rax,8(%rdi,%rdx)
10913a36161cSart	movnti  %rax,16(%rdi,%rdx)
10923a36161cSart	movnti  %rax,24(%rdi,%rdx)
10933a36161cSart	addq    $32,%rdx
10943a36161cSart	jne     1b
10953a36161cSart	sfence
1096db0a8dc5Smortimer	RETGUARD_CHECK(pagezero, r11)
10973a36161cSart	ret
10983c8478a6Sgwk
1099d8213a49Smikeb#if NXEN > 0
1100d8213a49Smikeb	/* Hypercall page needs to be page aligned */
1101d8213a49Smikeb	.text
11020175496dSderaadt	.align	NBPG, 0xcc
1103d8213a49Smikeb	.globl	_C_LABEL(xen_hypercall_page)
1104d8213a49Smikeb_C_LABEL(xen_hypercall_page):
11050175496dSderaadt	.skip	0x1000, 0xcc
1106d8213a49Smikeb#endif /* NXEN > 0 */
1107218ead0bSmikeb
1108218ead0bSmikeb#if NHYPERV > 0
1109218ead0bSmikeb	/* Hypercall page needs to be page aligned */
1110218ead0bSmikeb	.text
1111218ead0bSmikeb	.align	NBPG, 0xcc
1112218ead0bSmikeb	.globl	_C_LABEL(hv_hypercall_page)
1113218ead0bSmikeb_C_LABEL(hv_hypercall_page):
1114218ead0bSmikeb	.skip	0x1000, 0xcc
1115218ead0bSmikeb#endif /* NXEN > 0 */
1116