xref: /freebsd/sys/amd64/amd64/exception.S (revision 148a8da8)
1/*-
2 * Copyright (c) 1989, 1990 William F. Jolitz.
3 * Copyright (c) 1990 The Regents of the University of California.
4 * Copyright (c) 2007-2018 The FreeBSD Foundation
5 * All rights reserved.
6 *
7 * Portions of this software were developed by A. Joseph Koshy under
8 * sponsorship from the FreeBSD Foundation and Google, Inc.
9 *
10 * Portions of this software were developed by
11 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
12 * the FreeBSD Foundation.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 *    notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 *    notice, this list of conditions and the following disclaimer in the
21 *    documentation and/or other materials provided with the distribution.
22 * 3. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * $FreeBSD$
39 */
40
41#include "opt_atpic.h"
42#include "opt_hwpmc_hooks.h"
43
44#include "assym.inc"
45
46#include <machine/psl.h>
47#include <machine/asmacros.h>
48#include <machine/trap.h>
49#include <machine/specialreg.h>
50
51#ifdef KDTRACE_HOOKS
52	.bss
53	.globl	dtrace_invop_jump_addr
54	.align	8
55	.type	dtrace_invop_jump_addr,@object
56	.size	dtrace_invop_jump_addr,8
57dtrace_invop_jump_addr:
58	.zero	8
59	.globl	dtrace_invop_calltrap_addr
60	.align	8
61	.type	dtrace_invop_calltrap_addr,@object
62	.size	dtrace_invop_calltrap_addr,8
63dtrace_invop_calltrap_addr:
64	.zero	8
65#endif
66	.text
67#ifdef HWPMC_HOOKS
68	ENTRY(start_exceptions)
69#endif
70
71/*****************************************************************************/
72/* Trap handling                                                             */
73/*****************************************************************************/
74/*
75 * Trap and fault vector routines.
76 *
77 * All traps are 'interrupt gates', SDT_SYSIGT.  An interrupt gate pushes
78 * state on the stack but also disables interrupts.  This is important for
79 * us for the use of the swapgs instruction.  We cannot be interrupted
80 * until the GS.base value is correct.  For most traps, we automatically
81 * then enable interrupts if the interrupted context had them enabled.
82 * This is equivalent to the i386 port's use of SDT_SYS386TGT.
83 *
84 * The cpu will push a certain amount of state onto the kernel stack for
85 * the current process.  See amd64/include/frame.h.
86 * This includes the current RFLAGS (status register, which includes
87 * the interrupt disable state prior to the trap), the code segment register,
88 * and the return instruction pointer are pushed by the cpu.  The cpu
89 * will also push an 'error' code for certain traps.  We push a dummy
90 * error code for those traps where the cpu doesn't in order to maintain
91 * a consistent frame.  We also push a contrived 'trap number'.
92 *
93 * The CPU does not push the general registers, so we must do that, and we
94 * must restore them prior to calling 'iret'.  The CPU adjusts %cs and %ss
95 * but does not mess with %ds, %es, %gs or %fs.  We swap the %gs base for
96 * for the kernel mode operation shortly, without changes to the selector
97 * loaded.  Since superuser long mode works with any selectors loaded into
98 * segment registers other then %cs, which makes them mostly unused in long
99 * mode, and kernel does not reference %fs, leave them alone.  The segment
100 * registers are reloaded on return to the usermode.
101 */
102
103MCOUNT_LABEL(user)
104MCOUNT_LABEL(btrap)
105
106/* Traps that we leave interrupts disabled for. */
107	.macro	TRAP_NOEN	l, trapno
108	PTI_ENTRY	\l,X\l
109	.globl	X\l
110	.type	X\l,@function
111X\l:	subq $TF_RIP,%rsp
112	movl $\trapno,TF_TRAPNO(%rsp)
113	movq $0,TF_ADDR(%rsp)
114	movq $0,TF_ERR(%rsp)
115	jmp alltraps_noen
116	.endm
117
118	TRAP_NOEN	bpt, T_BPTFLT
119#ifdef KDTRACE_HOOKS
120	TRAP_NOEN	dtrace_ret, T_DTRACE_RET
121#endif
122
123/* Regular traps; The cpu does not supply tf_err for these. */
124	.macro	TRAP	l, trapno
125	PTI_ENTRY	\l,X\l
126	.globl	X\l
127	.type	X\l,@function
128X\l:
129	subq $TF_RIP,%rsp
130	movl $\trapno,TF_TRAPNO(%rsp)
131	movq $0,TF_ADDR(%rsp)
132	movq $0,TF_ERR(%rsp)
133	jmp alltraps
134	.endm
135
136	TRAP	div, T_DIVIDE
137	TRAP	ofl, T_OFLOW
138	TRAP	bnd, T_BOUND
139	TRAP	ill, T_PRIVINFLT
140	TRAP	dna, T_DNA
141	TRAP	fpusegm, T_FPOPFLT
142	TRAP	rsvd, T_RESERVED
143	TRAP	fpu, T_ARITHTRAP
144	TRAP	xmm, T_XMMFLT
145
146/* This group of traps have tf_err already pushed by the cpu. */
147	.macro	TRAP_ERR	l, trapno
148	PTI_ENTRY	\l,X\l,has_err=1
149	.globl	X\l
150	.type	X\l,@function
151X\l:
152	subq $TF_ERR,%rsp
153	movl $\trapno,TF_TRAPNO(%rsp)
154	movq $0,TF_ADDR(%rsp)
155	jmp alltraps
156	.endm
157
158	TRAP_ERR	tss, T_TSSFLT
159	TRAP_ERR	align, T_ALIGNFLT
160
161	/*
162	 * alltraps entry point.  Use swapgs if this is the first time in the
163	 * kernel from userland.  Reenable interrupts if they were enabled
164	 * before the trap.  This approximates SDT_SYS386TGT on the i386 port.
165	 */
166	SUPERALIGN_TEXT
167	.globl	alltraps
168	.type	alltraps,@function
169alltraps:
170	movq	%rdi,TF_RDI(%rsp)
171	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
172	jz	1f		/* already running with kernel GS.base */
173	swapgs
174	movq	PCPU(CURPCB),%rdi
175	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
1761:	SAVE_SEGS
177	movq	%rdx,TF_RDX(%rsp)
178	movq	%rax,TF_RAX(%rsp)
179	movq	%rcx,TF_RCX(%rsp)
180	testb	$SEL_RPL_MASK,TF_CS(%rsp)
181	jz	2f
182	call	handle_ibrs_entry
1832:	testl	$PSL_I,TF_RFLAGS(%rsp)
184	jz	alltraps_pushregs_no_rax
185	sti
186alltraps_pushregs_no_rax:
187	movq	%rsi,TF_RSI(%rsp)
188	movq	%r8,TF_R8(%rsp)
189	movq	%r9,TF_R9(%rsp)
190	movq	%rbx,TF_RBX(%rsp)
191	movq	%rbp,TF_RBP(%rsp)
192	movq	%r10,TF_R10(%rsp)
193	movq	%r11,TF_R11(%rsp)
194	movq	%r12,TF_R12(%rsp)
195	movq	%r13,TF_R13(%rsp)
196	movq	%r14,TF_R14(%rsp)
197	movq	%r15,TF_R15(%rsp)
198	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
199	pushfq
200	andq	$~(PSL_D | PSL_AC),(%rsp)
201	popfq
202	FAKE_MCOUNT(TF_RIP(%rsp))
203#ifdef KDTRACE_HOOKS
204	/*
205	 * DTrace Function Boundary Trace (fbt) probes are triggered
206	 * by int3 (0xcc) which causes the #BP (T_BPTFLT) breakpoint
207	 * interrupt. For all other trap types, just handle them in
208	 * the usual way.
209	 */
210	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
211	jnz	calltrap		/* ignore userland traps */
212	cmpl	$T_BPTFLT,TF_TRAPNO(%rsp)
213	jne	calltrap
214
215	/* Check if there is no DTrace hook registered. */
216	cmpq	$0,dtrace_invop_jump_addr
217	je	calltrap
218
219	/*
220	 * Set our jump address for the jump back in the event that
221	 * the breakpoint wasn't caused by DTrace at all.
222	 */
223	movq	$calltrap,dtrace_invop_calltrap_addr(%rip)
224
225	/* Jump to the code hooked in by DTrace. */
226	jmpq	*dtrace_invop_jump_addr
227#endif
228	.globl	calltrap
229	.type	calltrap,@function
230calltrap:
231	movq	%rsp,%rdi
232	call	trap_check
233	MEXITCOUNT
234	jmp	doreti			/* Handle any pending ASTs */
235
236	/*
237	 * alltraps_noen entry point.  Unlike alltraps above, we want to
238	 * leave the interrupts disabled.  This corresponds to
239	 * SDT_SYS386IGT on the i386 port.
240	 */
241	SUPERALIGN_TEXT
242	.globl	alltraps_noen
243	.type	alltraps_noen,@function
244alltraps_noen:
245	movq	%rdi,TF_RDI(%rsp)
246	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
247	jz	1f /* already running with kernel GS.base */
248	swapgs
249	movq	PCPU(CURPCB),%rdi
250	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
2511:	SAVE_SEGS
252	movq	%rdx,TF_RDX(%rsp)
253	movq	%rax,TF_RAX(%rsp)
254	movq	%rcx,TF_RCX(%rsp)
255	testb	$SEL_RPL_MASK,TF_CS(%rsp)
256	jz	alltraps_pushregs_no_rax
257	call	handle_ibrs_entry
258	jmp	alltraps_pushregs_no_rax
259
260IDTVEC(dblfault)
261	subq	$TF_ERR,%rsp
262	movl	$T_DOUBLEFLT,TF_TRAPNO(%rsp)
263	movq	$0,TF_ADDR(%rsp)
264	movq	$0,TF_ERR(%rsp)
265	movq	%rdi,TF_RDI(%rsp)
266	movq	%rsi,TF_RSI(%rsp)
267	movq	%rdx,TF_RDX(%rsp)
268	movq	%rcx,TF_RCX(%rsp)
269	movq	%r8,TF_R8(%rsp)
270	movq	%r9,TF_R9(%rsp)
271	movq	%rax,TF_RAX(%rsp)
272	movq	%rbx,TF_RBX(%rsp)
273	movq	%rbp,TF_RBP(%rsp)
274	movq	%r10,TF_R10(%rsp)
275	movq	%r11,TF_R11(%rsp)
276	movq	%r12,TF_R12(%rsp)
277	movq	%r13,TF_R13(%rsp)
278	movq	%r14,TF_R14(%rsp)
279	movq	%r15,TF_R15(%rsp)
280	SAVE_SEGS
281	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
282	pushfq
283	andq	$~(PSL_D | PSL_AC),(%rsp)
284	popfq
285	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
286	jz	1f			/* already running with kernel GS.base */
287	swapgs
2881:
289	movq	PCPU(KCR3),%rax
290	cmpq	$~0,%rax
291	je	2f
292	movq	%rax,%cr3
2932:	movq	%rsp,%rdi
294	call	dblfault_handler
2953:	hlt
296	jmp	3b
297
298	ALIGN_TEXT
299IDTVEC(page_pti)
300	testb	$SEL_RPL_MASK,PTI_CS-2*8(%rsp)
301	jz	Xpage
302	swapgs
303	pushq	%rax
304	movq	%cr3,%rax
305	movq	%rax,PCPU(SAVED_UCR3)
306	cmpq	$~0,PCPU(UCR3)
307	jne	1f
308	popq	%rax
309	jmp	2f
3101:	pushq	%rdx
311	PTI_UUENTRY has_err=1
3122:	subq	$TF_ERR,%rsp
313	movq	%rdi,TF_RDI(%rsp)
314	movq	%rax,TF_RAX(%rsp)
315	movq	%rdx,TF_RDX(%rsp)
316	movq	%rcx,TF_RCX(%rsp)
317	jmp	page_u
318IDTVEC(page)
319	subq	$TF_ERR,%rsp
320	movq	%rdi,TF_RDI(%rsp)	/* free up GP registers */
321	movq	%rax,TF_RAX(%rsp)
322	movq	%rdx,TF_RDX(%rsp)
323	movq	%rcx,TF_RCX(%rsp)
324	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
325	jz	page_cr2		/* already running with kernel GS.base */
326	swapgs
327page_u:	movq	PCPU(CURPCB),%rdi
328	andl	$~PCB_FULL_IRET,PCB_FLAGS(%rdi)
329	movq	PCPU(SAVED_UCR3),%rax
330	movq	%rax,PCB_SAVED_UCR3(%rdi)
331	call	handle_ibrs_entry
332page_cr2:
333	movq	%cr2,%rdi		/* preserve %cr2 before ..  */
334	movq	%rdi,TF_ADDR(%rsp)	/* enabling interrupts. */
335	SAVE_SEGS
336	movl	$T_PAGEFLT,TF_TRAPNO(%rsp)
337	testl	$PSL_I,TF_RFLAGS(%rsp)
338	jz	alltraps_pushregs_no_rax
339	sti
340	jmp	alltraps_pushregs_no_rax
341
342	/*
343	 * We have to special-case this one.  If we get a trap in doreti() at
344	 * the iretq stage, we'll reenter with the wrong gs state.  We'll have
345	 * to do a special the swapgs in this case even coming from the kernel.
346	 * XXX linux has a trap handler for their equivalent of load_gs().
347	 *
348	 * On the stack, we have the hardware interrupt frame to return
349	 * to usermode (faulted) and another frame with error code, for
350	 * fault.  For PTI, copy both frames to the main thread stack.
351	 * Handle the potential 16-byte alignment adjustment incurred
352	 * during the second fault by copying both frames independently
353	 * while unwinding the stack in between.
354	 */
355	.macro PROTF_ENTRY name,trapno
356\name\()_pti_doreti:
357	swapgs
358	cmpq	$~0,PCPU(UCR3)
359	je	1f
360	pushq	%rax
361	pushq	%rdx
362	movq	PCPU(KCR3),%rax
363	movq	%rax,%cr3
364	movq	PCPU(RSP0),%rax
365	subq	$2*PTI_SIZE-3*8,%rax /* no err, %rax, %rdx in faulted frame */
366	MOVE_STACKS	(PTI_SIZE / 8)
367	addq	$PTI_SIZE,%rax
368	movq	PTI_RSP(%rsp),%rsp
369	MOVE_STACKS	(PTI_SIZE / 8 - 3)
370	subq	$PTI_SIZE,%rax
371	movq	%rax,%rsp
372	popq	%rdx
373	popq	%rax
3741:	swapgs
375	jmp	X\name
376IDTVEC(\name\()_pti)
377	cmpq	$doreti_iret,PTI_RIP-2*8(%rsp)
378	je	\name\()_pti_doreti
379	testb	$SEL_RPL_MASK,PTI_CS-2*8(%rsp) /* %rax, %rdx not yet pushed */
380	jz	X\name
381	PTI_UENTRY has_err=1
382	swapgs
383IDTVEC(\name)
384	subq	$TF_ERR,%rsp
385	movl	$\trapno,TF_TRAPNO(%rsp)
386	jmp	prot_addrf
387	.endm
388
389	PROTF_ENTRY	missing, T_SEGNPFLT
390	PROTF_ENTRY	stk, T_STKFLT
391	PROTF_ENTRY	prot, T_PROTFLT
392
393prot_addrf:
394	movq	$0,TF_ADDR(%rsp)
395	movq	%rdi,TF_RDI(%rsp)	/* free up a GP register */
396	movq	%rax,TF_RAX(%rsp)
397	movq	%rdx,TF_RDX(%rsp)
398	movq	%rcx,TF_RCX(%rsp)
399	movw	%fs,TF_FS(%rsp)
400	movw	%gs,TF_GS(%rsp)
401	leaq	doreti_iret(%rip),%rdi
402	cmpq	%rdi,TF_RIP(%rsp)
403	je	5f			/* kernel but with user gsbase!! */
404	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
405	jz	6f			/* already running with kernel GS.base */
406	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
407	jz	2f
408	cmpw	$KUF32SEL,TF_FS(%rsp)
409	jne	1f
410	rdfsbase %rax
4111:	cmpw	$KUG32SEL,TF_GS(%rsp)
412	jne	2f
413	rdgsbase %rdx
4142:	swapgs
415	movq	PCPU(CURPCB),%rdi
416	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
417	jz	4f
418	cmpw	$KUF32SEL,TF_FS(%rsp)
419	jne	3f
420	movq	%rax,PCB_FSBASE(%rdi)
4213:	cmpw	$KUG32SEL,TF_GS(%rsp)
422	jne	4f
423	movq	%rdx,PCB_GSBASE(%rdi)
4244:	call	handle_ibrs_entry
425	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)	/* always full iret from GPF */
426	movw	%es,TF_ES(%rsp)
427	movw	%ds,TF_DS(%rsp)
428	testl	$PSL_I,TF_RFLAGS(%rsp)
429	jz	alltraps_pushregs_no_rax
430	sti
431	jmp	alltraps_pushregs_no_rax
432
4335:	swapgs
4346:	movq	PCPU(CURPCB),%rdi
435	jmp	4b
436
437/*
438 * Fast syscall entry point.  We enter here with just our new %cs/%ss set,
439 * and the new privilige level.  We are still running on the old user stack
440 * pointer.  We have to juggle a few things around to find our stack etc.
441 * swapgs gives us access to our PCPU space only.
442 *
443 * We do not support invoking this from a custom segment registers,
444 * esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT.
445 */
446	SUPERALIGN_TEXT
447IDTVEC(fast_syscall_pti)
448	swapgs
449	movq	%rax,PCPU(SCRATCH_RAX)
450	cmpq	$~0,PCPU(UCR3)
451	je	fast_syscall_common
452	movq	PCPU(KCR3),%rax
453	movq	%rax,%cr3
454	jmp	fast_syscall_common
455	SUPERALIGN_TEXT
456IDTVEC(fast_syscall)
457	swapgs
458	movq	%rax,PCPU(SCRATCH_RAX)
459fast_syscall_common:
460	movq	%rsp,PCPU(SCRATCH_RSP)
461	movq	PCPU(RSP0),%rsp
462	/* Now emulate a trapframe. Make the 8 byte alignment odd for call. */
463	subq	$TF_SIZE,%rsp
464	/* defer TF_RSP till we have a spare register */
465	movq	%r11,TF_RFLAGS(%rsp)
466	movq	%rcx,TF_RIP(%rsp)	/* %rcx original value is in %r10 */
467	movq	PCPU(SCRATCH_RSP),%r11	/* %r11 already saved */
468	movq	%r11,TF_RSP(%rsp)	/* user stack pointer */
469	movq	PCPU(SCRATCH_RAX),%rax
470	/*
471	 * Save a few arg registers early to free them for use in
472	 * handle_ibrs_entry().  %r10 is especially tricky.  It is not an
473	 * arg register, but it holds the arg register %rcx.  Profiling
474	 * preserves %rcx, but may clobber %r10.  Profiling may also
475	 * clobber %r11, but %r11 (original %eflags) has been saved.
476	 */
477	movq	%rax,TF_RAX(%rsp)	/* syscall number */
478	movq	%rdx,TF_RDX(%rsp)	/* arg 3 */
479	movq	%r10,TF_RCX(%rsp)	/* arg 4 */
480	SAVE_SEGS
481	call	handle_ibrs_entry
482	movq	PCPU(CURPCB),%r11
483	andl	$~PCB_FULL_IRET,PCB_FLAGS(%r11)
484	sti
485	movq	$KUDSEL,TF_SS(%rsp)
486	movq	$KUCSEL,TF_CS(%rsp)
487	movq	$2,TF_ERR(%rsp)
488	movq	%rdi,TF_RDI(%rsp)	/* arg 1 */
489	movq	%rsi,TF_RSI(%rsp)	/* arg 2 */
490	movq	%r8,TF_R8(%rsp)		/* arg 5 */
491	movq	%r9,TF_R9(%rsp)		/* arg 6 */
492	movq	%rbx,TF_RBX(%rsp)	/* C preserved */
493	movq	%rbp,TF_RBP(%rsp)	/* C preserved */
494	movq	%r12,TF_R12(%rsp)	/* C preserved */
495	movq	%r13,TF_R13(%rsp)	/* C preserved */
496	movq	%r14,TF_R14(%rsp)	/* C preserved */
497	movq	%r15,TF_R15(%rsp)	/* C preserved */
498	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
499	FAKE_MCOUNT(TF_RIP(%rsp))
500	movq	PCPU(CURTHREAD),%rdi
501	movq	%rsp,TD_FRAME(%rdi)
502	movl	TF_RFLAGS(%rsp),%esi
503	andl	$PSL_T,%esi
504	call	amd64_syscall
5051:	movq	PCPU(CURPCB),%rax
506	/* Disable interrupts before testing PCB_FULL_IRET. */
507	cli
508	testl	$PCB_FULL_IRET,PCB_FLAGS(%rax)
509	jnz	4f
510	/* Check for and handle AST's on return to userland. */
511	movq	PCPU(CURTHREAD),%rax
512	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
513	jne	3f
514	call	handle_ibrs_exit
515	/* Restore preserved registers. */
516	MEXITCOUNT
517	movq	TF_RDI(%rsp),%rdi	/* bonus; preserve arg 1 */
518	movq	TF_RSI(%rsp),%rsi	/* bonus: preserve arg 2 */
519	movq	TF_RDX(%rsp),%rdx	/* return value 2 */
520	movq	TF_RAX(%rsp),%rax	/* return value 1 */
521	movq	TF_RFLAGS(%rsp),%r11	/* original %rflags */
522	movq	TF_RIP(%rsp),%rcx	/* original %rip */
523	movq	TF_RSP(%rsp),%rsp	/* user stack pointer */
524	xorl	%r8d,%r8d		/* zero the rest of GPRs */
525	xorl	%r10d,%r10d
526	cmpq	$~0,PCPU(UCR3)
527	je	2f
528	movq	PCPU(UCR3),%r9
529	movq	%r9,%cr3
5302:	xorl	%r9d,%r9d
531	swapgs
532	sysretq
533
5343:	/* AST scheduled. */
535	sti
536	movq	%rsp,%rdi
537	call	ast
538	jmp	1b
539
5404:	/* Requested full context restore, use doreti for that. */
541	MEXITCOUNT
542	jmp	doreti
543
544/*
545 * Here for CYA insurance, in case a "syscall" instruction gets
546 * issued from 32 bit compatibility mode. MSR_CSTAR has to point
547 * to *something* if EFER_SCE is enabled.
548 */
549IDTVEC(fast_syscall32)
550	sysret
551
552/*
553 * DB# handler is very similar to NM#, because 'mov/pop %ss' delay
554 * generation of exception until the next instruction is executed,
555 * which might be a kernel entry.  So we must execute the handler
556 * on IST stack and be ready for non-kernel GSBASE.
557 */
558IDTVEC(dbg)
559	subq	$TF_RIP,%rsp
560	movl	$(T_TRCTRAP),TF_TRAPNO(%rsp)
561	movq	$0,TF_ADDR(%rsp)
562	movq	$0,TF_ERR(%rsp)
563	movq	%rdi,TF_RDI(%rsp)
564	movq	%rsi,TF_RSI(%rsp)
565	movq	%rdx,TF_RDX(%rsp)
566	movq	%rcx,TF_RCX(%rsp)
567	movq	%r8,TF_R8(%rsp)
568	movq	%r9,TF_R9(%rsp)
569	movq	%rax,TF_RAX(%rsp)
570	movq	%rbx,TF_RBX(%rsp)
571	movq	%rbp,TF_RBP(%rsp)
572	movq	%r10,TF_R10(%rsp)
573	movq	%r11,TF_R11(%rsp)
574	movq	%r12,TF_R12(%rsp)
575	movq	%r13,TF_R13(%rsp)
576	movq	%r14,TF_R14(%rsp)
577	movq	%r15,TF_R15(%rsp)
578	SAVE_SEGS
579	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
580	pushfq
581	andq	$~(PSL_D | PSL_AC),(%rsp)
582	popfq
583	testb	$SEL_RPL_MASK,TF_CS(%rsp)
584	jnz	dbg_fromuserspace
585	/*
586	 * We've interrupted the kernel.  Preserve GS.base in %r12,
587	 * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d.
588	 */
589	movl	$MSR_GSBASE,%ecx
590	rdmsr
591	movq	%rax,%r12
592	shlq	$32,%rdx
593	orq	%rdx,%r12
594	/* Retrieve and load the canonical value for GS.base. */
595	movq	TF_SIZE(%rsp),%rdx
596	movl	%edx,%eax
597	shrq	$32,%rdx
598	wrmsr
599	movq	%cr3,%r13
600	movq	PCPU(KCR3),%rax
601	cmpq	$~0,%rax
602	je	1f
603	movq	%rax,%cr3
6041:	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
605	je	2f
606	movl	$MSR_IA32_SPEC_CTRL,%ecx
607	rdmsr
608	movl	%eax,%r14d
609	call	handle_ibrs_entry
6102:	FAKE_MCOUNT(TF_RIP(%rsp))
611	movq	%rsp,%rdi
612	call	trap
613	MEXITCOUNT
614	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
615	je	3f
616	movl	%r14d,%eax
617	xorl	%edx,%edx
618	movl	$MSR_IA32_SPEC_CTRL,%ecx
619	wrmsr
620	/*
621	 * Put back the preserved MSR_GSBASE value.
622	 */
6233:	movl	$MSR_GSBASE,%ecx
624	movq	%r12,%rdx
625	movl	%edx,%eax
626	shrq	$32,%rdx
627	wrmsr
628	movq	%r13,%cr3
629	RESTORE_REGS
630	addq	$TF_RIP,%rsp
631	jmp	doreti_iret
632dbg_fromuserspace:
633	/*
634	 * Switch to kernel GSBASE and kernel page table, and copy frame
635	 * from the IST stack to the normal kernel stack, since trap()
636	 * re-enables interrupts, and since we might trap on DB# while
637	 * in trap().
638	 */
639	swapgs
640	movq	PCPU(KCR3),%rax
641	cmpq	$~0,%rax
642	je	1f
643	movq	%rax,%cr3
6441:	movq	PCPU(RSP0),%rax
645	movl	$TF_SIZE,%ecx
646	subq	%rcx,%rax
647	movq	%rax,%rdi
648	movq	%rsp,%rsi
649	rep;movsb
650	movq	%rax,%rsp
651	call	handle_ibrs_entry
652	movq	PCPU(CURPCB),%rdi
653	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)
654	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
655	jz	3f
656	cmpw	$KUF32SEL,TF_FS(%rsp)
657	jne	2f
658	rdfsbase %rax
659	movq	%rax,PCB_FSBASE(%rdi)
6602:	cmpw	$KUG32SEL,TF_GS(%rsp)
661	jne	3f
662	movl	$MSR_KGSBASE,%ecx
663	rdmsr
664	shlq	$32,%rdx
665	orq	%rdx,%rax
666	movq	%rax,PCB_GSBASE(%rdi)
6673:	jmp	calltrap
668
669/*
670 * NMI handling is special.
671 *
672 * First, NMIs do not respect the state of the processor's RFLAGS.IF
673 * bit.  The NMI handler may be entered at any time, including when
674 * the processor is in a critical section with RFLAGS.IF == 0.
675 * The processor's GS.base value could be invalid on entry to the
676 * handler.
677 *
678 * Second, the processor treats NMIs specially, blocking further NMIs
679 * until an 'iretq' instruction is executed.  We thus need to execute
680 * the NMI handler with interrupts disabled, to prevent a nested interrupt
681 * from executing an 'iretq' instruction and inadvertently taking the
682 * processor out of NMI mode.
683 *
684 * Third, the NMI handler runs on its own stack (tss_ist2). The canonical
685 * GS.base value for the processor is stored just above the bottom of its
686 * NMI stack.  For NMIs taken from kernel mode, the current value in
687 * the processor's GS.base is saved at entry to C-preserved register %r12,
688 * the canonical value for GS.base is then loaded into the processor, and
689 * the saved value is restored at exit time.  For NMIs taken from user mode,
690 * the cheaper 'SWAPGS' instructions are used for swapping GS.base.
691 */
692
693IDTVEC(nmi)
694	subq	$TF_RIP,%rsp
695	movl	$(T_NMI),TF_TRAPNO(%rsp)
696	movq	$0,TF_ADDR(%rsp)
697	movq	$0,TF_ERR(%rsp)
698	movq	%rdi,TF_RDI(%rsp)
699	movq	%rsi,TF_RSI(%rsp)
700	movq	%rdx,TF_RDX(%rsp)
701	movq	%rcx,TF_RCX(%rsp)
702	movq	%r8,TF_R8(%rsp)
703	movq	%r9,TF_R9(%rsp)
704	movq	%rax,TF_RAX(%rsp)
705	movq	%rbx,TF_RBX(%rsp)
706	movq	%rbp,TF_RBP(%rsp)
707	movq	%r10,TF_R10(%rsp)
708	movq	%r11,TF_R11(%rsp)
709	movq	%r12,TF_R12(%rsp)
710	movq	%r13,TF_R13(%rsp)
711	movq	%r14,TF_R14(%rsp)
712	movq	%r15,TF_R15(%rsp)
713	SAVE_SEGS
714	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
715	pushfq
716	andq	$~(PSL_D | PSL_AC),(%rsp)
717	popfq
718	xorl	%ebx,%ebx
719	testb	$SEL_RPL_MASK,TF_CS(%rsp)
720	jnz	nmi_fromuserspace
721	/*
722	 * We've interrupted the kernel.  Preserve GS.base in %r12,
723	 * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d.
724	 */
725	movl	$MSR_GSBASE,%ecx
726	rdmsr
727	movq	%rax,%r12
728	shlq	$32,%rdx
729	orq	%rdx,%r12
730	/* Retrieve and load the canonical value for GS.base. */
731	movq	TF_SIZE(%rsp),%rdx
732	movl	%edx,%eax
733	shrq	$32,%rdx
734	wrmsr
735	movq	%cr3,%r13
736	movq	PCPU(KCR3),%rax
737	cmpq	$~0,%rax
738	je	1f
739	movq	%rax,%cr3
7401:	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
741	je	nmi_calltrap
742	movl	$MSR_IA32_SPEC_CTRL,%ecx
743	rdmsr
744	movl	%eax,%r14d
745	call	handle_ibrs_entry
746	jmp	nmi_calltrap
747nmi_fromuserspace:
748	incl	%ebx
749	swapgs
750	movq	%cr3,%r13
751	movq	PCPU(KCR3),%rax
752	cmpq	$~0,%rax
753	je	1f
754	movq	%rax,%cr3
7551:	call	handle_ibrs_entry
756	movq	PCPU(CURPCB),%rdi
757	testq	%rdi,%rdi
758	jz	3f
759	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)
760	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
761	jz	3f
762	cmpw	$KUF32SEL,TF_FS(%rsp)
763	jne	2f
764	rdfsbase %rax
765	movq	%rax,PCB_FSBASE(%rdi)
7662:	cmpw	$KUG32SEL,TF_GS(%rsp)
767	jne	3f
768	movl	$MSR_KGSBASE,%ecx
769	rdmsr
770	shlq	$32,%rdx
771	orq	%rdx,%rax
772	movq	%rax,PCB_GSBASE(%rdi)
7733:
774/* Note: this label is also used by ddb and gdb: */
775nmi_calltrap:
776	FAKE_MCOUNT(TF_RIP(%rsp))
777	movq	%rsp,%rdi
778	call	trap
779	MEXITCOUNT
780#ifdef HWPMC_HOOKS
781	/*
782	 * Capture a userspace callchain if needed.
783	 *
784	 * - Check if the current trap was from user mode.
785	 * - Check if the current thread is valid.
786	 * - Check if the thread requires a user call chain to be
787	 *   captured.
788	 *
789	 * We are still in NMI mode at this point.
790	 */
791	testl	%ebx,%ebx
792	jz	nocallchain	/* not from userspace */
793	movq	PCPU(CURTHREAD),%rax
794	orq	%rax,%rax	/* curthread present? */
795	jz	nocallchain
796	/*
797	 * Move execution to the regular kernel stack, because we
798	 * committed to return through doreti.
799	 */
800	movq	%rsp,%rsi	/* source stack pointer */
801	movq	$TF_SIZE,%rcx
802	movq	PCPU(RSP0),%rdx
803	subq	%rcx,%rdx
804	movq	%rdx,%rdi	/* destination stack pointer */
805	shrq	$3,%rcx		/* trap frame size in long words */
806	pushfq
807	andq	$~(PSL_D | PSL_AC),(%rsp)
808	popfq
809	rep
810	movsq			/* copy trapframe */
811	movq	%rdx,%rsp	/* we are on the regular kstack */
812
813	testl	$TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
814	jz	nocallchain
815	/*
816	 * A user callchain is to be captured, so:
817	 * - Take the processor out of "NMI" mode by faking an "iret",
818	 *   to allow for nested NMI interrupts.
819	 * - Enable interrupts, so that copyin() can work.
820	 */
821	movl	%ss,%eax
822	pushq	%rax		/* tf_ss */
823	pushq	%rdx		/* tf_rsp (on kernel stack) */
824	pushfq			/* tf_rflags */
825	movl	%cs,%eax
826	pushq	%rax		/* tf_cs */
827	pushq	$outofnmi	/* tf_rip */
828	iretq
829outofnmi:
830	/*
831	 * At this point the processor has exited NMI mode and is running
832	 * with interrupts turned off on the normal kernel stack.
833	 *
834	 * If a pending NMI gets recognized at or after this point, it
835	 * will cause a kernel callchain to be traced.
836	 *
837	 * We turn interrupts back on, and call the user callchain capture hook.
838	 */
839	movq	pmc_hook,%rax
840	orq	%rax,%rax
841	jz	nocallchain
842	movq	PCPU(CURTHREAD),%rdi		/* thread */
843	movq	$PMC_FN_USER_CALLCHAIN,%rsi	/* command */
844	movq	%rsp,%rdx			/* frame */
845	sti
846	call	*%rax
847	cli
848nocallchain:
849#endif
850	testl	%ebx,%ebx	/* %ebx == 0 => return to userland */
851	jnz	doreti_exit
852	/*
853	 * Restore speculation control MSR, if preserved.
854	 */
855	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
856	je	1f
857	movl	%r14d,%eax
858	xorl	%edx,%edx
859	movl	$MSR_IA32_SPEC_CTRL,%ecx
860	wrmsr
861	/*
862	 * Put back the preserved MSR_GSBASE value.
863	 */
8641:	movl	$MSR_GSBASE,%ecx
865	movq	%r12,%rdx
866	movl	%edx,%eax
867	shrq	$32,%rdx
868	wrmsr
869	cmpb	$0, nmi_flush_l1d_sw(%rip)
870	je	2f
871	call	flush_l1d_sw		/* bhyve L1TF assist */
8722:	movq	%r13,%cr3
873	RESTORE_REGS
874	addq	$TF_RIP,%rsp
875	jmp	doreti_iret
876
877/*
878 * MC# handling is similar to NMI.
879 *
880 * As with NMIs, machine check exceptions do not respect RFLAGS.IF and
881 * can occur at any time with a GS.base value that does not correspond
882 * to the privilege level in CS.
883 *
884 * Machine checks are not unblocked by iretq, but it is best to run
885 * the handler with interrupts disabled since the exception may have
886 * interrupted a critical section.
887 *
888 * The MC# handler runs on its own stack (tss_ist3).  The canonical
889 * GS.base value for the processor is stored just above the bottom of
890 * its MC# stack.  For exceptions taken from kernel mode, the current
891 * value in the processor's GS.base is saved at entry to C-preserved
892 * register %r12, the canonical value for GS.base is then loaded into
893 * the processor, and the saved value is restored at exit time.  For
894 * exceptions taken from user mode, the cheaper 'SWAPGS' instructions
895 * are used for swapping GS.base.
896 */
897
898IDTVEC(mchk)
899	subq	$TF_RIP,%rsp
900	movl	$(T_MCHK),TF_TRAPNO(%rsp)
901	movq	$0,TF_ADDR(%rsp)
902	movq	$0,TF_ERR(%rsp)
903	movq	%rdi,TF_RDI(%rsp)
904	movq	%rsi,TF_RSI(%rsp)
905	movq	%rdx,TF_RDX(%rsp)
906	movq	%rcx,TF_RCX(%rsp)
907	movq	%r8,TF_R8(%rsp)
908	movq	%r9,TF_R9(%rsp)
909	movq	%rax,TF_RAX(%rsp)
910	movq	%rbx,TF_RBX(%rsp)
911	movq	%rbp,TF_RBP(%rsp)
912	movq	%r10,TF_R10(%rsp)
913	movq	%r11,TF_R11(%rsp)
914	movq	%r12,TF_R12(%rsp)
915	movq	%r13,TF_R13(%rsp)
916	movq	%r14,TF_R14(%rsp)
917	movq	%r15,TF_R15(%rsp)
918	SAVE_SEGS
919	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
920	pushfq
921	andq	$~(PSL_D | PSL_AC),(%rsp)
922	popfq
923	xorl	%ebx,%ebx
924	testb	$SEL_RPL_MASK,TF_CS(%rsp)
925	jnz	mchk_fromuserspace
926	/*
927	 * We've interrupted the kernel.  Preserve GS.base in %r12,
928	 * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d.
929	 */
930	movl	$MSR_GSBASE,%ecx
931	rdmsr
932	movq	%rax,%r12
933	shlq	$32,%rdx
934	orq	%rdx,%r12
935	/* Retrieve and load the canonical value for GS.base. */
936	movq	TF_SIZE(%rsp),%rdx
937	movl	%edx,%eax
938	shrq	$32,%rdx
939	wrmsr
940	movq	%cr3,%r13
941	movq	PCPU(KCR3),%rax
942	cmpq	$~0,%rax
943	je	1f
944	movq	%rax,%cr3
9451:	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
946	je	mchk_calltrap
947	movl	$MSR_IA32_SPEC_CTRL,%ecx
948	rdmsr
949	movl	%eax,%r14d
950	call	handle_ibrs_entry
951	jmp	mchk_calltrap
952mchk_fromuserspace:
953	incl	%ebx
954	swapgs
955	movq	%cr3,%r13
956	movq	PCPU(KCR3),%rax
957	cmpq	$~0,%rax
958	je	1f
959	movq	%rax,%cr3
9601:	call	handle_ibrs_entry
961/* Note: this label is also used by ddb and gdb: */
962mchk_calltrap:
963	FAKE_MCOUNT(TF_RIP(%rsp))
964	movq	%rsp,%rdi
965	call	mca_intr
966	MEXITCOUNT
967	testl	%ebx,%ebx	/* %ebx == 0 => return to userland */
968	jnz	doreti_exit
969	/*
970	 * Restore speculation control MSR, if preserved.
971	 */
972	testl	$CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
973	je	1f
974	movl	%r14d,%eax
975	xorl	%edx,%edx
976	movl	$MSR_IA32_SPEC_CTRL,%ecx
977	wrmsr
978	/*
979	 * Put back the preserved MSR_GSBASE value.
980	 */
9811:	movl	$MSR_GSBASE,%ecx
982	movq	%r12,%rdx
983	movl	%edx,%eax
984	shrq	$32,%rdx
985	wrmsr
986	movq	%r13,%cr3
987	RESTORE_REGS
988	addq	$TF_RIP,%rsp
989	jmp	doreti_iret
990
991ENTRY(fork_trampoline)
992	movq	%r12,%rdi		/* function */
993	movq	%rbx,%rsi		/* arg1 */
994	movq	%rsp,%rdx		/* trapframe pointer */
995	call	fork_exit
996	MEXITCOUNT
997	jmp	doreti			/* Handle any ASTs */
998
999/*
1000 * To efficiently implement classification of trap and interrupt handlers
1001 * for profiling, there must be only trap handlers between the labels btrap
1002 * and bintr, and only interrupt handlers between the labels bintr and
1003 * eintr.  This is implemented (partly) by including files that contain
1004 * some of the handlers.  Before including the files, set up a normal asm
1005 * environment so that the included files doen't need to know that they are
1006 * included.
1007 */
1008
1009#ifdef COMPAT_FREEBSD32
1010	.data
1011	.p2align 4
1012	.text
1013	SUPERALIGN_TEXT
1014
1015#include <amd64/ia32/ia32_exception.S>
1016#endif
1017
1018	.data
1019	.p2align 4
1020	.text
1021	SUPERALIGN_TEXT
1022MCOUNT_LABEL(bintr)
1023
1024#include <amd64/amd64/apic_vector.S>
1025
1026#ifdef DEV_ATPIC
1027	.data
1028	.p2align 4
1029	.text
1030	SUPERALIGN_TEXT
1031
1032#include <amd64/amd64/atpic_vector.S>
1033#endif
1034
1035	.text
1036MCOUNT_LABEL(eintr)
1037
1038/*
1039 * void doreti(struct trapframe)
1040 *
1041 * Handle return from interrupts, traps and syscalls.
1042 */
1043	.text
1044	SUPERALIGN_TEXT
1045	.type	doreti,@function
1046	.globl	doreti
1047doreti:
1048	FAKE_MCOUNT($bintr)		/* init "from" bintr -> doreti */
1049	/*
1050	 * Check if ASTs can be handled now.
1051	 */
1052	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* are we returning to user mode? */
1053	jz	doreti_exit		/* can't handle ASTs now if not */
1054
1055doreti_ast:
1056	/*
1057	 * Check for ASTs atomically with returning.  Disabling CPU
1058	 * interrupts provides sufficient locking even in the SMP case,
1059	 * since we will be informed of any new ASTs by an IPI.
1060	 */
1061	cli
1062	movq	PCPU(CURTHREAD),%rax
1063	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
1064	je	doreti_exit
1065	sti
1066	movq	%rsp,%rdi	/* pass a pointer to the trapframe */
1067	call	ast
1068	jmp	doreti_ast
1069
1070	/*
1071	 * doreti_exit:	pop registers, iret.
1072	 *
1073	 *	The segment register pop is a special case, since it may
1074	 *	fault if (for example) a sigreturn specifies bad segment
1075	 *	registers.  The fault is handled in trap.c.
1076	 */
1077doreti_exit:
1078	MEXITCOUNT
1079	movq	PCPU(CURPCB),%r8
1080
1081	/*
1082	 * Do not reload segment registers for kernel.
1083	 * Since we do not reload segments registers with sane
1084	 * values on kernel entry, descriptors referenced by
1085	 * segments registers might be not valid.  This is fatal
1086	 * for user mode, but is not a problem for the kernel.
1087	 */
1088	testb	$SEL_RPL_MASK,TF_CS(%rsp)
1089	jz	ld_regs
1090	testl	$PCB_FULL_IRET,PCB_FLAGS(%r8)
1091	jz	ld_regs
1092	andl	$~PCB_FULL_IRET,PCB_FLAGS(%r8)
1093	testl	$TF_HASSEGS,TF_FLAGS(%rsp)
1094	je	set_segs
1095
1096do_segs:
1097	/* Restore %fs and fsbase */
1098	movw	TF_FS(%rsp),%ax
1099	.globl	ld_fs
1100ld_fs:
1101	movw	%ax,%fs
1102	cmpw	$KUF32SEL,%ax
1103	jne	1f
1104	movl	$MSR_FSBASE,%ecx
1105	movl	PCB_FSBASE(%r8),%eax
1106	movl	PCB_FSBASE+4(%r8),%edx
1107	.globl	ld_fsbase
1108ld_fsbase:
1109	wrmsr
11101:
1111	/* Restore %gs and gsbase */
1112	movw	TF_GS(%rsp),%si
1113	pushfq
1114	cli
1115	movl	$MSR_GSBASE,%ecx
1116	/* Save current kernel %gs base into %r12d:%r13d */
1117	rdmsr
1118	movl	%eax,%r12d
1119	movl	%edx,%r13d
1120	.globl	ld_gs
1121ld_gs:
1122	movw	%si,%gs
1123	/* Save user %gs base into %r14d:%r15d */
1124	rdmsr
1125	movl	%eax,%r14d
1126	movl	%edx,%r15d
1127	/* Restore kernel %gs base */
1128	movl	%r12d,%eax
1129	movl	%r13d,%edx
1130	wrmsr
1131	popfq
1132	/*
1133	 * Restore user %gs base, either from PCB if used for TLS, or
1134	 * from the previously saved msr read.
1135	 */
1136	movl	$MSR_KGSBASE,%ecx
1137	cmpw	$KUG32SEL,%si
1138	jne	1f
1139	movl	PCB_GSBASE(%r8),%eax
1140	movl	PCB_GSBASE+4(%r8),%edx
1141	jmp	ld_gsbase
11421:
1143	movl	%r14d,%eax
1144	movl	%r15d,%edx
1145	.globl	ld_gsbase
1146ld_gsbase:
1147	wrmsr	/* May trap if non-canonical, but only for TLS. */
1148	.globl	ld_es
1149ld_es:
1150	movw	TF_ES(%rsp),%es
1151	.globl	ld_ds
1152ld_ds:
1153	movw	TF_DS(%rsp),%ds
1154ld_regs:
1155	RESTORE_REGS
1156	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
1157	jz	2f			/* keep running with kernel GS.base */
1158	cli
1159	call	handle_ibrs_exit_rs
1160	cmpq	$~0,PCPU(UCR3)
1161	je	1f
1162	pushq	%rdx
1163	movq	PCPU(PTI_RSP0),%rdx
1164	subq	$PTI_SIZE,%rdx
1165	movq	%rax,PTI_RAX(%rdx)
1166	popq	%rax
1167	movq	%rax,PTI_RDX(%rdx)
1168	movq	TF_RIP(%rsp),%rax
1169	movq	%rax,PTI_RIP(%rdx)
1170	movq	TF_CS(%rsp),%rax
1171	movq	%rax,PTI_CS(%rdx)
1172	movq	TF_RFLAGS(%rsp),%rax
1173	movq	%rax,PTI_RFLAGS(%rdx)
1174	movq	TF_RSP(%rsp),%rax
1175	movq	%rax,PTI_RSP(%rdx)
1176	movq	TF_SS(%rsp),%rax
1177	movq	%rax,PTI_SS(%rdx)
1178	movq	PCPU(UCR3),%rax
1179	swapgs
1180	movq	%rdx,%rsp
1181	movq	%rax,%cr3
1182	popq	%rdx
1183	popq	%rax
1184	addq	$8,%rsp
1185	jmp	doreti_iret
11861:	swapgs
11872:	addq	$TF_RIP,%rsp
1188	.globl	doreti_iret
1189doreti_iret:
1190	iretq
1191
1192set_segs:
1193	movw	$KUDSEL,%ax
1194	movw	%ax,TF_DS(%rsp)
1195	movw	%ax,TF_ES(%rsp)
1196	movw	$KUF32SEL,TF_FS(%rsp)
1197	movw	$KUG32SEL,TF_GS(%rsp)
1198	jmp	do_segs
1199
1200	/*
1201	 * doreti_iret_fault.  Alternative return code for
1202	 * the case where we get a fault in the doreti_exit code
1203	 * above.  trap() (amd64/amd64/trap.c) catches this specific
1204	 * case, sends the process a signal and continues in the
1205	 * corresponding place in the code below.
1206	 */
1207	ALIGN_TEXT
1208	.globl	doreti_iret_fault
1209doreti_iret_fault:
1210	subq	$TF_RIP,%rsp		/* space including tf_err, tf_trapno */
1211	movq	%rax,TF_RAX(%rsp)
1212	movq	%rdx,TF_RDX(%rsp)
1213	movq	%rcx,TF_RCX(%rsp)
1214	call	handle_ibrs_entry
1215	testb	$SEL_RPL_MASK,TF_CS(%rsp)
1216	jz	1f
1217	sti
12181:
1219	SAVE_SEGS
1220	movl	$TF_HASSEGS,TF_FLAGS(%rsp)
1221	movq	%rdi,TF_RDI(%rsp)
1222	movq	%rsi,TF_RSI(%rsp)
1223	movq	%r8,TF_R8(%rsp)
1224	movq	%r9,TF_R9(%rsp)
1225	movq	%rbx,TF_RBX(%rsp)
1226	movq	%rbp,TF_RBP(%rsp)
1227	movq	%r10,TF_R10(%rsp)
1228	movq	%r11,TF_R11(%rsp)
1229	movq	%r12,TF_R12(%rsp)
1230	movq	%r13,TF_R13(%rsp)
1231	movq	%r14,TF_R14(%rsp)
1232	movq	%r15,TF_R15(%rsp)
1233	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
1234	movq	$0,TF_ERR(%rsp)	/* XXX should be the error code */
1235	movq	$0,TF_ADDR(%rsp)
1236	FAKE_MCOUNT(TF_RIP(%rsp))
1237	jmp	calltrap
1238
1239	ALIGN_TEXT
1240	.globl	ds_load_fault
1241ds_load_fault:
1242	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
1243	testb	$SEL_RPL_MASK,TF_CS(%rsp)
1244	jz	1f
1245	sti
12461:
1247	movq	%rsp,%rdi
1248	call	trap
1249	movw	$KUDSEL,TF_DS(%rsp)
1250	jmp	doreti
1251
1252	ALIGN_TEXT
1253	.globl	es_load_fault
1254es_load_fault:
1255	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
1256	testl	$PSL_I,TF_RFLAGS(%rsp)
1257	jz	1f
1258	sti
12591:
1260	movq	%rsp,%rdi
1261	call	trap
1262	movw	$KUDSEL,TF_ES(%rsp)
1263	jmp	doreti
1264
1265	ALIGN_TEXT
1266	.globl	fs_load_fault
1267fs_load_fault:
1268	testl	$PSL_I,TF_RFLAGS(%rsp)
1269	jz	1f
1270	sti
12711:
1272	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
1273	movq	%rsp,%rdi
1274	call	trap
1275	movw	$KUF32SEL,TF_FS(%rsp)
1276	jmp	doreti
1277
1278	ALIGN_TEXT
1279	.globl	gs_load_fault
1280gs_load_fault:
1281	popfq
1282	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
1283	testl	$PSL_I,TF_RFLAGS(%rsp)
1284	jz	1f
1285	sti
12861:
1287	movq	%rsp,%rdi
1288	call	trap
1289	movw	$KUG32SEL,TF_GS(%rsp)
1290	jmp	doreti
1291
1292	ALIGN_TEXT
1293	.globl	fsbase_load_fault
1294fsbase_load_fault:
1295	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
1296	testl	$PSL_I,TF_RFLAGS(%rsp)
1297	jz	1f
1298	sti
12991:
1300	movq	%rsp,%rdi
1301	call	trap
1302	movq	PCPU(CURTHREAD),%r8
1303	movq	TD_PCB(%r8),%r8
1304	movq	$0,PCB_FSBASE(%r8)
1305	jmp	doreti
1306
1307	ALIGN_TEXT
1308	.globl	gsbase_load_fault
1309gsbase_load_fault:
1310	movl	$T_PROTFLT,TF_TRAPNO(%rsp)
1311	testl	$PSL_I,TF_RFLAGS(%rsp)
1312	jz	1f
1313	sti
13141:
1315	movq	%rsp,%rdi
1316	call	trap
1317	movq	PCPU(CURTHREAD),%r8
1318	movq	TD_PCB(%r8),%r8
1319	movq	$0,PCB_GSBASE(%r8)
1320	jmp	doreti
1321
1322#ifdef HWPMC_HOOKS
1323	ENTRY(end_exceptions)
1324#endif
1325