/*
 * Copyright (c) 1993 The Regents of the University of California.
 * Copyright (c) 2008 The DragonFly Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/amd64/include/asmacros.h,v 1.32 2006/10/28 06:04:29 bde Exp $
 */

#ifndef _CPU_ASMACROS_H_
#define _CPU_ASMACROS_H_

#include <sys/cdefs.h>
#include <machine/specialreg.h>

/* XXX too much duplication in various asm*.h's. */

/*
 * CNAME is used to manage the relationship between symbol names in C
 * and the equivalent assembly language names.  CNAME is given a name as
 * it would be used in a C program.  It expands to the equivalent assembly
 * language name.
 */
#define CNAME(csym)		csym
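/*
 * Illustrative note only: on x86_64 ELF there is no symbol prefix, so
 * CNAME(some_c_function) simply expands to some_c_function ("some_c_function"
 * is a made-up name used here for the example).
 */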

#define ALIGN_DATA	.p2align 3	/* 8 byte alignment, zero filled */
#ifdef GPROF
#define ALIGN_TEXT	.p2align 4,0x90	/* 16-byte alignment, nop filled */
#else
#define ALIGN_TEXT	.p2align 4,0x90	/* 16-byte alignment, nop filled */
#endif
#define SUPERALIGN_TEXT	.p2align 4,0x90	/* 16-byte alignment, nop filled */

#define GEN_ENTRY(name)		ALIGN_TEXT; .globl CNAME(name); \
				.type CNAME(name),@function; CNAME(name):
#define NON_GPROF_ENTRY(name)	GEN_ENTRY(name)
#define NON_GPROF_RET		.byte 0xc3	/* opcode for `ret' */

#define	END(name)		.size name, . - name

#ifdef GPROF
/*
 * __mcount is like [.]mcount except that it doesn't require its caller to
 * set up a frame pointer.  It must be called before pushing anything onto
 * the stack.  gcc should eventually generate code to call __mcount in most
 * cases.  This would make -pg in combination with -fomit-frame-pointer
 * useful.  gcc has a configuration variable PROFILE_BEFORE_PROLOGUE to
 * allow profiling before setting up the frame pointer, but this is
 * inadequate for good handling of special cases, e.g., -fpic works best
 * with profiling after the prologue.
 *
 * [.]mexitcount is a new function to support non-statistical profiling if an
 * accurate clock is available.  For C sources, calls to it are generated
 * by the FreeBSD extension `-mprofiler-epilogue' to gcc.  It is best to
 * call [.]mexitcount at the end of a function like the MEXITCOUNT macro does,
 * but gcc currently generates calls to it at the start of the epilogue to
 * avoid problems with -fpic.
 *
 * [.]mcount and __mcount may clobber the call-used registers and %eflags.
 * [.]mexitcount may clobber %ecx and %eflags.
 *
 * Cross-jumping makes non-statistical profiling timing more complicated.
 * It is handled in many cases by calling [.]mexitcount before jumping.  It
 * is handled for conditional jumps using CROSSJUMP() and CROSSJUMPTARGET().
 * It is handled for some fault-handling jumps by not sharing the exit
 * routine.
 *
 * ALTENTRY() must be before a corresponding ENTRY() so that it can jump to
 * the main entry point.  Note that alt entries are counted twice.  They
 * have to be counted as ordinary entries for gprof to get the call times
 * right for the ordinary entries.
 *
 * High local labels are used in macros to avoid clashes with local labels
 * in functions.
 *
 * Ordinary `ret' is used instead of a macro `RET' because there are a lot
 * of `ret's.  0xc3 is the opcode for `ret' (`#define ret ... ret' can't
 * be used because this file is sometimes preprocessed in traditional mode).
 * `ret' clobbers eflags but this doesn't matter.
 */
#define ALTENTRY(name)		GEN_ENTRY(name) ; MCOUNT ; MEXITCOUNT ; jmp 9f
#define	CROSSJUMP(jtrue, label, jfalse) \
	jfalse 8f; MEXITCOUNT; jmp __CONCAT(to,label); 8:
#define CROSSJUMPTARGET(label) \
	ALIGN_TEXT; __CONCAT(to,label): ; MCOUNT; jmp label
#define ENTRY(name)		GEN_ENTRY(name) ; 9: ; MCOUNT
#define FAKE_MCOUNT(caller)	pushq caller ; call __mcount ; popq %rcx
#define MCOUNT			call __mcount
#define MCOUNT_LABEL(name)	GEN_ENTRY(name) ; nop ; ALIGN_TEXT
#ifdef GUPROF
#define MEXITCOUNT		call .mexitcount
#define ret			MEXITCOUNT ; NON_GPROF_RET
#else
#define MEXITCOUNT
#endif

#else /* !GPROF */
/*
 * ALTENTRY() has to align because it is before a corresponding ENTRY().
 * ENTRY() has to align too, because there may be no ALTENTRY() before it.
 * If there is a previous ALTENTRY() then the alignment code for ENTRY()
 * is empty.
 */
#define ALTENTRY(name)		GEN_ENTRY(name)
#define	CROSSJUMP(jtrue, label, jfalse)	jtrue label
#define	CROSSJUMPTARGET(label)
#define ENTRY(name)		GEN_ENTRY(name)
#define FAKE_MCOUNT(caller)
#define MCOUNT
#define MCOUNT_LABEL(name)
#define MEXITCOUNT
#endif /* GPROF */
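
/*
 * Example usage (illustrative only; "mydummyfunc" is a made-up symbol) in
 * an assembly source file:
 *
 *	ENTRY(mydummyfunc)		- global, aligned function label
 *		movq	%rdi,%rax
 *		ret
 *	END(mydummyfunc)		- emits .size for the symbol
 *
 * Under GPROF, ENTRY() additionally emits the MCOUNT profiling call.
 */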

#ifdef LOCORE
/*
 * Convenience macro for declaring interrupt entry points.
 */
#define	IDTVEC(name)	ALIGN_TEXT; .globl __CONCAT(X,name); \
			.type __CONCAT(X,name),@function; __CONCAT(X,name):

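/*
 * For example (illustrative; "fastintr" is an arbitrary name here),
 * IDTVEC(fastintr) declares a global function symbol named Xfastintr,
 * which is the label that would be installed in the IDT.
 */
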
/*
 * stack frame macro support - supports mmu isolation, swapgs, and
 * stack frame pushing and popping.
 */

/*
 * Kernel pmap isolation to work around the massive Intel mmu bug
 * that allows kernel memory to be sussed out due to speculative memory
 * reads and instruction execution creating timing differences that can
 * be detected by userland.  e.g. force a speculative read, speculatively
 * execute a cmp/branch sequence, detect the timing.  Iterate cmp $values
 * to suss out the content of speculatively read kernel memory.
 *
 * We do this by creating a trampoline area for all user->kernel and
 * kernel->user transitions.  The trampoline area allows us to limit
 * the reach of the kernel map in the isolated version of the user pmap
 * to JUST the trampoline area (for all cpus), the tss, and the vector area.
 *
 * It is very important that these transitions not access any memory
 * outside of the trampoline page while the isolated user process pmap
 * is active in %cr3.
 *
 * The trampoline does not add much overhead when pmap isolation is
 * disabled, so we just run with it regardless.  Of course, when pmap
 * isolation is enabled, the %cr3 loads add 150-250ns to every system
 * call and (without PCID) smash the TLB.
 *
 * KMMUENTER -	Executed by the trampoline when a user->kernel transition
 *		is detected.  The stack pointer points into the pcpu
 *		trampoline space and is available for register save/restore.
 *		Other registers have not yet been saved.  %gs points at
 *		the kernel pcpu structure.
 *
 *		Caller has already determined that a transition is in
 *		progress and has already issued the swapgs.  hwtf indicates
 *		how much hardware has already pushed.
 *
 * KMMUEXIT  -	Executed when a kernel->user transition is made.  The stack
 *		pointer points into the pcpu trampoline space and we are
 *		almost ready to iretq.  %gs still points at the kernel pcpu
 *		structure.
 *
 *		Caller has already determined that a transition is in
 *		progress.  hwtf indicates how much hardware has already
 *		pushed.
 */

/*
 * KMMUENTER_CORE - Handles ISOMMU, IBRS, and IBPB.  Caller has already
 *		    saved %rcx and %rdx.  We have to deal with %rax.
 *
 *		    XXX If IBPB is not supported, try to clear the
 *		    call/return hw cache with a long chained call sequence?
 *
 * NOTE - IBRS2 - We are leaving IBRS on full-time.  However, Intel
 *		  believes it is not safe unless the MSR is poked on each
 *		  user->kernel transition, so poke the MSR for both IBRS1
 *		  and IBRS2.
 */
#define KMMUENTER_CORE							\
	testq	$PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;		\
	je	40f ;							\
	movq	PCPU(trampoline)+TR_PCB_CR3,%rcx ;			\
	movq	%rcx,%cr3 ;						\
40:	movl	PCPU(trampoline)+TR_PCB_SPEC_CTRL,%edx ;		\
	testq	%rdx, %rdx ;						\
	je	43f ;							\
	movq	%rax, PCPU(trampoline)+TR_RAX ;				\
	testq	$SPEC_CTRL_DUMMY_ENABLE,%rdx ;				\
	je	41f ;							\
	movq	%rdx, %rax ;						\
	andq	$SPEC_CTRL_IBRS|SPEC_CTRL_STIBP, %rax ;			\
	movq	$MSR_SPEC_CTRL,%rcx ;					\
	xorl	%edx,%edx ;						\
	wrmsr ;								\
	movl	PCPU(trampoline)+TR_PCB_SPEC_CTRL,%edx ;		\
41:	testq	$SPEC_CTRL_DUMMY_IBPB,%rdx ;				\
	je	42f ;							\
	movl	$MSR_PRED_CMD,%ecx ;					\
	movl	$1,%eax ;						\
	xorl	%edx,%edx ;						\
	wrmsr ;								\
42:	movq	PCPU(trampoline)+TR_RAX, %rax ;				\
43:

/*
 * Enter with trampoline, hardware pushed up to %rip
 */
#define KMMUENTER_TFRIP							\
	subq	$TR_RIP, %rsp ;						\
	movq	%rcx, TR_RCX(%rsp) ;					\
	movq	%rdx, TR_RDX(%rsp) ;					\
	KMMUENTER_CORE ;						\
	movq	%rsp, %rcx ;		/* trampoline rsp */		\
	movq	PCPU(trampoline)+TR_PCB_RSP,%rsp ; /* kstack rsp */	\
	movq	TR_SS(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_RSP(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_RFLAGS(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_CS(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_RIP(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_RDX(%rcx), %rdx ;					\
	movq	TR_RCX(%rcx), %rcx

/*
 * Enter with trampoline, hardware pushed up to ERR
 */
#define KMMUENTER_TFERR							\
	subq	$TR_ERR, %rsp ;						\
	movq	%rcx, TR_RCX(%rsp) ;					\
	movq	%rdx, TR_RDX(%rsp) ;					\
	KMMUENTER_CORE ;						\
	movq	%rsp, %rcx ;		/* trampoline rsp */		\
	movq	PCPU(trampoline)+TR_PCB_RSP,%rsp ; /* kstack rsp */	\
	movq	TR_SS(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_RSP(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_RFLAGS(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_CS(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_RIP(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_ERR(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_RDX(%rcx), %rdx ;					\
	movq	TR_RCX(%rcx), %rcx

/*
 * Enter with trampoline, hardware pushed up to ERR and
 * we need to save %cr2 early (before potentially reloading %cr3).
 */
#define KMMUENTER_TFERR_SAVECR2						\
	subq	$TR_ERR, %rsp ;						\
	movq	%rcx, TR_RCX(%rsp) ;					\
	movq	%rdx, TR_RDX(%rsp) ;					\
	movq	%cr2, %rcx ;						\
	movq	%rcx, PCPU(trampoline)+TR_CR2 ;				\
	KMMUENTER_CORE ;						\
	movq	%rsp, %rcx ;		/* trampoline rsp */		\
	movq	PCPU(trampoline)+TR_PCB_RSP,%rsp ; /* kstack rsp */	\
	movq	TR_SS(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_RSP(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_RFLAGS(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_CS(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_RIP(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_ERR(%rcx), %rdx ;					\
	pushq	%rdx ;							\
	movq	TR_RDX(%rcx), %rdx ;					\
	movq	TR_RCX(%rcx), %rcx

/*
 * Set %cr3 if necessary on syscall entry.  No registers may be
 * disturbed.
 *
 * NOTE: TR_CR2 is used by the caller to save %rsp, so we cannot use it here.
 */
#define KMMUENTER_SYSCALL						\
	movq	%rcx, PCPU(trampoline)+TR_RCX ;				\
	movq	%rdx, PCPU(trampoline)+TR_RDX ;				\
	KMMUENTER_CORE ;						\
	movq	PCPU(trampoline)+TR_RDX, %rdx ;				\
	movq	PCPU(trampoline)+TR_RCX, %rcx

/*
 * KMMUEXIT_CORE handles IBRS and STIBP, but not ISOMMU.
 *
 * We don't re-execute the IBPB barrier on exit at the moment.
 */
#define KMMUEXIT_CORE							\
	testq	$SPEC_CTRL_DUMMY_ENABLE,PCPU(trampoline)+TR_PCB_SPEC_CTRL+4 ; \
	je	41f ;							\
	movq	%rax, PCPU(trampoline)+TR_RAX ;				\
	movq	%rcx, PCPU(trampoline)+TR_RCX ;				\
	movq	%rdx, PCPU(trampoline)+TR_RDX ;				\
	movl	PCPU(trampoline)+TR_PCB_SPEC_CTRL+4, %eax ;		\
	andq	$SPEC_CTRL_IBRS|SPEC_CTRL_STIBP, %rax ;			\
	movq	$MSR_SPEC_CTRL,%rcx ;					\
	xorl	%edx,%edx ;						\
	wrmsr ;								\
	movq	PCPU(trampoline)+TR_RDX, %rdx ;				\
	movq	PCPU(trampoline)+TR_RCX, %rcx ;				\
	movq	PCPU(trampoline)+TR_RAX, %rax ;				\
41:

/*
 * We are positioned at the base of the trapframe.  Advance %rsp past the
 * register save area to the hardware frame and handle MMU isolation.
 * MMU isolation requires us to copy the hardware frame to the trampoline
 * area before setting %cr3 to the isolated map.  We then set the %rsp for
 * iretq to TR_RIP in the trampoline area (after restoring the register we
 * saved in TR_ERR).
 */
#define KMMUEXIT							\
	addq	$TF_RIP,%rsp ;						\
	KMMUEXIT_CORE ;							\
	testq	$PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;		\
	je	40f ;							\
	movq	%rcx, PCPU(trampoline)+TR_ERR ;	/* save in TR_ERR */	\
	popq	%rcx ;				/* copy %rip */		\
	movq	%rcx, PCPU(trampoline)+TR_RIP ;				\
	popq	%rcx ;				/* copy %cs */		\
	movq	%rcx, PCPU(trampoline)+TR_CS ;				\
	popq	%rcx ;				/* copy %rflags */	\
	movq	%rcx, PCPU(trampoline)+TR_RFLAGS ;			\
	popq	%rcx ;				/* copy %rsp */		\
	movq	%rcx, PCPU(trampoline)+TR_RSP ;				\
	popq	%rcx ;				/* copy %ss */		\
	movq	%rcx, PCPU(trampoline)+TR_SS ;				\
	movq	%gs:0,%rcx ;						\
	addq	$GD_TRAMPOLINE+TR_ERR,%rcx ;				\
	movq	%rcx,%rsp ;						\
	movq	PCPU(trampoline)+TR_PCB_CR3_ISO,%rcx ;			\
	movq	%rcx,%cr3 ;						\
	popq	%rcx ;		/* positioned at TR_RIP after this */	\
40:

/*
 * Warning: the user stack pointer is already loaded into %rsp at this
 * point.  We still have the kernel %gs.
 *
 * Caller will sysexit, so we do not have to copy anything to the
 * trampoline area.
 */
#define KMMUEXIT_SYSCALL						\
	KMMUEXIT_CORE ;							\
	testq	$PCB_ISOMMU,PCPU(trampoline)+TR_PCB_FLAGS ;		\
	je	40f ;							\
	movq	%rcx, PCPU(trampoline)+TR_RCX ;				\
	movq	PCPU(trampoline)+TR_PCB_CR3_ISO,%rcx ;			\
	movq	%rcx,%cr3 ;						\
	movq	PCPU(trampoline)+TR_RCX, %rcx ;				\
40:

/*
 * Macros to create and destroy a trap frame.  %rsp has already been shifted
 * to the base of the trapframe in the thread structure.
 */
#define PUSH_FRAME_REGS							\
	movq	%rdi,TF_RDI(%rsp) ;					\
	movq	%rsi,TF_RSI(%rsp) ;					\
	movq	%rdx,TF_RDX(%rsp) ;					\
	movq	%rcx,TF_RCX(%rsp) ;					\
	movq	%r8,TF_R8(%rsp) ;					\
	movq	%r9,TF_R9(%rsp) ;					\
	movq	%rax,TF_RAX(%rsp) ;					\
	movq	%rbx,TF_RBX(%rsp) ;					\
	movq	%rbp,TF_RBP(%rsp) ;					\
	movq	%r10,TF_R10(%rsp) ;					\
	movq	%r11,TF_R11(%rsp) ;					\
	movq	%r12,TF_R12(%rsp) ;					\
	movq	%r13,TF_R13(%rsp) ;					\
	movq	%r14,TF_R14(%rsp) ;					\
	movq	%r15,TF_R15(%rsp) ;					\
					/* SECURITY CLEAR REGS */	\
	xorq	%rax,%rax ;						\
	movq	%rax,%rbx ;						\
	movq	%rax,%rcx ;						\
	movq	%rax,%rdx ;						\
	movq	%rax,%rdi ;						\
	movq	%rax,%rsi ;						\
	movq	%rax,%rbp ;						\
	movq	%rax,%r8 ;						\
	movq	%rax,%r9 ;						\
	movq	%rax,%r10 ;						\
	movq	%rax,%r11 ;						\
	movq	%rax,%r12 ;						\
	movq	%rax,%r13 ;						\
	movq	%rax,%r14 ;						\
	movq	%rax,%r15


/*
 * PUSH_FRAME is the first thing executed upon interrupt entry.  We are
 * responsible for swapgs execution and the KMMUENTER dispatch.
 *
 * NOTE - PUSH_FRAME code doesn't mess with %gs or the stack, or assume it can
 *	  use PCPU(trampoline), if the trap/exception is from supervisor mode.
 *	  It only messes with that stuff when the trap/exception is from user
 *	  mode.  Our DBG and NMI code depends on this behavior.
 */
#define PUSH_FRAME_TFRIP						\
	testb	$SEL_RPL_MASK,TF_CS-TF_RIP(%rsp) ; /* from userland? */	\
	jz	1f ;							\
	swapgs ;		/* from userland */			\
	KMMUENTER_TFRIP ;	/* from userland */			\
1:									\
	subq	$TF_RIP,%rsp ;						\
	PUSH_FRAME_REGS

#define PUSH_FRAME_TFERR						\
	testb	$SEL_RPL_MASK,TF_CS-TF_ERR(%rsp) ; /* from userland? */	\
	jz	1f ;							\
	swapgs ;		/* from userland */			\
	KMMUENTER_TFERR ;	/* from userland */			\
1:									\
	subq	$TF_ERR,%rsp ;						\
	PUSH_FRAME_REGS

#define PUSH_FRAME_TFERR_SAVECR2					\
	testb	$SEL_RPL_MASK,TF_CS-TF_ERR(%rsp) ;			\
	jz	1f ;							\
	swapgs ;		/* from userland */			\
	KMMUENTER_TFERR_SAVECR2 ;/* from userland */			\
	subq	$TF_ERR,%rsp ;						\
	PUSH_FRAME_REGS ;						\
	movq	PCPU(trampoline)+TR_CR2, %r10 ;				\
	jmp 2f ;							\
1:									\
	subq	$TF_ERR,%rsp ;						\
	PUSH_FRAME_REGS ;						\
	movq	%cr2, %r10 ;						\
2:									\
	movq	%r10, TF_ADDR(%rsp)

/*
 * POP_FRAME is issued just prior to the iretq, or just prior to a
 * jmp doreti_iret.  The terminating instruction must be passed in
 * to the macro as `lastinsn'.
 */
#define POP_FRAME(lastinsn)						\
	movq	TF_RDI(%rsp),%rdi ;					\
	movq	TF_RSI(%rsp),%rsi ;					\
	movq	TF_RDX(%rsp),%rdx ;					\
	movq	TF_RCX(%rsp),%rcx ;					\
	movq	TF_R8(%rsp),%r8 ;					\
	movq	TF_R9(%rsp),%r9 ;					\
	movq	TF_RAX(%rsp),%rax ;					\
	movq	TF_RBX(%rsp),%rbx ;					\
	movq	TF_RBP(%rsp),%rbp ;					\
	movq	TF_R10(%rsp),%r10 ;					\
	movq	TF_R11(%rsp),%r11 ;					\
	movq	TF_R12(%rsp),%r12 ;					\
	movq	TF_R13(%rsp),%r13 ;					\
	movq	TF_R14(%rsp),%r14 ;					\
	movq	TF_R15(%rsp),%r15 ;					\
	cli ;								\
	testb	$SEL_RPL_MASK,TF_CS(%rsp) ; /* return to user? */	\
	jz	1f ;							\
	KMMUEXIT ;		/* return to user */			\
	swapgs ;		/* return to user */			\
	jmp	2f ;							\
1:									\
	addq	$TF_RIP,%rsp ;	/* setup for iretq */			\
2:									\
	lastinsn

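/*
 * A minimal sketch (illustrative only, not an actual handler in this tree;
 * "myvec" and the call target "my_trap_handler" are made-up names) of how
 * these pieces typically fit together in an exception stub:
 *
 *	IDTVEC(myvec)
 *		PUSH_FRAME_TFERR		- swapgs/KMMUENTER + save regs
 *		movq	%rsp,%rdi		- pass the trapframe pointer
 *		call	my_trap_handler
 *		POP_FRAME(jmp doreti_iret)	- restore regs, KMMUEXIT, exit
 *
 * The real interrupt and exception stubs live in the platform assembly
 * sources and are more involved than this sketch.
 */
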
/*
 * Access per-CPU data.
 */
#define	PCPU(member)		%gs:gd_ ## member
#define PCPU_E8(member,idx)	%gs:gd_ ## member(,idx,8)
#define	PCPU_ADDR(member, reg)					\
	movq %gs:PC_PRVSPACE, reg ;				\
	addq $PC_ ## member, reg
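
/*
 * For example (illustrative; "cpuid" is used here only as an assumed
 * globaldata field name), PCPU(cpuid) expands to %gs:gd_cpuid, so
 * "movl PCPU(cpuid),%eax" reads that per-CPU value through %gs.
 */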

#endif /* LOCORE */

#endif /* !_CPU_ASMACROS_H_ */