/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Portions Copyright 2008 John Birrell <jb@freebsd.org>
 *
 * $FreeBSD$
 *
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#define _ASM

#include <machine/asmacros.h>
#include <sys/cpuvar_defs.h>
#include <sys/dtrace.h>

#include "assym.inc"

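/*
 * INTR_POP restores the general-purpose registers saved in the trap frame
 * at the top of the stack, switches back to the user %gs base (swapgs, with
 * interrupts disabled) when the trap came from user mode, and then advances
 * %rsp so that only the hardware-pushed portion of the frame (%rip, %cs,
 * %rflags, %rsp, %ss) remains for iretq.
 */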
#define INTR_POP				\
	movq	TF_RDI(%rsp),%rdi;		\
	movq	TF_RSI(%rsp),%rsi;		\
	movq	TF_RDX(%rsp),%rdx;		\
	movq	TF_RCX(%rsp),%rcx;		\
	movq	TF_R8(%rsp),%r8;		\
	movq	TF_R9(%rsp),%r9;		\
	movq	TF_RAX(%rsp),%rax;		\
	movq	TF_RBX(%rsp),%rbx;		\
	movq	TF_RBP(%rsp),%rbp;		\
	movq	TF_R10(%rsp),%r10;		\
	movq	TF_R11(%rsp),%r11;		\
	movq	TF_R12(%rsp),%r12;		\
	movq	TF_R13(%rsp),%r13;		\
	movq	TF_R14(%rsp),%r14;		\
	movq	TF_R15(%rsp),%r15;		\
	testb	$SEL_RPL_MASK,TF_CS(%rsp);	\
	jz	1f;				\
	cli;					\
	swapgs;					\
1:	addq	$TF_RIP,%rsp;

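/*
 * dtrace_invop_callsite labels the return address of the call to
 * dtrace_invop() below; it is exported so that the address can be
 * recognized from elsewhere in DTrace (e.g., by stack-walking code).
 */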
.globl dtrace_invop_callsite
.type dtrace_invop_callsite,@function

	ENTRY(dtrace_invop_start)

	/*
	 * #BP traps with %rip set to the address of the following
	 * instruction.  We need to decrement the value to indicate the
	 * address of the int3 (0xcc) instruction that we substituted.
	 */
	movq	TF_RIP(%rsp), %rdi
	decq	%rdi
	movq	%rsp, %rsi

	/*
	 * Allocate some scratch space to let the invop handler return a value.
	 * This is needed when emulating "call" instructions.
	 */
	subq	$16, %rsp
	movq	%rsp, %rdx

	call	dtrace_invop
dtrace_invop_callsite:
	addq	$16, %rsp

	cmpl	$DTRACE_INVOP_PUSHL_EBP, %eax
	je	bp_push
	cmpl	$DTRACE_INVOP_CALL, %eax
	je	bp_call
	cmpl	$DTRACE_INVOP_LEAVE, %eax
	je	bp_leave
	cmpl	$DTRACE_INVOP_NOP, %eax
	je	bp_nop
	cmpl	$DTRACE_INVOP_RET, %eax
	je	bp_ret

	/* When all else fails, handle the trap in the usual way. */
	jmpq	*dtrace_invop_calltrap_addr

bp_push:
	/*
	 * We must emulate a "pushq %rbp".  To do this, we pull the
	 * interrupted (calling) stack down 8 bytes and store the base
	 * pointer in the freed slot.
	 */
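	/*
	 * After INTR_POP, the subq below and the temporary push of %rax,
	 * the hardware iret frame sits at offsets 24 (%rip), 32 (%cs),
	 * 40 (%rflags), 48 (%rsp) and 56 (%ss) from %rsp; the copy rebuilds
	 * it 16 bytes lower with the saved %rsp reduced by 8.
	 */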
	INTR_POP
	subq	$16, %rsp		/* make room to shift the frame down */
	pushq	%rax			/* push temp */
	movq	24(%rsp), %rax		/* load calling RIP */
	movq	%rax, 8(%rsp)		/* store calling RIP */
	movq	32(%rsp), %rax		/* load calling CS */
	movq	%rax, 16(%rsp)		/* store calling CS */
	movq	40(%rsp), %rax		/* load calling RFLAGS */
	movq	%rax, 24(%rsp)		/* store calling RFLAGS */
	movq	48(%rsp), %rax		/* load calling RSP */
	subq	$8, %rax		/* make room for %rbp */
	movq	%rax, 32(%rsp)		/* store calling RSP */
	movq	56(%rsp), %rax		/* load calling SS */
	movq	%rax, 40(%rsp)		/* store calling SS */
	movq	32(%rsp), %rax		/* reload calling RSP */
	movq	%rbp, (%rax)		/* store %rbp there */
	popq	%rax			/* pop off temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

bp_call:
	/*
	 * Emulate a "call" instruction.  The invop handler must have already
	 * updated the saved copy of %rip in the register set.  It's our job to
	 * pull the hardware-saved registers down to make space for the return
	 * address, which is provided by the invop handler in our scratch
	 * space.
	 */
	INTR_POP
	subq	$16, %rsp		/* make room to shift the frame down */
	pushq	%rax			/* push temp */
	pushq	%rbx			/* push temp */

	movq	32(%rsp), %rax		/* load calling RIP */
	movq	%rax, 16(%rsp)		/* store calling RIP */
	movq	40(%rsp), %rax		/* load calling CS */
	movq	%rax, 24(%rsp)		/* store calling CS */
	movq	48(%rsp), %rax		/* load calling RFLAGS */
	movq	%rax, 32(%rsp)		/* store calling RFLAGS */
	movq	56(%rsp), %rax		/* load calling RSP */
	subq	$8, %rax		/* make room for return address */
	movq	%rax, 40(%rsp)		/* store calling RSP */
	movq	64(%rsp), %rax		/* load calling SS */
	movq	%rax, 48(%rsp)		/* store calling SS */

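	/*
	 * dtrace_invop() wrote the return address into the scratch space
	 * allocated above, 16 bytes below the base of the original trap
	 * frame; relative to the current %rsp that is -(TF_RIP - 16).
	 */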
	movq	-(TF_RIP - 16)(%rsp), %rax /* load return address */
	movq	40(%rsp), %rbx		/* reload calling RSP */
	movq	%rax, (%rbx)		/* store return address */

	popq	%rbx			/* pop temp */
	popq	%rax			/* pop temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

bp_leave:
	/*
	 * We must emulate a "leave", which is the same as a "movq %rbp, %rsp"
	 * followed by a "popq %rbp".  This is quite a bit simpler on amd64
	 * than it is on i386 -- we can exploit the fact that the %rsp is
	 * explicitly saved to effect the pop without having to reshuffle
	 * the other data pushed for the trap.
	 */
	INTR_POP
	pushq	%rax			/* push temp */
	movq	8(%rsp), %rax		/* load calling RIP */
	movq	%rax, 8(%rsp)		/* store calling RIP */
	movq	(%rbp), %rax		/* get new %rbp */
	addq	$8, %rbp		/* adjust new %rsp */
	movq	%rbp, 32(%rsp)		/* store new %rsp */
	movq	%rax, %rbp		/* set new %rbp */
	popq	%rax			/* pop off temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

bp_nop:
	/* We must emulate a "nop". */
	INTR_POP
	iretq
	/*NOTREACHED*/

bp_ret:
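	/*
	 * Emulate a "ret": fetch the return address from the interrupted
	 * stack (via the saved %rsp), substitute it for the saved %rip,
	 * and pop it by adding 8 to the saved %rsp.
	 */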
	INTR_POP
	pushq	%rax			/* push temp */
	movq	32(%rsp), %rax		/* load %rsp */
	movq	(%rax), %rax		/* load calling RIP */
	movq	%rax, 8(%rsp)		/* store calling RIP */
	addq	$8, 32(%rsp)		/* adjust new %rsp */
	popq	%rax			/* pop off temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

	END(dtrace_invop_start)

/*
greg_t dtrace_getfp(void)
*/
	ENTRY(dtrace_getfp)
	movq	%rbp, %rax
	ret
	END(dtrace_getfp)

/*
uint32_t
dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new)
*/
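/*
 * Atomic compare-and-swap: if *target equals cmp, store new; either way
 * cmpxchg leaves the previous value of *target in %eax, which is returned.
 */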
	ENTRY(dtrace_cas32)
	movl	%esi, %eax
	lock
	cmpxchgl %edx, (%rdi)
	ret
	END(dtrace_cas32)

/*
void *
dtrace_casptr(void *target, void *cmp, void *new)
*/
	ENTRY(dtrace_casptr)
	movq	%rsi, %rax
	lock
	cmpxchgq %rdx, (%rdi)
	ret
	END(dtrace_casptr)

/*
uintptr_t
dtrace_caller(int aframes)
*/
	ENTRY(dtrace_caller)
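	/*
	 * Determining the probe's caller here would require a stack walk,
	 * so always return -1; the machine-independent DTrace code treats
	 * that as "unknown" and computes the caller by other means.
	 */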
	movq	$-1, %rax
	ret
	END(dtrace_caller)

/*
void
dtrace_copy(uintptr_t src, uintptr_t dest, size_t size)
*/
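/*
 * The routines below that touch user memory come in two flavors: a _nosmap
 * variant, and a _smap variant that brackets the user access with stac/clac
 * so that it is permitted while Supervisor Mode Access Prevention is
 * enabled.  The kernel selects the appropriate variant at runtime based on
 * whether the CPU supports SMAP.
 */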
	ENTRY(dtrace_copy_nosmap)
	pushq	%rbp
	movq	%rsp, %rbp

	xchgq	%rdi, %rsi		/* make %rsi source, %rdi dest */
	movq	%rdx, %rcx		/* load count */
	repz				/* repeat for count ... */
	smovb				/*   move from %ds:rsi to %es:rdi */
	leave
	ret
	END(dtrace_copy_nosmap)

	ENTRY(dtrace_copy_smap)
	pushq	%rbp
	movq	%rsp, %rbp

	xchgq	%rdi, %rsi		/* make %rsi source, %rdi dest */
	movq	%rdx, %rcx		/* load count */
	stac
	repz				/* repeat for count ... */
	smovb				/*   move from %ds:rsi to %es:rdi */
	clac
	leave
	ret
	END(dtrace_copy_smap)

/*
void
dtrace_copystr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
*/
	ENTRY(dtrace_copystr_nosmap)
	pushq	%rbp
	movq	%rsp, %rbp

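	/*
	 * Copy byte by byte until a NUL terminator is copied or the count
	 * is exhausted.  Whenever the remaining count is a multiple of 4k,
	 * check the DTrace fault flags passed in (%rcx) and stop early if a
	 * bad address has been recorded.
	 */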
0:
	movb	(%rdi), %al		/* load from source */
	movb	%al, (%rsi)		/* store to destination */
	addq	$1, %rdi		/* increment source pointer */
	addq	$1, %rsi		/* increment destination pointer */
	subq	$1, %rdx		/* decrement remaining count */
	cmpb	$0, %al
	je	2f
	testq	$0xfff, %rdx		/* test if count is a multiple of 4k */
	jnz	1f			/* if not, continue with copying */
	testq	$CPU_DTRACE_BADADDR, (%rcx) /* load and test dtrace flags */
	jnz	2f
1:
	cmpq	$0, %rdx
	jne	0b
2:
	leave
	ret

	END(dtrace_copystr_nosmap)

	ENTRY(dtrace_copystr_smap)
	pushq	%rbp
	movq	%rsp, %rbp

	stac
0:
	movb	(%rdi), %al		/* load from source */
	movb	%al, (%rsi)		/* store to destination */
	addq	$1, %rdi		/* increment source pointer */
	addq	$1, %rsi		/* increment destination pointer */
	subq	$1, %rdx		/* decrement remaining count */
	cmpb	$0, %al
	je	2f
	testq	$0xfff, %rdx		/* test if count is a multiple of 4k */
	jnz	1f			/* if not, continue with copying */
	testq	$CPU_DTRACE_BADADDR, (%rcx) /* load and test dtrace flags */
	jnz	2f
1:
	cmpq	$0, %rdx
	jne	0b
2:
	clac
	leave
	ret

	END(dtrace_copystr_smap)

/*
uintptr_t
dtrace_fulword(void *addr)
*/
	ENTRY(dtrace_fulword_nosmap)
	movq	(%rdi), %rax
	ret
	END(dtrace_fulword_nosmap)

	ENTRY(dtrace_fulword_smap)
	stac
	movq	(%rdi), %rax
	clac
	ret
	END(dtrace_fulword_smap)

/*
uint8_t
dtrace_fuword8_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword8_nocheck_nosmap)
	xorq	%rax, %rax
	movb	(%rdi), %al
	ret
	END(dtrace_fuword8_nocheck_nosmap)

	ENTRY(dtrace_fuword8_nocheck_smap)
	stac
	xorq	%rax, %rax
	movb	(%rdi), %al
	clac
	ret
	END(dtrace_fuword8_nocheck_smap)

/*
uint16_t
dtrace_fuword16_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword16_nocheck_nosmap)
	xorq	%rax, %rax
	movw	(%rdi), %ax
	ret
	END(dtrace_fuword16_nocheck_nosmap)

	ENTRY(dtrace_fuword16_nocheck_smap)
	stac
	xorq	%rax, %rax
	movw	(%rdi), %ax
	clac
	ret
	END(dtrace_fuword16_nocheck_smap)

/*
uint32_t
dtrace_fuword32_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword32_nocheck_nosmap)
	xorq	%rax, %rax
	movl	(%rdi), %eax
	ret
	END(dtrace_fuword32_nocheck_nosmap)

	ENTRY(dtrace_fuword32_nocheck_smap)
	stac
	xorq	%rax, %rax
	movl	(%rdi), %eax
	clac
	ret
	END(dtrace_fuword32_nocheck_smap)

/*
uint64_t
dtrace_fuword64_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword64_nocheck_nosmap)
	movq	(%rdi), %rax
	ret
	END(dtrace_fuword64_nocheck_nosmap)

	ENTRY(dtrace_fuword64_nocheck_smap)
	stac
	movq	(%rdi), %rax
	clac
	ret
	END(dtrace_fuword64_nocheck_smap)

/*
void
dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which,
    int fault, int fltoffs, uintptr_t illval)
*/
	ENTRY(dtrace_probe_error)
	pushq	%rbp
	movq	%rsp, %rbp
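	/*
	 * Call dtrace_probe(dtrace_probeid_error, state, epid, which,
	 * fault, fltoffs, illval): shift each argument up one register and
	 * pass the seventh argument (illval, from %r9) on the stack.
	 */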
	subq	$0x8, %rsp
	movq	%r9, (%rsp)
	movq	%r8, %r9
	movq	%rcx, %r8
	movq	%rdx, %rcx
	movq	%rsi, %rdx
	movq	%rdi, %rsi
	movl	dtrace_probeid_error(%rip), %edi
	call	dtrace_probe
	addq	$0x8, %rsp
	leave
	ret
	END(dtrace_probe_error)

/*
void
dtrace_membar_producer(void)
*/
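/*
 * amd64 already orders ordinary stores against stores and loads against
 * loads, so producer and consumer barriers only need to be opaque function
 * calls that the compiler cannot reorder across; no fence instruction is
 * required.
 */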
	ENTRY(dtrace_membar_producer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	END(dtrace_membar_producer)

/*
void
dtrace_membar_consumer(void)
*/
	ENTRY(dtrace_membar_consumer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	END(dtrace_membar_consumer)

/*
dtrace_icookie_t
dtrace_interrupt_disable(void)
*/
	ENTRY(dtrace_interrupt_disable)
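	/* The cookie is the saved %rflags; cli clears IF after saving it. */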
	pushfq
	popq	%rax
	cli
	ret
	END(dtrace_interrupt_disable)

/*
void
dtrace_interrupt_enable(dtrace_icookie_t cookie)
*/
	ENTRY(dtrace_interrupt_enable)
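	/* Restore the saved %rflags, and with it the interrupt flag. */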
	pushq	%rdi
	popfq
	ret
	END(dtrace_interrupt_enable)