1//===-- xray_trampoline_x86.s -----------------------------------*- ASM -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of XRay, a dynamic runtime instrumentation system.
10//
11// This implements the X86-specific assembler for the trampolines.
12//
13//===----------------------------------------------------------------------===//
14
15#include "../builtins/assembly.h"
16#include "../sanitizer_common/sanitizer_asm.h"
17
18// XRay trampolines which are not produced by intrinsics are not System V AMD64
19// ABI compliant because they are called with a stack that is always misaligned
// by 8 bytes with respect to a 16-byte alignment. This is because they are
21// called immediately after the call to, or immediately before returning from,
22// the function being instrumented. This saves space in the patch point, but
23// misaligns the stack by 8 bytes.
24
// ALIGN_STACK_16B: lower %rsp by one 8-byte slot and record the matching CFA
// adjustment for the unwinder. Undone by RESTORE_STACK_ALIGNMENT.
//
// The Apple variant doubles the dollar sign: inside a parameterless macro the
// Darwin macro syntax reads a single dollar sign followed by a digit as a
// macro-argument reference rather than as an immediate.
.macro ALIGN_STACK_16B
#if !defined(__APPLE__)
	subq	$8, %rsp
#else
	subq	$$8, %rsp
#endif
	CFI_ADJUST_CFA_OFFSET(8)
.endm
33
// RESTORE_STACK_ALIGNMENT: raise %rsp by the 8 bytes that ALIGN_STACK_16B
// removed and record the matching CFA adjustment. Note that addq rewrites
// RFLAGS, so this has to run after any flag restore the caller cares about.
//
// As in ALIGN_STACK_16B, the Apple variant escapes the immediate with a
// doubled dollar sign because of the Darwin macro-argument syntax.
.macro RESTORE_STACK_ALIGNMENT
#if !defined(__APPLE__)
	addq	$8, %rsp
#else
	addq	$$8, %rsp
#endif
	CFI_ADJUST_CFA_OFFSET(-8)
.endm
42
43// This macro should keep the stack aligned to 16 bytes.
// SAVE_REGISTERS: push RFLAGS, then spill the registers listed below into a
// 240-byte frame. Total stack growth is 248 bytes (8 for RFLAGS + 240).
//
// Frame layout, as offsets from %rsp after the macro has run:
//   240        RFLAGS (written by pushfq)
//   232        %rbp
//   216 .. 104 %xmm0 .. %xmm7, 16 bytes each, descending
//    96        %rdi
//    88        %rax
//    80        %rdx
//    72        %rsi
//    64        %rcx
//    56, 48    %r8, %r9
//    40 .. 0   %r10 .. %r15, 8 bytes each, descending
//
// The XMM slots sit at offsets that are 8 modulo 16, hence the unaligned
// movupd stores.
//
// The frame-size immediate is escaped with a doubled dollar sign on Apple
// targets, exactly as ALIGN_STACK_16B does for its immediate: in a
// parameterless macro the Darwin macro syntax would otherwise consume the
// dollar sign plus the first digit as a macro-argument reference and leave a
// bare memory operand behind.
.macro SAVE_REGISTERS
	// Capture RFLAGS before the subq below modifies it.
	pushfq
	CFI_ADJUST_CFA_OFFSET(8)
#if defined(__APPLE__)
	subq	$$240, %rsp
#else
	subq	$240, %rsp
#endif
	CFI_ADJUST_CFA_OFFSET(240)
	movq	%rbp, 232(%rsp)
	movupd	%xmm0, 216(%rsp)
	movupd	%xmm1, 200(%rsp)
	movupd	%xmm2, 184(%rsp)
	movupd	%xmm3, 168(%rsp)
	movupd	%xmm4, 152(%rsp)
	movupd	%xmm5, 136(%rsp)
	movupd	%xmm6, 120(%rsp)
	movupd	%xmm7, 104(%rsp)
	movq	%rdi, 96(%rsp)
	movq	%rax, 88(%rsp)
	movq	%rdx, 80(%rsp)
	movq	%rsi, 72(%rsp)
	movq	%rcx, 64(%rsp)
	movq	%r8, 56(%rsp)
	movq	%r9, 48(%rsp)
	movq	%r10, 40(%rsp)
	movq	%r11, 32(%rsp)
	movq	%r12, 24(%rsp)
	movq	%r13, 16(%rsp)
	movq	%r14, 8(%rsp)
	movq	%r15, 0(%rsp)
.endm

// This macro should keep the stack aligned to 16 bytes.
//
// RESTORE_REGISTERS: exact inverse of SAVE_REGISTERS. Reloads every register
// from the 240-byte frame (same offsets as documented on SAVE_REGISTERS),
// releases the frame, then pops RFLAGS. The immediate is escaped for Apple
// targets for the same reason as in SAVE_REGISTERS.
.macro RESTORE_REGISTERS
	movq	232(%rsp), %rbp
	movupd	216(%rsp), %xmm0
	movupd	200(%rsp), %xmm1
	movupd	184(%rsp), %xmm2
	movupd	168(%rsp), %xmm3
	movupd	152(%rsp), %xmm4
	movupd	136(%rsp), %xmm5
	movupd	120(%rsp), %xmm6
	movupd	104(%rsp), %xmm7
	movq	96(%rsp), %rdi
	movq	88(%rsp), %rax
	movq	80(%rsp), %rdx
	movq	72(%rsp), %rsi
	movq	64(%rsp), %rcx
	movq	56(%rsp), %r8
	movq	48(%rsp), %r9
	movq	40(%rsp), %r10
	movq	32(%rsp), %r11
	movq	24(%rsp), %r12
	movq	16(%rsp), %r13
	movq	8(%rsp), %r14
	movq	0(%rsp), %r15
	// addq clobbers RFLAGS, so the frame must be released before popfq
	// reinstates the saved flags.
#if defined(__APPLE__)
	addq	$$240, %rsp
#else
	addq	$240, %rsp
#endif
	CFI_ADJUST_CFA_OFFSET(-240)
	popfq
	CFI_ADJUST_CFA_OFFSET(-8)
.endm
102
	// Place everything that follows in the code section, using the section
	// spelling that matches the target object format.
	.text
#if !defined(__APPLE__)
	.section .text
	// Name the logical source file for the assembler.
	.file "xray_trampoline_x86.S"
#else
	// Apple targets name the section as a segment,section pair.
	.section __TEXT,__text
#endif
110
111//===----------------------------------------------------------------------===//
112
113	.globl ASM_SYMBOL(__xray_FunctionEntry)
114	ASM_HIDDEN(__xray_FunctionEntry)
115	.align 16, 0x90
116	ASM_TYPE_FUNCTION(__xray_FunctionEntry)
117# LLVM-MCA-BEGIN __xray_FunctionEntry
118ASM_SYMBOL(__xray_FunctionEntry):
119	CFI_STARTPROC
120	ALIGN_STACK_16B
121	SAVE_REGISTERS
122
123	// This load has to be atomic, it's concurrent with __xray_patch().
124	// On x86/amd64, a simple (type-aligned) MOV instruction is enough.
125	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
126	testq	%rax, %rax
127	je	LOCAL_LABEL(tmp0)
128
129	// The patched function prologue puts its xray_instr_map index into %r10d.
130	movl	%r10d, %edi
131	xor	%esi,%esi
132	callq	*%rax
133
134LOCAL_LABEL(tmp0):
135	RESTORE_REGISTERS
136	RESTORE_STACK_ALIGNMENT
137	retq
138# LLVM-MCA-END
139	ASM_SIZE(__xray_FunctionEntry)
140	CFI_ENDPROC
141
142//===----------------------------------------------------------------------===//
143
144	.globl ASM_SYMBOL(__xray_FunctionExit)
145	ASM_HIDDEN(__xray_FunctionExit)
146	.align 16, 0x90
147	ASM_TYPE_FUNCTION(__xray_FunctionExit)
148# LLVM-MCA-BEGIN __xray_FunctionExit
149ASM_SYMBOL(__xray_FunctionExit):
150	CFI_STARTPROC
151	ALIGN_STACK_16B
152
153	// Save the important registers first. Since we're assuming that this
154	// function is only jumped into, we only preserve the registers for
155	// returning.
156	subq	$64, %rsp
157	CFI_ADJUST_CFA_OFFSET(64)
158	movq  %rbp, 48(%rsp)
159	movupd	%xmm0, 32(%rsp)
160	movupd	%xmm1, 16(%rsp)
161	movq	%rax, 8(%rsp)
162	movq	%rdx, 0(%rsp)
163	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
164	testq %rax,%rax
165	je	LOCAL_LABEL(tmp2)
166
167	movl	%r10d, %edi
168	movl	$1, %esi
169	callq	*%rax
170
171LOCAL_LABEL(tmp2):
172	// Restore the important registers.
173	movq  48(%rsp), %rbp
174	movupd	32(%rsp), %xmm0
175	movupd	16(%rsp), %xmm1
176	movq	8(%rsp), %rax
177	movq	0(%rsp), %rdx
178	addq	$64, %rsp
179	CFI_ADJUST_CFA_OFFSET(-64)
180
181	RESTORE_STACK_ALIGNMENT
182	retq
183# LLVM-MCA-END
184	ASM_SIZE(__xray_FunctionExit)
185	CFI_ENDPROC
186
187//===----------------------------------------------------------------------===//
188
189	.globl ASM_SYMBOL(__xray_FunctionTailExit)
190	ASM_HIDDEN(__xray_FunctionTailExit)
191	.align 16, 0x90
192	ASM_TYPE_FUNCTION(__xray_FunctionTailExit)
193# LLVM-MCA-BEGIN __xray_FunctionTailExit
194ASM_SYMBOL(__xray_FunctionTailExit):
195	CFI_STARTPROC
196	ALIGN_STACK_16B
197	SAVE_REGISTERS
198
199	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
200	testq %rax,%rax
201	je	LOCAL_LABEL(tmp4)
202
203	movl	%r10d, %edi
204	movl	$2, %esi
205	callq	*%rax
206
207LOCAL_LABEL(tmp4):
208	RESTORE_REGISTERS
209	RESTORE_STACK_ALIGNMENT
210	retq
211# LLVM-MCA-END
212	ASM_SIZE(__xray_FunctionTailExit)
213	CFI_ENDPROC
214
215//===----------------------------------------------------------------------===//
216
217	.globl ASM_SYMBOL(__xray_ArgLoggerEntry)
218	ASM_HIDDEN(__xray_ArgLoggerEntry)
219	.align 16, 0x90
220	ASM_TYPE_FUNCTION(__xray_ArgLoggerEntry)
221# LLVM-MCA-BEGIN __xray_ArgLoggerEntry
222ASM_SYMBOL(__xray_ArgLoggerEntry):
223	CFI_STARTPROC
224	ALIGN_STACK_16B
225	SAVE_REGISTERS
226
227	// Again, these function pointer loads must be atomic; MOV is fine.
228	movq	ASM_SYMBOL(_ZN6__xray13XRayArgLoggerE)(%rip), %rax
229	testq	%rax, %rax
230	jne	LOCAL_LABEL(arg1entryLog)
231
232	// If [arg1 logging handler] not set, defer to no-arg logging.
233	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
234	testq	%rax, %rax
235	je	LOCAL_LABEL(arg1entryFail)
236
237LOCAL_LABEL(arg1entryLog):
238
239	// First argument will become the third
240	movq	%rdi, %rdx
241
242	// XRayEntryType::LOG_ARGS_ENTRY into the second
243	mov	$0x3, %esi
244
245	// 32-bit function ID becomes the first
246	movl	%r10d, %edi
247
248	callq	*%rax
249
250LOCAL_LABEL(arg1entryFail):
251	RESTORE_REGISTERS
252	RESTORE_STACK_ALIGNMENT
253	retq
254# LLVM-MCA-END
255	ASM_SIZE(__xray_ArgLoggerEntry)
256	CFI_ENDPROC
257
258//===----------------------------------------------------------------------===//
259
260	.global ASM_SYMBOL(__xray_CustomEvent)
261	ASM_HIDDEN(__xray_CustomEvent)
262	.align 16, 0x90
263	ASM_TYPE_FUNCTION(__xray_CustomEvent)
264# LLVM-MCA-BEGIN __xray_CustomEvent
265ASM_SYMBOL(__xray_CustomEvent):
266	CFI_STARTPROC
267	SAVE_REGISTERS
268
269	// We take two arguments to this trampoline, which should be in rdi	and rsi
270	// already.
271	movq ASM_SYMBOL(_ZN6__xray22XRayPatchedCustomEventE)(%rip), %rax
272	testq %rax,%rax
273	je LOCAL_LABEL(customEventCleanup)
274
275	callq	*%rax
276
277LOCAL_LABEL(customEventCleanup):
278	RESTORE_REGISTERS
279	retq
280# LLVM-MCA-END
281	ASM_SIZE(__xray_CustomEvent)
282	CFI_ENDPROC
283
284//===----------------------------------------------------------------------===//
285
286	.global ASM_SYMBOL(__xray_TypedEvent)
287	ASM_HIDDEN(__xray_TypedEvent)
288	.align 16, 0x90
289	ASM_TYPE_FUNCTION(__xray_TypedEvent)
290# LLVM-MCA-BEGIN __xray_TypedEvent
291ASM_SYMBOL(__xray_TypedEvent):
292	CFI_STARTPROC
293	SAVE_REGISTERS
294
295	// We pass three arguments to this trampoline, which should be in rdi, rsi
296	// and rdx without our intervention.
297	movq ASM_SYMBOL(_ZN6__xray21XRayPatchedTypedEventE)(%rip), %rax
298	testq %rax,%rax
299	je LOCAL_LABEL(typedEventCleanup)
300
301	callq	*%rax
302
303LOCAL_LABEL(typedEventCleanup):
304	RESTORE_REGISTERS
305	retq
306# LLVM-MCA-END
307	ASM_SIZE(__xray_TypedEvent)
308	CFI_ENDPROC
309
310//===----------------------------------------------------------------------===//
311
// NOTE(review): NO_EXEC_STACK_DIRECTIVE is a macro supplied by one of the
// included headers; presumably it emits the note that marks this object as not
// requiring an executable stack. Confirm against its definition.
NO_EXEC_STACK_DIRECTIVE
313