1/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
2
3Permission is hereby granted, free of charge, to any person obtaining
4a copy of this software and associated documentation files (the
5``Software''), to deal in the Software without restriction, including
6without limitation the rights to use, copy, modify, merge, publish,
7distribute, sublicense, and/or sell copies of the Software, and to
8permit persons to whom the Software is furnished to do so, subject to
9the following conditions:
10
11The above copyright notice and this permission notice shall be
12included in all copies or substantial portions of the Software.
13
14THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
15EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
21
22#define LIBFFI_ASM
23#include <fficonfig.h>
24#include <ffi.h>
25#include <ffi_cfi.h>
26#include "internal.h"
27
/* CNAME(x): spell the global label x the way the platform expects.
   <machine/asm.h> supplies it when available; otherwise it is built
   here by pasting __USER_LABEL_PREFIX__ (if the compiler defines one)
   in front of x.  The CONCAT1/CONCAT2 pair forces the prefix macro to
   be expanded before the ## paste happens.  */
#if defined (HAVE_MACHINE_ASM_H)
# include <machine/asm.h>
#elif defined (__USER_LABEL_PREFIX__)
# define CONCAT2(a, b)	a ## b
# define CONCAT1(a, b)	CONCAT2 (a, b)
# define CNAME(x)	CONCAT1 (__USER_LABEL_PREFIX__, x)
#else
# define CNAME(x)	x
#endif

/* BE(X): X when assembling for big-endian AArch64, 0 otherwise.  */
#ifndef __AARCH64EB__
# define BE(X)	0
#else
# define BE(X)	X
#endif

/* Pointer-width helpers: PTR_REG(n) names register n at pointer width
   and PTR_SIZE is the pointer size in bytes (w regs / 4 under ILP32,
   x regs / 8 otherwise).  */
#ifdef __ILP32__
# define PTR_REG(n)	w##n
# define PTR_SIZE	4
#else
# define PTR_REG(n)	x##n
# define PTR_SIZE	8
#endif
59
60	.text
61	.align 4
62
/* ffi_call_SYSV

   C-level prototype:

     extern void ffi_call_SYSV (void *stack, void *frame,
				void (*fn)(void), void *rvalue,
				int flags, void *closure);

   Incoming registers:

     x0  stack    becomes sp; starts with the call context (vector
		  argument registers at offset 0, core argument
		  registers at offset 16*N_V_ARG_REG), with the stacked
		  arguments left in place once the context is popped
     x1  frame    caller-allocated area used as the frame record:
		  +0 x29, +8 x30, +16 rvalue, +24 flags
     x2  fn       function to call
     x3  rvalue   where the result is stored; also handed to fn in x8
     x4  flags    AARCH64_FLAG_ARG_V_BIT asks for q0-q7 to be loaded;
		  (flags & AARCH64_RET_MASK) selects the result handler
     x5  closure  copied into x18 when FFI_GO_CLOSURES is defined
*/

	cfi_startproc
CNAME(ffi_call_SYSV):
	/* The frame record lives in memory handed to us by the caller,
	   so the CFA is described relative to x1 rather than sp.  */
	cfi_def_cfa(x1, 32);
	stp	x29, x30, [x1]
	mov	x29, x1
	mov	sp, x0
	cfi_def_cfa_register(x29)
	cfi_rel_offset (x29, 0)
	cfi_rel_offset (x30, 8)

	/* x2-x5 are about to be overwritten with fn's own arguments, so
	   move everything we still need out of the way first.  */
	stp	x3, x4, [x29, #16]	/* frame: rvalue, flags */
	mov	x8, x3			/* structure return pointer */
	mov	x9, x2			/* fn */
#ifdef FFI_GO_CLOSURES
	mov	x18, x5			/* static chain */
#endif

	/* Vector argument registers are only loaded on request.  This
	   test must precede the core loads, which overwrite x4.  */
	tbz	w4, #AARCH64_FLAG_ARG_V_BIT, 1f
	ldp	q0, q1, [sp, #0]
	ldp	q2, q3, [sp, #32]
	ldp	q4, q5, [sp, #64]
	ldp	q6, q7, [sp, #96]
1:
	/* Core argument registers are always loaded.  */
	ldp	x6, x7, [sp, #16*N_V_ARG_REG + 48]
	ldp	x4, x5, [sp, #16*N_V_ARG_REG + 32]
	ldp	x2, x3, [sp, #16*N_V_ARG_REG + 16]
	ldp	x0, x1, [sp, #16*N_V_ARG_REG + 0]

	/* Pop the call context; sp now addresses the stacked arguments.  */
	add	sp, sp, #CALL_CONTEXT_SIZE

	blr	x9			/* fn (...) */

	ldp	x3, x4, [x29, #16]	/* x3 = rvalue, x4 = flags */

	/* Unwind the frame record before dispatching; every handler
	   below ends by returning straight to our caller.  */
	mov	sp, x29
	cfi_def_cfa_register (sp)
	ldp	x29, x30, [x29]

	/* Branch to handler number (flags & AARCH64_RET_MASK).  */
	and	w4, w4, #AARCH64_RET_MASK
	adr	x5, 0f
	add	x5, x5, x4, lsl #3
	br	x5

	/* Result handlers.  Every slot is exactly two instructions
	   (8 bytes), which is what the "lsl #3" above relies on.

	   The narrow integer handlers widen the value to 64 bits,
	   because the destination is an ffi_arg; they need a third
	   instruction and therefore run on into the following
	   "reserved" slot, which supplies the ret.  Q4 and Q3 likewise
	   fall through until Q2 finishes the store and returns.  */
	.p2align 4
0:	ret				/* VOID */
	nop
1:	str	x0, [x3]		/* INT64 */
	ret
2:	stp	x0, x1, [x3]		/* INT128 */
	ret
3:	brk	#1000			/* UNUSED */
	ret
4:	brk	#1000			/* UNUSED */
	ret
5:	brk	#1000			/* UNUSED */
	ret
6:	brk	#1000			/* UNUSED */
	ret
7:	brk	#1000			/* UNUSED */
	ret
8:	st4	{ v0.s, v1.s, v2.s, v3.s }[0], [x3]	/* S4 */
	ret
9:	st3	{ v0.s, v1.s, v2.s }[0], [x3]	/* S3 */
	ret
10:	stp	s0, s1, [x3]		/* S2 */
	ret
11:	str	s0, [x3]		/* S1 */
	ret
12:	st4	{ v0.d, v1.d, v2.d, v3.d }[0], [x3]	/* D4 */
	ret
13:	st3	{ v0.d, v1.d, v2.d }[0], [x3]	/* D3 */
	ret
14:	stp	d0, d1, [x3]		/* D2 */
	ret
15:	str	d0, [x3]		/* D1 */
	ret
16:	str	q3, [x3, #48]		/* Q4: continues into Q3 */
	nop
17:	str	q2, [x3, #32]		/* Q3: continues into Q2 */
	nop
18:	stp	q0, q1, [x3]		/* Q2 */
	ret
19:	str	q0, [x3]		/* Q1 */
	ret
20:	uxtb	w0, w0			/* UINT8 */
	str	x0, [x3]
21:	ret				/* reserved: tail of UINT8 */
	nop
22:	uxth	w0, w0			/* UINT16 */
	str	x0, [x3]
23:	ret				/* reserved: tail of UINT16 */
	nop
24:	mov	w0, w0			/* UINT32 */
	str	x0, [x3]
25:	ret				/* reserved: tail of UINT32 */
	nop
26:	sxtb	x0, w0			/* SINT8 */
	str	x0, [x3]
27:	ret				/* reserved: tail of SINT8 */
	nop
28:	sxth	x0, w0			/* SINT16 */
	str	x0, [x3]
29:	ret				/* reserved: tail of SINT16 */
	nop
30:	sxtw	x0, w0			/* SINT32 */
	str	x0, [x3]
31:	ret				/* reserved: tail of SINT32 */
	nop

	cfi_endproc

	.globl	CNAME(ffi_call_SYSV)
#ifdef __ELF__
	.type	CNAME(ffi_call_SYSV), #function
	.hidden	CNAME(ffi_call_SYSV)
	.size	CNAME(ffi_call_SYSV), . - CNAME(ffi_call_SYSV)
#endif
206
/* ffi_closure_SYSV / ffi_closure_SYSV_V

   Low-level glue reached directly from the closure trampoline.

   On entry x17 points to the struct ffi_closure and x16 has been
   clobbered; every other register is preserved.

   The glue allocates a frame holding a call context, saves the
   argument passing registers into it, and calls the generic C
   function ffi_closure_SYSV_inner () to do the real work.  On return
   the result registers are loaded back from the frame.

   Frame layout, from sp upwards (ffi_closure_SYSV_FS bytes in all):

     16 bytes			saved x29 and x30
     CALL_CONTEXT_SIZE bytes	call context: vector registers at
				offset 0, core registers at offset
				16*N_V_ARG_REG
     64 bytes			return value buffer
*/

#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64)

	/* _V entry point: identical to ffi_closure_SYSV except that
	   q0-q7 are saved into the call context as well.  */
	.p2align 4
CNAME(ffi_closure_SYSV_V):
	cfi_startproc
	stp	x29, x30, [sp, #-ffi_closure_SYSV_FS]!
	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
	cfi_rel_offset (x29, 0)
	cfi_rel_offset (x30, 8)

	/* Spill the vector argument registers.  */
	stp	q6, q7, [sp, #16 + 96]
	stp	q4, q5, [sp, #16 + 64]
	stp	q2, q3, [sp, #16 + 32]
	stp	q0, q1, [sp, #16 + 0]

	/* Continue at the shared "0:" label inside ffi_closure_SYSV,
	   just past its own frame allocation.  */
	b	0f
	cfi_endproc

	.globl	CNAME(ffi_closure_SYSV_V)
#ifdef __ELF__
	.type	CNAME(ffi_closure_SYSV_V), #function
	.hidden	CNAME(ffi_closure_SYSV_V)
	.size	CNAME(ffi_closure_SYSV_V), . - CNAME(ffi_closure_SYSV_V)
#endif
245
246	.align	4
247	cfi_startproc
248CNAME(ffi_closure_SYSV):
249	stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
250	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
251	cfi_rel_offset (x29, 0)
252	cfi_rel_offset (x30, 8)
2530:
254	mov     x29, sp
255
256	/* Save the argument passing core registers.  */
257	stp     x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
258	stp     x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
259	stp     x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
260	stp     x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
261
262	/* Load ffi_closure_inner arguments.  */
263	ldp	PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET]	/* load cif, fn */
264	ldr	PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2]	/* load user_data */
265.Ldo_closure:
266	add	x3, sp, #16				/* load context */
267	add	x4, sp, #ffi_closure_SYSV_FS		/* load stack */
268	add	x5, sp, #16+CALL_CONTEXT_SIZE		/* load rvalue */
269	mov	x6, x8					/* load struct_rval */
270	bl      CNAME(ffi_closure_SYSV_inner)
271
272	/* Load the return value as directed.  */
273	adr	x1, 0f
274	and	w0, w0, #AARCH64_RET_MASK
275	add	x1, x1, x0, lsl #3
276	add	x3, sp, #16+CALL_CONTEXT_SIZE
277	br	x1
278
279	/* Note that each table entry is 2 insns, and thus 8 bytes.  */
280	.align	4
2810:	b	99f			/* VOID */
282	nop
2831:	ldr	x0, [x3]		/* INT64 */
284	b	99f
2852:	ldp	x0, x1, [x3]		/* INT128 */
286	b	99f
2873:	brk	#1000			/* UNUSED */
288	nop
2894:	brk	#1000			/* UNUSED */
290	nop
2915:	brk	#1000			/* UNUSED */
292	nop
2936:	brk	#1000			/* UNUSED */
294	nop
2957:	brk	#1000			/* UNUSED */
296	nop
2978:	ldr	s3, [x3, #12]		/* S4 */
298	nop
2999:	ldr	s2, [x2, #8]		/* S3 */
300	nop
30110:	ldp	s0, s1, [x3]		/* S2 */
302	b	99f
30311:	ldr	s0, [x3]		/* S1 */
304	b	99f
30512:	ldr	d3, [x3, #24]		/* D4 */
306	nop
30713:	ldr	d2, [x3, #16]		/* D3 */
308	nop
30914:	ldp	d0, d1, [x3]		/* D2 */
310	b	99f
31115:	ldr	d0, [x3]		/* D1 */
312	b	99f
31316:	ldr	q3, [x3, #48]		/* Q4 */
314	nop
31517:	ldr	q2, [x3, #32]		/* Q3 */
316	nop
31718:	ldp	q0, q1, [x3]		/* Q2 */
318	b	99f
31919:	ldr	q0, [x3]		/* Q1 */
320	b	99f
32120:	ldrb	w0, [x3, #BE(7)]	/* UINT8 */
322	b	99f
32321:	brk	#1000			/* reserved */
324	nop
32522:	ldrh	w0, [x3, #BE(6)]	/* UINT16 */
326	b	99f
32723:	brk	#1000			/* reserved */
328	nop
32924:	ldr	w0, [x3, #BE(4)]	/* UINT32 */
330	b	99f
33125:	brk	#1000			/* reserved */
332	nop
33326:	ldrsb	x0, [x3, #BE(7)]	/* SINT8 */
334	b	99f
33527:	brk	#1000			/* reserved */
336	nop
33728:	ldrsh	x0, [x3, #BE(6)]	/* SINT16 */
338	b	99f
33929:	brk	#1000			/* reserved */
340	nop
34130:	ldrsw	x0, [x3, #BE(4)]	/* SINT32 */
342	nop
34331:					/* reserved */
34499:	ldp     x29, x30, [sp], #ffi_closure_SYSV_FS
345	cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS)
346	cfi_restore (x29)
347	cfi_restore (x30)
348	ret
349	cfi_endproc
350
351	.globl	CNAME(ffi_closure_SYSV)
352#ifdef __ELF__
353	.type	CNAME(ffi_closure_SYSV), #function
354	.hidden	CNAME(ffi_closure_SYSV)
355	.size	CNAME(ffi_closure_SYSV), . - CNAME(ffi_closure_SYSV)
356#endif
357
#if FFI_EXEC_TRAMPOLINE_TABLE
/* ffi_closure_trampoline_table_page

   A table of identical trampolines, 16384 / FFI_TRAMPOLINE_SIZE of
   them.  Assuming the assembler treats the numeric adr operands as
   PC-relative displacements (TODO confirm for the toolchain in use),
   each trampoline addresses a two-word record 16384 bytes before
   itself: the first word is loaded into x17, the second word (8 bytes
   further on) into x16, and control then transfers to x16.  */
	.p2align 12
CNAME(ffi_closure_trampoline_table_page):
	.rept	16384 / FFI_TRAMPOLINE_SIZE
	adr	x17, -16384		/* x17 = &record[0] */
	adr	x16, -16380		/* x16 = &record[1]; this insn sits 4 bytes later */
	ldr	x17, [x17]
	ldr	x16, [x16]
	br	x16
	.endr

	.globl	CNAME(ffi_closure_trampoline_table_page)
#ifdef __ELF__
	.type	CNAME(ffi_closure_trampoline_table_page), #function
	.hidden	CNAME(ffi_closure_trampoline_table_page)
	.size	CNAME(ffi_closure_trampoline_table_page), . - CNAME(ffi_closure_trampoline_table_page)
#endif
#endif
376
#ifdef FFI_GO_CLOSURES
/* ffi_go_closure_SYSV_V / ffi_go_closure_SYSV

   Go closure entry points.  They mirror ffi_closure_SYSV_V and
   ffi_closure_SYSV, except that the closure is found through x18
   rather than x17: cif and fn are read from [x18 + PTR_SIZE] onwards
   and x18 itself is passed on as user_data.  Both finish by joining
   the common code at .Ldo_closure.  */

	/* _V entry point: also saves q0-q7 into the call context.  */
	.p2align 4
CNAME(ffi_go_closure_SYSV_V):
	cfi_startproc
	stp	x29, x30, [sp, #-ffi_closure_SYSV_FS]!
	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
	cfi_rel_offset (x29, 0)
	cfi_rel_offset (x30, 8)

	/* Spill the vector argument registers.  */
	stp	q6, q7, [sp, #16 + 96]
	stp	q4, q5, [sp, #16 + 64]
	stp	q2, q3, [sp, #16 + 32]
	stp	q0, q1, [sp, #16 + 0]

	/* Continue at "0:" in ffi_go_closure_SYSV, just past its own
	   frame allocation.  */
	b	0f
	cfi_endproc

	.globl	CNAME(ffi_go_closure_SYSV_V)
#ifdef __ELF__
	.type	CNAME(ffi_go_closure_SYSV_V), #function
	.hidden	CNAME(ffi_go_closure_SYSV_V)
	.size	CNAME(ffi_go_closure_SYSV_V), . - CNAME(ffi_go_closure_SYSV_V)
#endif

	.p2align 4
	cfi_startproc
CNAME(ffi_go_closure_SYSV):
	stp	x29, x30, [sp, #-ffi_closure_SYSV_FS]!
	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
	cfi_rel_offset (x29, 0)
	cfi_rel_offset (x30, 8)
0:
	mov	x29, sp

	/* Spill the core argument registers into the call context.  */
	stp	x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
	stp	x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
	stp	x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
	stp	x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]

	/* Set up the first three ffi_closure_SYSV_inner arguments.  */
	mov	x2, x18					/* user_data = the closure itself */
	ldp	PTR_REG(0), PTR_REG(1), [x18, #PTR_SIZE]	/* cif, fn */
	b	.Ldo_closure
	cfi_endproc

	.globl	CNAME(ffi_go_closure_SYSV)
#ifdef __ELF__
	.type	CNAME(ffi_go_closure_SYSV), #function
	.hidden	CNAME(ffi_go_closure_SYSV)
	.size	CNAME(ffi_go_closure_SYSV), . - CNAME(ffi_go_closure_SYSV)
#endif
#endif /* FFI_GO_CLOSURES */
430
#if defined (__ELF__) && defined (__linux__)
	/* On ELF/Linux, emit an empty, flag-less .note.GNU-stack section
	   (presumably the usual marker that this object does not need an
	   executable stack).  */
	.section .note.GNU-stack,"",%progbits
#endif
434
435