/* -----------------------------------------------------------------------
   v9.S - Copyright (c) 2000, 2003, 2004, 2008 Red Hat, Inc.

   SPARC 64-bit Foreign Function Interface

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   ``Software''), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   DEALINGS IN THE SOFTWARE.
   ----------------------------------------------------------------------- */
26
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
#include "internal.h"

#ifdef SPARC64

/* Two-step token pasting: going through C1 lets both arguments be
   macro-expanded before C2 joins them with ##.  */
#define C2(X, Y)  X ## Y
#define C1(X, Y)  C2(X, Y)

/* C(Y): the assembler-level spelling of the C symbol Y.  When the
   compiler defines __USER_LABEL_PREFIX__, that prefix is pasted in
   front of Y; otherwise Y is used unchanged.  */
#ifdef __USER_LABEL_PREFIX__
# define C(Y)	C1(__USER_LABEL_PREFIX__, Y)
#else
# define C(Y)	Y
#endif

/* L(Y): Y with ".L" pasted in front of it.  */
#define L(Y)	C1(.L, Y)

/* E(INDEX): opens one entry of a return-type dispatch table.  Every
   entry is aligned to 16 bytes, which is the stride the dispatch code
   uses (index shifted left by 4).  Except when both __sun__ and
   __svr4__ are defined, the entry is additionally pinned with .org at
   INDEX * 16 bytes past the nearest preceding local label 2, so the
   table layout is tied to the numeric value of INDEX.  */
#if defined(__sun__) && defined(__svr4__)
# define E(INDEX)	.align 16
#else
# define E(INDEX)	.align 16; .org 2b + INDEX * 16
#endif
49
/* Bias added to %sp and %fp whenever this file uses them to address
   memory.  */
#define STACK_BIAS 2047

/* ffi_call_v9

   Builds the outgoing argument area with ffi_prep_args_v9, loads the
   argument registers from it, calls the target function and stores the
   result through rvalue according to the return type encoded in the
   flags that ffi_prep_args_v9 returned.

   Register use after the save, as read by the code below:
     %i0  cif, handed to ffi_prep_args_v9
     %i1  address of the function to call
     %i2  rvalue, destination of the return value
     %i3  avalue, handed to ffi_prep_args_v9
     %i4  the caller's %o4, which the save adds to %sp (presumably the
	  negated frame size -- confirm against the C caller)
     %i5  static chain, copied into %g5 for the callee
     %l0  flags returned by ffi_prep_args_v9
     %l1  return-type index scaled by 16
     %l2  scratch buffer used only by the SPARC_RET_STRUCT case  */

	.text
	.align 8
	.globl	C(ffi_call_v9)
	.type	C(ffi_call_v9),#function
	FFI_HIDDEN(C(ffi_call_v9))

C(ffi_call_v9):
.LUW0:
	save	%sp, %o4, %sp
.LUW1:
	mov	%i0, %o0			! copy cif
	add	%sp, STACK_BIAS+128+48, %o1	! load args area
	mov	%i2, %o2			! copy rvalue
	call	C(ffi_prep_args_v9)
	 mov	%i3, %o3			! copy avalue

	! The 48 bytes released here sat between the register save area
	! and the args area, so from now on the args area starts at
	! %sp+STACK_BIAS+128.
	andcc	%o0, SPARC_FLAG_FP_ARGS, %g0	! need fp regs?
	add	%sp, 48, %sp			! deallocate prep frame
	be,pt	%xcc, 1f
	 mov	%o0, %l0			! save flags

	ldd	[%sp+STACK_BIAS+128], %f0	! load all fp arg regs
	ldd	[%sp+STACK_BIAS+128+8], %f2
	ldd	[%sp+STACK_BIAS+128+16], %f4
	ldd	[%sp+STACK_BIAS+128+24], %f6
	ldd	[%sp+STACK_BIAS+128+32], %f8
	ldd	[%sp+STACK_BIAS+128+40], %f10
	ldd	[%sp+STACK_BIAS+128+48], %f12
	ldd	[%sp+STACK_BIAS+128+56], %f14
	ldd	[%sp+STACK_BIAS+128+64], %f16
	ldd	[%sp+STACK_BIAS+128+72], %f18
	ldd	[%sp+STACK_BIAS+128+80], %f20
	ldd	[%sp+STACK_BIAS+128+88], %f22
	ldd	[%sp+STACK_BIAS+128+96], %f24
	ldd	[%sp+STACK_BIAS+128+104], %f26
	ldd	[%sp+STACK_BIAS+128+112], %f28
	ldd	[%sp+STACK_BIAS+128+120], %f30

1:	ldx	[%sp+STACK_BIAS+128], %o0	! load all int arg regs
	ldx	[%sp+STACK_BIAS+128+8], %o1
	ldx	[%sp+STACK_BIAS+128+16], %o2
	ldx	[%sp+STACK_BIAS+128+24], %o3
	ldx	[%sp+STACK_BIAS+128+32], %o4
	ldx	[%sp+STACK_BIAS+128+40], %o5
	call	%i1
	 mov	%i5, %g5			! load static chain

	! Dispatch on the return type.  The call below is used only to
	! obtain its own address, i.e. label 0, in %o7; the jmp then
	! lands on table entry number ret_type, each entry being 16
	! bytes long.
0:	call	1f		! load pc in %o7
	 and	%l0, SPARC_FLAG_RET_MASK, %l1
1:	sll	%l1, 4, %l1
	add	%o7, %l1, %o7	! o7 = 0b + ret_type*16
	jmp	%o7+(2f-0b)
	 nop

	! Return-type table.  Each entry holds at most four instructions.
	! A store placed in the delay slot of a return runs after the
	! register window has been restored, so there %o0 and %o2 name
	! what this frame calls %i0 and %i2, the rvalue pointer.
	.align	16
2:
E(SPARC_RET_VOID)
	return	%i7+8
	 nop
E(SPARC_RET_STRUCT)
	! Reserve 64 more bytes of stack and point %l2 at them; the
	! sequence is finished at label 8 below the table.
	add	%sp, STACK_BIAS-64+128+48, %l2
	sub	%sp, 64, %sp
	b	8f
	 stx	%o0, [%l2]
E(SPARC_RET_UINT8)
	and	%o0, 0xff, %i0
	return	%i7+8
	 stx	%o0, [%o2]
E(SPARC_RET_SINT8)
	sll	%o0, 24, %o0
	sra	%o0, 24, %i0
	return	%i7+8
	 stx	%o0, [%o2]
E(SPARC_RET_UINT16)
	sll	%o0, 16, %o0
	srl	%o0, 16, %i0
	return	%i7+8
	 stx	%o0, [%o2]
E(SPARC_RET_SINT16)
	sll	%o0, 16, %o0
	sra	%o0, 16, %i0
	return	%i7+8
	 stx	%o0, [%o2]
E(SPARC_RET_UINT32)
	srl	%o0, 0, %i0
	return	%i7+8
	 stx	%o0, [%o2]
E(SP_V9_RET_SINT32)
	sra	%o0, 0, %i0
	return	%i7+8
	 stx	%o0, [%o2]
E(SPARC_RET_INT64)
	stx	%o0, [%i2]
	return	%i7+8
	 nop
E(SPARC_RET_INT128)
	stx	%o0, [%i2]
	stx	%o1, [%i2+8]
	return	%i7+8
	 nop
	! The F_8 and F_6 entries have no return of their own: they store
	! their two extra floats and fall through into the next entry.
	! The nops pad each entry to exactly 16 bytes.
E(SPARC_RET_F_8)
	st	%f7, [%i2+7*4]
	nop
	st	%f6, [%i2+6*4]
	nop
E(SPARC_RET_F_6)
	st	%f5, [%i2+5*4]
	nop
	st	%f4, [%i2+4*4]
	nop
E(SPARC_RET_F_4)
	std	%f2, [%i2+2*4]
	return	%i7+8
	 std	%f0, [%o2]
E(SPARC_RET_F_2)
	return	%i7+8
	 std	%f0, [%o2]
	! The F_3 entry likewise falls through into the F_1 entry.
E(SP_V9_RET_F_3)
	st	%f2, [%i2+2*4]
	nop
	st	%f1, [%i2+1*4]
	nop
E(SPARC_RET_F_1)
	return	%i7+8
	 st	%f0, [%o2]

	! Finish the SPARC_RET_STRUCT sequence.
	! The buffer at %l2 receives the four integer return registers in
	! its first 32 bytes and four fp register pairs in the next 32.
	.align	8
8:	stx	%o1, [%l2+8]
	stx	%o2, [%l2+16]
	stx	%o3, [%l2+24]
	std	%f0, [%l2+32]
	std	%f2, [%l2+40]
	std	%f4, [%l2+48]
	std	%f6, [%l2+56]

	! Copy the structure into place.
	srl	%l0, SPARC_SIZEMASK_SHIFT, %o0	! load size_mask
	mov	%i2, %o1			! load dst
	mov	%l2, %o2			! load src_gp
	call	C(ffi_struct_float_copy)
	 add	%l2, 32, %o3			! load src_fp

	return	%i7+8
	 nop

.LUW2:
	.size	C(ffi_call_v9), . - C(ffi_call_v9)
200
201
#undef STACKFRAME
#define	STACKFRAME	 336	/* 16*8 register window +
				   6*8 args backing store +
				   20*8 locals */
#define	FP		%fp+STACK_BIAS

/* Locals below FP, as used by the code in this block:
     FP-160 .. FP-129	32-byte return value buffer filled in by
			ffi_closure_sparc_inner_v9
     FP-128 .. FP-1	the sixteen fp argument register pairs %f0-%f30
   The integer argument registers are spilled upward, at FP+128.  */

/* ffi_go_closure_v9(...)

   Receives its closure data pointer in %g5.  Loads the two 8-byte words
   at offsets 8 and 16 from %g5 into %o0 and %o1, passes %g5 itself in
   %o2, and then joins the body shared with ffi_closure_v9 at the local
   label 0.  (Presumably these are cif, fun and user data -- confirm
   against ffi_closure_sparc_inner_v9.)  */

	.align 8
	.globl	C(ffi_go_closure_v9)
	.type	C(ffi_go_closure_v9),#function
	FFI_HIDDEN(C(ffi_go_closure_v9))

C(ffi_go_closure_v9):
.LUW3:
	save	%sp, -STACKFRAME, %sp
.LUW4:
	ldx	[%g5+8], %o0
	ldx	[%g5+16], %o1
	b	0f				! shared body in ffi_closure_v9
	 mov	%g5, %o2

.LUW5:
	.size	C(ffi_go_closure_v9), . - C(ffi_go_closure_v9)

/* ffi_closure_v9(...)

   Receives the closure argument in %g1.  The three 8-byte words that
   follow the trampoline, at %g1+FFI_TRAMPOLINE_SIZE, are loaded into
   %o0-%o2 as the first three arguments of ffi_closure_sparc_inner_v9.
   The incoming integer and fp argument registers are then spilled to
   the stack, the inner function is called, and the value it left in
   the return buffer is loaded into the return registers selected by
   the flags it returned.  */

	.align 8
	.globl	C(ffi_closure_v9)
	.type	C(ffi_closure_v9),#function
	FFI_HIDDEN(C(ffi_closure_v9))

C(ffi_closure_v9):
.LUW6:
	save	%sp, -STACKFRAME, %sp
.LUW7:
	ldx	[%g1+FFI_TRAMPOLINE_SIZE], %o0
	ldx	[%g1+FFI_TRAMPOLINE_SIZE+8], %o1
	ldx	[%g1+FFI_TRAMPOLINE_SIZE+16], %o2
0:
	! Entry point shared with ffi_go_closure_v9; %o0-%o2 are set up.
	! Store all of the potential argument registers in va_list format.
	stx	%i0, [FP+128+0]
	stx	%i1, [FP+128+8]
	stx	%i2, [FP+128+16]
	stx	%i3, [FP+128+24]
	stx	%i4, [FP+128+32]
	stx	%i5, [FP+128+40]

	! Store possible floating point argument registers too.
	std	%f0,  [FP-128]
	std	%f2,  [FP-120]
	std	%f4,  [FP-112]
	std	%f6,  [FP-104]
	std	%f8,  [FP-96]
	std	%f10, [FP-88]
	std	%f12, [FP-80]
	std	%f14, [FP-72]
	std	%f16, [FP-64]
	std	%f18, [FP-56]
	std	%f20, [FP-48]
	std	%f22, [FP-40]
	std	%f24, [FP-32]
	std	%f26, [FP-24]
	std	%f28, [FP-16]
	std	%f30, [FP-8]

	! Call ffi_closure_sparc_inner to do the bulk of the work.
	add	%fp, STACK_BIAS-160, %o3	! return value buffer
	add	%fp, STACK_BIAS+128, %o4	! spilled integer args
	call	C(ffi_closure_sparc_inner_v9)
	 add	%fp, STACK_BIAS-128, %o5	! spilled fp args

	! Dispatch on the return type held in the flags returned in %o0.
	! The call below is used only to obtain its own address, i.e.
	! label 0, in %o7.
0:	call	1f		! load pc in %o7
	 and	%o0, SPARC_FLAG_RET_MASK, %o0
1:	sll	%o0, 4, %o0	! o0 = ret_type * 16
	add	%o7, %o0, %o7	! o7 = 0b + ret_type*16
	jmp	%o7+(2f-0b)
	 nop

	! Note that we cannot load the data in the delay slot of
	! the return insn because the data is in the stack frame
	! that is deallocated by the return.
	!
	! Integer results are loaded into %i registers, which the caller
	! sees as its %o registers once the return has restored the
	! window.  Values narrower than 8 bytes are read from the high
	! address end of the first 8-byte slot (offsets +7, +6 and +4).
	.align	16
2:
E(SPARC_RET_VOID)
	return	%i7+8
	 nop
E(SPARC_RET_STRUCT)
	! Both the integer and the fp return registers are loaded from
	! the same 32 bytes; the sequence is finished at label 8 below.
	ldx	[FP-160], %i0
	ldd	[FP-160], %f0
	b	8f
	 ldx	[FP-152], %i1
E(SPARC_RET_UINT8)
	ldub	[FP-160+7], %i0
	return	%i7+8
	 nop
E(SPARC_RET_SINT8)
	ldsb	[FP-160+7], %i0
	return	%i7+8
	 nop
E(SPARC_RET_UINT16)
	lduh	[FP-160+6], %i0
	return	%i7+8
	 nop
E(SPARC_RET_SINT16)
	ldsh	[FP-160+6], %i0
	return	%i7+8
	 nop
E(SPARC_RET_UINT32)
	lduw	[FP-160+4], %i0
	return	%i7+8
	 nop
E(SP_V9_RET_SINT32)
	ldsw	[FP-160+4], %i0
	return	%i7+8
	 nop
E(SPARC_RET_INT64)
	ldx	[FP-160], %i0
	return	%i7+8
	 nop
E(SPARC_RET_INT128)
	ldx	[FP-160], %i0
	ldx	[FP-160+8], %i1
	return	%i7+8
	 nop
	! The F_8 and F_6 entries have no return of their own: they load
	! their two extra floats and fall through into the next entry.
	! The nops pad each entry to exactly 16 bytes.
E(SPARC_RET_F_8)
	ld	[FP-160+7*4], %f7
	nop
	ld	[FP-160+6*4], %f6
	nop
E(SPARC_RET_F_6)
	ld	[FP-160+5*4], %f5
	nop
	ld	[FP-160+4*4], %f4
	nop
E(SPARC_RET_F_4)
	ldd	[FP-160], %f0
	ldd	[FP-160+8], %f2
	return	%i7+8
	 nop
E(SPARC_RET_F_2)
	ldd	[FP-160], %f0
	return	%i7+8
	 nop
	! The F_3 entry likewise falls through into the F_1 entry.
E(SP_V9_RET_F_3)
	ld	[FP-160+2*4], %f2
	nop
	ld	[FP-160+1*4], %f1
	nop
E(SPARC_RET_F_1)
	ld	[FP-160], %f0
	return	%i7+8
	 nop

	! Finish the SPARC_RET_STRUCT sequence.
	.align	8
8:	ldd	[FP-152], %f2
	ldx	[FP-144], %i2
	ldd	[FP-144], %f4
	ldx	[FP-136], %i3
	ldd	[FP-136], %f6
	return	%i7+8
	 nop

.LUW8:
	.size	C(ffi_closure_v9), . - C(ffi_closure_v9)
368
/* Hand-written unwind information: one CIE shared by three FDEs, one
   FDE for each of ffi_call_v9, ffi_go_closure_v9 and ffi_closure_v9.
   The section is made writable unless HAVE_RO_EH_FRAME is defined.  */
#ifdef HAVE_RO_EH_FRAME
        .section        ".eh_frame",#alloc
#else
        .section        ".eh_frame",#alloc,#write
#endif

/* FDE_RANGE(B, E): the start address and length fields of an FDE that
   covers the code from label B up to label E.  With
   HAVE_AS_SPARC_UA_PCREL the start is a 32-bit pc-relative value and
   the length is 32 bits; otherwise both are 8-byte-aligned 64-bit
   words.  The encoding byte in the CIE below is selected by the same
   condition and has to stay in step with this macro.  */
#ifdef HAVE_AS_SPARC_UA_PCREL
# define FDE_RANGE(B, E)  .long %r_disp32(B), E - B
#else
# define FDE_RANGE(B, E)  .align 8; .xword B, E - B
#endif

	.align 8
.LCIE:
	.long	.LECIE - .LSCIE		! CIE Length
.LSCIE:
	.long	0			! CIE Identifier Tag
	.byte	1			! CIE Version
	.ascii	"zR\0"			! CIE Augmentation
	.byte	4			! CIE Code Alignment Factor
	.byte	0x78			! CIE Data Alignment Factor
	.byte	15			! CIE RA Column
	.byte	1			! Augmentation size
#ifdef HAVE_AS_SPARC_UA_PCREL
	.byte	0x1b			! FDE Encoding (pcrel sdata4)
#else
	.byte	0x50			! FDE Encoding (aligned absolute)
#endif
	.byte	0xc, 14, 0xff, 0xf	! DW_CFA_def_cfa, %o6, offset 0x7ff
	.align	8
.LECIE:

	! FDE for ffi_call_v9.  All three FDEs carry the same program:
	! advance past the 4-byte save instruction at function entry,
	! then describe the frame that the save has set up.
	.long	.LEFDE1 - .LSFDE1	! FDE Length
.LSFDE1:
	.long	.LSFDE1 - .LCIE		! FDE CIE offset
	FDE_RANGE(.LUW0, .LUW2)
	.byte	0			! Augmentation size
	.byte	0x40+1			! DW_CFA_advance_loc 4
	.byte	0xd, 30			! DW_CFA_def_cfa_register, %i6
	.byte	0x2d			! DW_CFA_GNU_window_save
	.byte	0x9, 15, 31		! DW_CFA_register, %o7, %i7
	.align	8
.LEFDE1:

	! FDE for ffi_go_closure_v9.
	.long	.LEFDE2 - .LSFDE2	! FDE Length
.LSFDE2:
	.long	.LSFDE2 - .LCIE		! FDE CIE offset
	FDE_RANGE(.LUW3, .LUW5)
	.byte	0			! Augmentation size
	.byte	0x40+1			! DW_CFA_advance_loc 4
	.byte	0xd, 30			! DW_CFA_def_cfa_register, %i6
	.byte	0x2d			! DW_CFA_GNU_window_save
	.byte	0x9, 15, 31		! DW_CFA_register, %o7, %i7
	.align	8
.LEFDE2:

	! FDE for ffi_closure_v9.
	.long	.LEFDE3 - .LSFDE3	! FDE Length
.LSFDE3:
	.long	.LSFDE3 - .LCIE		! FDE CIE offset
	FDE_RANGE(.LUW6, .LUW8)
	.byte	0			! Augmentation size
	.byte	0x40+1			! DW_CFA_advance_loc 4
	.byte	0xd, 30			! DW_CFA_def_cfa_register, %i6
	.byte	0x2d			! DW_CFA_GNU_window_save
	.byte	0x9, 15, 31		! DW_CFA_register, %o7, %i7
	.align	8
.LEFDE3:
436
#endif /* SPARC64 */
/* On Linux, emit an empty .note.GNU-stack section.  This is outside the
   SPARC64 guard, so it is emitted even when the rest of the file is
   compiled out.  By GNU toolchain convention the section marks the
   object as not requiring an executable stack.  */
#ifdef __linux__
	.section	.note.GNU-stack,"",@progbits
#endif
441