1#define LIBFFI_ASM
2#include <fficonfig.h>
3#include <ffi.h>
4#include <ffi_cfi.h>
5
6#if defined(HAVE_AS_CFI_PSEUDO_OP)
7        .cfi_sections   .debug_frame
8#endif
9
10#define arg0	%rcx
11#define arg1	%rdx
12#define arg2	%r8
13#define arg3	%r9
14
15#ifdef SYMBOL_UNDERSCORE
16#define SYMBOL_NAME(name) _##name
17#else
18#define SYMBOL_NAME(name) name
19#endif
20
21.macro E which
22	.align	8
23	.org	0b + \which * 8
24.endm
25
26	.text
27
28/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10)
29
30   Bit o trickiness here -- FRAME is the base of the stack frame
31   for this function.  This has been allocated by ffi_call.  We also
32   deallocate some of the stack that has been alloca'd.  */
33
34	.align	8
35	.globl	ffi_call_win64
36
37	.seh_proc ffi_call_win64
38ffi_call_win64:
39	cfi_startproc
40	/* Set up the local stack frame and install it in rbp/rsp.  */
41	movq	(%rsp), %rax
42	movq	%rbp, (arg1)
43	movq	%rax, 8(arg1)
44	movq	arg1, %rbp
45	cfi_def_cfa(%rbp, 16)
46	cfi_rel_offset(%rbp, 0)
47	.seh_pushreg %rbp
48	.seh_setframe %rbp, 0
49	.seh_endprologue
50	movq	arg0, %rsp
51
52	movq	arg2, %r10
53
54	/* Load all slots into both general and xmm registers.  */
55	movq	(%rsp), %rcx
56	movsd	(%rsp), %xmm0
57	movq	8(%rsp), %rdx
58	movsd	8(%rsp), %xmm1
59	movq	16(%rsp), %r8
60	movsd	16(%rsp), %xmm2
61	movq	24(%rsp), %r9
62	movsd	24(%rsp), %xmm3
63
64	call	*16(%rbp)
65
66	movl	24(%rbp), %ecx
67	movq	32(%rbp), %r8
68	leaq	0f(%rip), %r10
69	cmpl	$FFI_TYPE_SMALL_STRUCT_4B, %ecx
70	leaq	(%r10, %rcx, 8), %r10
71	ja	99f
72	jmp	*%r10
73
74/* Below, we're space constrained most of the time.  Thus we eschew the
75   modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes).  */
76.macro epilogue
77	leaveq
78	cfi_remember_state
79	cfi_def_cfa(%rsp, 8)
80	cfi_restore(%rbp)
81	ret
82	cfi_restore_state
83.endm
84
85	.align	8
860:
87E FFI_TYPE_VOID
88	epilogue
89E FFI_TYPE_INT
90	movslq	%eax, %rax
91	movq	%rax, (%r8)
92	epilogue
93E FFI_TYPE_FLOAT
94	movss	%xmm0, (%r8)
95	epilogue
96E FFI_TYPE_DOUBLE
97	movsd	%xmm0, (%r8)
98	epilogue
99E FFI_TYPE_LONGDOUBLE
100	call	abort
101E FFI_TYPE_UINT8
102	movzbl	%al, %eax
103	movq	%rax, (%r8)
104	epilogue
105E FFI_TYPE_SINT8
106	movsbq	%al, %rax
107	jmp	98f
108E FFI_TYPE_UINT16
109	movzwl	%ax, %eax
110	movq	%rax, (%r8)
111	epilogue
112E FFI_TYPE_SINT16
113	movswq	%ax, %rax
114	jmp	98f
115E FFI_TYPE_UINT32
116	movl	%eax, %eax
117	movq	%rax, (%r8)
118	epilogue
119E FFI_TYPE_SINT32
120	movslq	%eax, %rax
121	movq	%rax, (%r8)
122	epilogue
123E FFI_TYPE_UINT64
12498:	movq	%rax, (%r8)
125	epilogue
126E FFI_TYPE_SINT64
127	movq	%rax, (%r8)
128	epilogue
129E FFI_TYPE_STRUCT
130	epilogue
131E FFI_TYPE_POINTER
132	movq	%rax, (%r8)
133	epilogue
134E FFI_TYPE_COMPLEX
135	call	abort
136E FFI_TYPE_SMALL_STRUCT_1B
137	movb	%al, (%r8)
138	epilogue
139E FFI_TYPE_SMALL_STRUCT_2B
140	movw	%ax, (%r8)
141	epilogue
142E FFI_TYPE_SMALL_STRUCT_4B
143	movl	%eax, (%r8)
144	epilogue
145
146	.align	8
14799:	call	abort
148
149.purgem epilogue
150
151	cfi_endproc
152	.seh_endproc
153
154
155/* 32 bytes of outgoing register stack space, 8 bytes of alignment,
156   16 bytes of result, 32 bytes of xmm registers.  */
157#define ffi_clo_FS	(32+8+16+32)
158#define ffi_clo_OFF_R	(32+8)
159#define ffi_clo_OFF_X	(32+8+16)
160
161	.align	8
162	.globl	ffi_go_closure_win64
163
164	.seh_proc ffi_go_closure_win64
165ffi_go_closure_win64:
166	cfi_startproc
167	/* Save all integer arguments into the incoming reg stack space.  */
168	movq	arg0, 8(%rsp)
169	movq	arg1, 16(%rsp)
170	movq	arg2, 24(%rsp)
171	movq	arg3, 32(%rsp)
172
173	movq	8(%r10), arg0			/* load cif */
174	movq	16(%r10), arg1			/* load fun */
175	movq	%r10, arg2			/* closure is user_data */
176	jmp	0f
177	cfi_endproc
178	.seh_endproc
179
180	.align	8
181	.globl	ffi_closure_win64
182
183	.seh_proc ffi_closure_win64
184ffi_closure_win64:
185	cfi_startproc
186	/* Save all integer arguments into the incoming reg stack space.  */
187	movq	arg0, 8(%rsp)
188	movq	arg1, 16(%rsp)
189	movq	arg2, 24(%rsp)
190	movq	arg3, 32(%rsp)
191
192	movq	FFI_TRAMPOLINE_SIZE(%r10), arg0		/* load cif */
193	movq	FFI_TRAMPOLINE_SIZE+8(%r10), arg1	/* load fun */
194	movq	FFI_TRAMPOLINE_SIZE+16(%r10), arg2	/* load user_data */
1950:
196	subq	$ffi_clo_FS, %rsp
197	cfi_adjust_cfa_offset(ffi_clo_FS)
198	.seh_stackalloc ffi_clo_FS
199	.seh_endprologue
200
201	/* Save all sse arguments into the stack frame.  */
202	movsd	%xmm0, ffi_clo_OFF_X(%rsp)
203	movsd	%xmm1, ffi_clo_OFF_X+8(%rsp)
204	movsd	%xmm2, ffi_clo_OFF_X+16(%rsp)
205	movsd	%xmm3, ffi_clo_OFF_X+24(%rsp)
206
207	leaq	ffi_clo_OFF_R(%rsp), arg3
208	call	ffi_closure_win64_inner
209
210	/* Load the result into both possible result registers.  */
211	movq    ffi_clo_OFF_R(%rsp), %rax
212	movsd   ffi_clo_OFF_R(%rsp), %xmm0
213
214	addq	$ffi_clo_FS, %rsp
215	cfi_adjust_cfa_offset(-ffi_clo_FS)
216	ret
217
218	cfi_endproc
219	.seh_endproc
220