1/* -----------------------------------------------------------------------
2   sysv.S - Copyright (c) 2017  Anthony Green
3          - Copyright (c) 2013  The Written Word, Inc.
4          - Copyright (c) 1996,1998,2001-2003,2005,2008,2010  Red Hat, Inc.
5
6   X86 Foreign Function Interface
7
8   Permission is hereby granted, free of charge, to any person obtaining
9   a copy of this software and associated documentation files (the
10   ``Software''), to deal in the Software without restriction, including
11   without limitation the rights to use, copy, modify, merge, publish,
12   distribute, sublicense, and/or sell copies of the Software, and to
13   permit persons to whom the Software is furnished to do so, subject to
14   the following conditions:
15
16   The above copyright notice and this permission notice shall be included
17   in all copies or substantial portions of the Software.
18
19   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
20   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
23   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26   DEALINGS IN THE SOFTWARE.
27   ----------------------------------------------------------------------- */
28
29#ifndef __x86_64__
30#ifdef _MSC_VER
31
32#define LIBFFI_ASM
33#include <fficonfig.h>
34#include <ffi.h>
35#include <ffi_cfi.h>
36#include "internal.h"
37
38#define C2(X, Y)  X ## Y
39#define C1(X, Y)  C2(X, Y)
40#define L(X)     C1(L, X)
41# define ENDF(X) X ENDP
42
43/* This macro allows the safe creation of jump tables without an
44   actual table.  The entry points into the table are all 8 bytes.
45   The use of ORG asserts that we're at the correct location.  */
46/* ??? The clang assembler doesn't handle .org with symbolic expressions.  */
47#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
48# define E(BASE, X)	ALIGN 8
49#else
50# define E(BASE, X)	ALIGN 8; ORG BASE + X * 8
51#endif
52
53    .686P
54    .MODEL FLAT
55
56EXTRN	@ffi_closure_inner@8:PROC
57_TEXT SEGMENT
58
59/* This is declared as
60
61   void ffi_call_i386(struct call_frame *frame, char *argp)
62        __attribute__((fastcall));
63
64   Thus the arguments are present in
65
66        ecx: frame
67        edx: argp
68*/
69
/* void ffi_call_i386(struct call_frame *frame, char *argp)  [fastcall]
   ecx = frame (saved ebp/retaddr, fn, flags, rvalue, register args),
   edx = argp  (fully marshalled outgoing argument block).
   Performs the call and stores the result through the rvalue pointer,
   dispatching on the X86_RET_* code via the 8-byte-entry store_table.  */
ALIGN 16
PUBLIC @ffi_call_i386@8
@ffi_call_i386@8 PROC
L(UW0):
	// cfi_startproc
 #if !HAVE_FASTCALL
	mov	ecx, [esp+4]
	mov	edx, [esp+8]
 #endif
	mov	eax, [esp]		/* move the return address */
	mov	[ecx], ebp		/* store ebp into local frame */
	mov	[ecx+4], eax		/* store retaddr into local frame */

	/* New stack frame based off ebp.  This is an itty bit of unwind
	   trickery in that the CFA *has* changed.  There is no easy way
	   to describe it correctly on entry to the function.  Fortunately,
	   it doesn't matter too much since at all points we can correctly
	   unwind back to ffi_call.  Note that the location to which we
	   moved the return address is (the new) CFA-4, so from the
	   perspective of the unwind info, it hasn't moved.  */
	mov	ebp, ecx
L(UW1):
	// cfi_def_cfa(%ebp, 8)
	// cfi_rel_offset(%ebp, 0)

	mov	esp, edx		/* set outgoing argument stack */
	mov	eax, [20+R_EAX*4+ebp]	/* set register arguments */
	mov	edx, [20+R_EDX*4+ebp]
	mov	ecx, [20+R_ECX*4+ebp]

	call	dword ptr [ebp+8]	/* call frame->fn */

	mov	ecx, [12+ebp]		/* load return type code */
	mov	[ebp+8], ebx		/* preserve %ebx (fn slot now dead) */
L(UW2):
	// cfi_rel_offset(%ebx, 8)

	/* Each store_table entry is exactly 8 bytes (asserted by the
	   E() macro), so index by return-type-code * 8.  */
	and	ecx, X86_RET_TYPE_MASK
	lea	ebx, [L(store_table) + ecx * 8]
	mov	ecx, [ebp+16]		/* load result address */
	jmp	ebx

	ALIGN	8
L(store_table):
E(L(store_table), X86_RET_FLOAT)
	fstp	DWORD PTR [ecx]
	jmp	L(e1)
E(L(store_table), X86_RET_DOUBLE)
	fstp	QWORD PTR [ecx]
	jmp	L(e1)
E(L(store_table), X86_RET_LDOUBLE)
	fstp	QWORD PTR [ecx]		/* MSVC long double == double */
	jmp	L(e1)
E(L(store_table), X86_RET_SINT8)
	movsx	eax, al
	mov	[ecx], eax
	jmp	L(e1)
E(L(store_table), X86_RET_SINT16)
	movsx	eax, ax
	mov	[ecx], eax
	jmp	L(e1)
E(L(store_table), X86_RET_UINT8)
	movzx	eax, al
	mov	[ecx], eax
	jmp	L(e1)
E(L(store_table), X86_RET_UINT16)
	movzx	eax, ax
	mov	[ecx], eax
	jmp	L(e1)
E(L(store_table), X86_RET_INT64)
	mov	[ecx+4], edx
	/* fallthru */
E(L(store_table), X86_RET_INT32)
	mov	[ecx], eax
	/* fallthru */
E(L(store_table), X86_RET_VOID)
L(e1):
	mov	ebx, [ebp+8]		/* restore %ebx */
	mov	esp, ebp		/* discard outgoing argument stack */
	pop	ebp
L(UW3):
	// cfi_remember_state
	// cfi_def_cfa(%esp, 4)
	// cfi_restore(%ebx)
	// cfi_restore(%ebp)
	ret
L(UW4):
	// cfi_restore_state

E(L(store_table), X86_RET_STRUCTPOP)
	jmp	L(e1)
E(L(store_table), X86_RET_STRUCTARG)
	jmp	L(e1)
E(L(store_table), X86_RET_STRUCT_1B)
	mov	[ecx], al
	jmp	L(e1)
E(L(store_table), X86_RET_STRUCT_2B)
	mov	[ecx], ax
	jmp	L(e1)

	/* Fill out the table so that bad values are predictable.  */
E(L(store_table), X86_RET_UNUSED14)
	int 3
E(L(store_table), X86_RET_UNUSED15)
	int 3

L(UW5):
	// cfi_endproc
ENDF(@ffi_call_i386@8)
179
180/* The inner helper is declared as
181
182   void ffi_closure_inner(struct closure_frame *frame, char *argp)
	__attribute__((fastcall))
184
185   Thus the arguments are placed in
186
187	ecx:	frame
188	edx:	argp
189*/
190
/* Macros to help setting up the closure_data structure.  */

/* closure_FS is the stack frame size for the closure entry points;
   closure_CF is the offset of closure_data within that frame.  Without
   FASTCALL we also need 8 bytes at the bottom for the two outgoing
   stack arguments to ffi_closure_inner (see FFI_CLOSURE_PREP_CALL).  */
#if HAVE_FASTCALL
# define closure_FS	(40 + 4)
# define closure_CF	0
#else
# define closure_FS	(8 + 40 + 12)
# define closure_CF	8
#endif

/* Save the three argument registers into closure_data.regs
   (offset closure_CF+16, indexed by R_EAX/R_EDX/R_ECX).  */
FFI_CLOSURE_SAVE_REGS MACRO
	mov 	[esp + closure_CF+16+R_EAX*4], eax
	mov 	[esp + closure_CF+16+R_EDX*4], edx
	mov 	[esp + closure_CF+16+R_ECX*4], ecx
ENDM

/* Copy cif/fun/user_data, stored immediately after the trampoline of
   the ffi_closure that %eax points to, into closure_data at offsets
   28/32/36.  Clobbers eax, ecx, edx.  */
FFI_CLOSURE_COPY_TRAMP_DATA MACRO
	mov 	edx, [eax+FFI_TRAMPOLINE_SIZE]      /* copy cif */
	mov 	ecx, [eax+FFI_TRAMPOLINE_SIZE+4]    /* copy fun */
	mov 	eax, [eax+FFI_TRAMPOLINE_SIZE+8];   /* copy user_data */
	mov 	[esp+closure_CF+28], edx
	mov 	[esp+closure_CF+32], ecx
	mov 	[esp+closure_CF+36], eax
ENDM

/* Set up the two arguments for ffi_closure_inner(closure_data, argp):
   in ecx/edx for FASTCALL, otherwise in the two bottom stack slots.  */
#if HAVE_FASTCALL
FFI_CLOSURE_PREP_CALL MACRO
	mov	    ecx, esp                    /* load closure_data */
	lea 	edx, [esp+closure_FS+4]     /* load incoming stack */
ENDM
#else
FFI_CLOSURE_PREP_CALL MACRO
	lea 	ecx, [esp+closure_CF]       /* load closure_data */
	lea 	edx, [esp+closure_FS+4]     /* load incoming stack */
	mov 	[esp], ecx
	mov 	[esp+4], edx
ENDM
#endif

/* Call ffi_closure_inner.  UWN is the unwind-label number used by the
   GAS build; it is unused in this MASM variant.  */
FFI_CLOSURE_CALL_INNER MACRO UWN
	call	@ffi_closure_inner@8
ENDM

/* Mask the X86_RET_* code returned (in eax) by ffi_closure_inner and
   jump to the matching 8-byte entry of the given dispatch table,
   optimistically preloading the integer return value.  */
FFI_CLOSURE_MASK_AND_JUMP MACRO LABEL
	and	    eax, X86_RET_TYPE_MASK
	lea 	edx, [LABEL+eax*8]
	mov 	eax, [esp+closure_CF]       /* optimistic load */
	jmp	    edx
ENDM
240
/* ffi_go_closure_EAX: Go-closure entry point with the ffi_go_closure
   descriptor in %eax.  Copies cif/fun from the descriptor, records the
   descriptor itself as user_data, and joins the common i386 path.  */
ALIGN 16
PUBLIC ffi_go_closure_EAX
ffi_go_closure_EAX PROC C
L(UW6):
	// cfi_startproc
	sub	esp, closure_FS		/* allocate closure_data frame */
L(UW7):
	// cfi_def_cfa_offset(closure_FS + 4)
	FFI_CLOSURE_SAVE_REGS
	mov     edx, [eax+4]			/* copy cif */
	mov 	ecx, [eax +8]			/* copy fun */
	mov 	[esp+closure_CF+28], edx
	mov 	[esp+closure_CF+32], ecx
	mov 	[esp+closure_CF+36], eax	/* closure is user_data */
	jmp	L(do_closure_i386)
L(UW8):
	// cfi_endproc
ENDF(ffi_go_closure_EAX)
259
/* ffi_go_closure_ECX: Go-closure entry point with the ffi_go_closure
   descriptor in %ecx; otherwise identical to ffi_go_closure_EAX.  */
ALIGN 16
PUBLIC ffi_go_closure_ECX
ffi_go_closure_ECX PROC C
L(UW9):
	// cfi_startproc
	sub 	esp, closure_FS		/* allocate closure_data frame */
L(UW10):
	// cfi_def_cfa_offset(closure_FS + 4)
	FFI_CLOSURE_SAVE_REGS
	mov 	edx, [ecx+4]			/* copy cif */
	mov 	eax, [ecx+8]			/* copy fun */
	mov 	[esp+closure_CF+28], edx
	mov 	[esp+closure_CF+32], eax
	mov 	[esp+closure_CF+36], ecx	/* closure is user_data */
	jmp	L(do_closure_i386)
L(UW11):
	// cfi_endproc
ENDF(ffi_go_closure_ECX)
278
279/* The closure entry points are reached from the ffi_closure trampoline.
280   On entry, %eax contains the address of the ffi_closure.  */
281
/* ffi_closure_i386: closure entry for caller-pops conventions.  On
   entry %eax holds the ffi_closure address (set by the trampoline).
   Builds closure_data, calls ffi_closure_inner, then loads the return
   value according to the X86_RET_* code via load_table2.  */
ALIGN 16
PUBLIC ffi_closure_i386
ffi_closure_i386 PROC C
L(UW12):
	// cfi_startproc
	sub	    esp, closure_FS
L(UW13):
	// cfi_def_cfa_offset(closure_FS + 4)

	FFI_CLOSURE_SAVE_REGS
	FFI_CLOSURE_COPY_TRAMP_DATA

	/* Entry point from preceding Go closures.  */
L(do_closure_i386)::

	FFI_CLOSURE_PREP_CALL
	FFI_CLOSURE_CALL_INNER(14)
	FFI_CLOSURE_MASK_AND_JUMP L(C1(load_table,2))

    ALIGN 8
L(load_table2):
E(L(load_table2), X86_RET_FLOAT)
	fld 	dword ptr [esp+closure_CF]
	jmp	L(e2)
E(L(load_table2), X86_RET_DOUBLE)
	fld 	qword ptr [esp+closure_CF]
	jmp	L(e2)
E(L(load_table2), X86_RET_LDOUBLE)
	fld 	qword ptr [esp+closure_CF]
	jmp	L(e2)
E(L(load_table2), X86_RET_SINT8)
	movsx	eax, al
	jmp	L(e2)
E(L(load_table2), X86_RET_SINT16)
	movsx	eax, ax
	jmp	L(e2)
E(L(load_table2), X86_RET_UINT8)
	movzx	eax, al
	jmp	L(e2)
E(L(load_table2), X86_RET_UINT16)
	movzx	eax, ax
	jmp	L(e2)
E(L(load_table2), X86_RET_INT64)
	mov 	edx, [esp+closure_CF+4]
	jmp	L(e2)
E(L(load_table2), X86_RET_INT32)
	nop
	/* fallthru */
E(L(load_table2), X86_RET_VOID)
L(e2):
	add 	esp, closure_FS
L(UW16):
	// cfi_adjust_cfa_offset(-closure_FS)
	ret
L(UW17):
	// cfi_adjust_cfa_offset(closure_FS)
E(L(load_table2), X86_RET_STRUCTPOP)
	add 	esp, closure_FS
L(UW18):
	// cfi_adjust_cfa_offset(-closure_FS)
	ret	4		/* pop the hidden struct-return pointer */
L(UW19):
	// cfi_adjust_cfa_offset(closure_FS)
E(L(load_table2), X86_RET_STRUCTARG)
	jmp	L(e2)
E(L(load_table2), X86_RET_STRUCT_1B)
	movzx	eax, al
	jmp	L(e2)
E(L(load_table2), X86_RET_STRUCT_2B)
	movzx	eax, ax
	jmp	L(e2)

	/* Fill out the table so that bad values are predictable.  */
E(L(load_table2), X86_RET_UNUSED14)
	int 3
E(L(load_table2), X86_RET_UNUSED15)
	int 3

L(UW20):
	// cfi_endproc
ENDF(ffi_closure_i386)
363
/* ffi_go_closure_STDCALL: Go-closure entry with the descriptor in
   %ecx, joining the callee-pops (STDCALL) closure path.  */
ALIGN 16
PUBLIC	ffi_go_closure_STDCALL
ffi_go_closure_STDCALL PROC C
L(UW21):
	// cfi_startproc
	sub 	esp, closure_FS		/* allocate closure_data frame */
L(UW22):
	// cfi_def_cfa_offset(closure_FS + 4)
	FFI_CLOSURE_SAVE_REGS
	mov 	edx, [ecx+4]			/* copy cif */
	mov 	eax, [ecx+8]			/* copy fun */
	mov 	[esp+closure_CF+28], edx
	mov 	[esp+closure_CF+32], eax
	mov 	[esp+closure_CF+36], ecx	/* closure is user_data */
	jmp	L(do_closure_STDCALL)
L(UW23):
	// cfi_endproc
ENDF(ffi_go_closure_STDCALL)
382
383/* For REGISTER, we have no available parameter registers, and so we
384   enter here having pushed the closure onto the stack.  */
385
/* ffi_closure_REGISTER: entered with the closure address already
   pushed on the stack (below the return address), so the frame we
   allocate is 4 bytes smaller.  Pull the closure into %eax, move the
   return address down into its place, then join ffi_closure_STDCALL
   past its stack allocation.  */
ALIGN 16
PUBLIC ffi_closure_REGISTER
ffi_closure_REGISTER PROC C
L(UW24):
	// cfi_startproc
	// cfi_def_cfa(%esp, 8)
	// cfi_offset(%eip, -8)
	sub 	esp, closure_FS-4	/* closure already pushed: 4 less */
L(UW25):
	// cfi_def_cfa_offset(closure_FS + 4)
	FFI_CLOSURE_SAVE_REGS
	mov	ecx, [esp+closure_FS-4] 	/* load retaddr */
	mov	eax, [esp+closure_FS]		/* load closure */
	mov	[esp+closure_FS], ecx		/* move retaddr */
	jmp	L(do_closure_REGISTER)
L(UW26):
	// cfi_endproc
ENDF(ffi_closure_REGISTER)
404
405/* For STDCALL (and others), we need to pop N bytes of arguments off
406   the stack following the closure.  The amount needing to be popped
407   is returned to us from ffi_closure_inner.  */
408
/* ffi_closure_STDCALL: closure entry for callee-pops conventions.
   ffi_closure_inner returns the byte count to pop (above the type
   code); we relocate the return address accordingly and return with
   %esp pointing at it, so each table entry ends mov esp, ecx / ret.  */
ALIGN 16
PUBLIC ffi_closure_STDCALL
ffi_closure_STDCALL PROC C
L(UW27):
	// cfi_startproc
	sub 	esp, closure_FS
L(UW28):
	// cfi_def_cfa_offset(closure_FS + 4)

	FFI_CLOSURE_SAVE_REGS

	/* Entry point from ffi_closure_REGISTER.  */
L(do_closure_REGISTER)::

	FFI_CLOSURE_COPY_TRAMP_DATA

	/* Entry point from preceding Go closure.  */
L(do_closure_STDCALL)::

	FFI_CLOSURE_PREP_CALL
	FFI_CLOSURE_CALL_INNER(29)

	mov 	ecx, eax
	shr 	ecx, X86_RET_POP_SHIFT	    /* isolate pop count */
	lea 	ecx, [esp+closure_FS+ecx]	/* compute popped esp */
	mov 	edx, [esp+closure_FS]		/* move return address */
	mov 	[ecx], edx

	/* From this point on, the value of %esp upon return is %ecx+4,
	   and we've copied the return address to %ecx to make return easy.
	   There's no point in representing this in the unwind info, as
	   there is always a window between the mov and the ret which
	   will be wrong from one point of view or another.  */

	FFI_CLOSURE_MASK_AND_JUMP  L(C1(load_table,3))

    ALIGN 8
L(load_table3):
E(L(load_table3), X86_RET_FLOAT)
	fld    DWORD PTR [esp+closure_CF]
	mov     esp, ecx
	ret
E(L(load_table3), X86_RET_DOUBLE)
	fld    QWORD PTR [esp+closure_CF]
	mov     esp, ecx
	ret
E(L(load_table3), X86_RET_LDOUBLE)
	fld    QWORD PTR [esp+closure_CF]
	mov     esp, ecx
	ret
E(L(load_table3), X86_RET_SINT8)
	movsx   eax, al
	mov     esp, ecx
	ret
E(L(load_table3), X86_RET_SINT16)
	movsx   eax, ax
	mov     esp, ecx
	ret
E(L(load_table3), X86_RET_UINT8)
	movzx   eax, al
	mov     esp, ecx
	ret
E(L(load_table3), X86_RET_UINT16)
	movzx   eax, ax
	mov     esp, ecx
	ret
E(L(load_table3), X86_RET_INT64)
	mov 	edx, [esp+closure_CF+4]
	mov     esp, ecx
	ret
E(L(load_table3), X86_RET_INT32)
	mov     esp, ecx
	ret
E(L(load_table3), X86_RET_VOID)
	mov     esp, ecx
	ret
E(L(load_table3), X86_RET_STRUCTPOP)
	mov     esp, ecx
	ret
E(L(load_table3), X86_RET_STRUCTARG)
	mov 	esp, ecx
	ret
E(L(load_table3), X86_RET_STRUCT_1B)
	movzx	eax, al
	mov 	esp, ecx
	ret
E(L(load_table3), X86_RET_STRUCT_2B)
	movzx	eax, ax
	mov 	esp, ecx
	ret

	/* Fill out the table so that bad values are predictable.  */
E(L(load_table3), X86_RET_UNUSED14)
	int 3
E(L(load_table3), X86_RET_UNUSED15)
	int 3

L(UW31):
	// cfi_endproc
ENDF(ffi_closure_STDCALL)
509
510#if !FFI_NO_RAW_API
511
512#define raw_closure_S_FS	(16+16+12)
513
/* ffi_closure_raw_SYSV: raw (cdecl) closure entry.  The raw API calls
   cl->fun(cif, &res, raw_args, user_data) directly with no argument
   marshalling, then dispatches on cif->flags' X86_RET_* type code via
   load_table4.  %eax holds the ffi_raw_closure on entry.  */
ALIGN 16
PUBLIC ffi_closure_raw_SYSV
ffi_closure_raw_SYSV PROC C
L(UW32):
	// cfi_startproc
	sub 	esp, raw_closure_S_FS
L(UW33):
	// cfi_def_cfa_offset(raw_closure_S_FS + 4)
	mov 	[esp+raw_closure_S_FS-4], ebx	/* preserve %ebx */
L(UW34):
	// cfi_rel_offset(%ebx, raw_closure_S_FS-4)

	mov 	edx, [eax+FFI_TRAMPOLINE_SIZE+8]	/* load cl->user_data */
	mov 	[esp+12], edx
	lea 	edx, [esp+raw_closure_S_FS+4]		/* load raw_args */
	mov 	[esp+8], edx
	lea 	edx, [esp+16]				/* load &res */
	mov 	[esp+4], edx
	mov 	ebx, [eax+FFI_TRAMPOLINE_SIZE]		/* load cl->cif */
	mov 	[esp], ebx
	call	DWORD PTR [eax+FFI_TRAMPOLINE_SIZE+4]	/* call cl->fun */

	mov 	eax, [ebx+20]			/* load cif->flags */
	and 	eax, X86_RET_TYPE_MASK
	lea 	ecx, [L(load_table4)+eax*8]	/* 8-byte table entries */
	mov 	ebx, [esp+raw_closure_S_FS-4]	/* restore %ebx */
L(UW35):
	// cfi_restore(%ebx)
	mov 	eax, [esp+16]			/* Optimistic load */
	jmp	ecx				/* dispatch to table entry */

	ALIGN 8
L(load_table4):
E(L(load_table4), X86_RET_FLOAT)
	fld 	DWORD PTR [esp +16]
	jmp	L(e4)
E(L(load_table4), X86_RET_DOUBLE)
	fld 	QWORD PTR [esp +16]
	jmp	L(e4)
E(L(load_table4), X86_RET_LDOUBLE)
	fld 	QWORD PTR [esp +16]
	jmp	L(e4)
E(L(load_table4), X86_RET_SINT8)
	movsx	eax, al
	jmp	L(e4)
E(L(load_table4), X86_RET_SINT16)
	movsx	eax, ax
	jmp	L(e4)
E(L(load_table4), X86_RET_UINT8)
	movzx	eax, al
	jmp	L(e4)
E(L(load_table4), X86_RET_UINT16)
	movzx	eax, ax
	jmp	L(e4)
E(L(load_table4), X86_RET_INT64)
	mov 	edx, [esp+16+4]
	jmp	L(e4)
E(L(load_table4), X86_RET_INT32)
	nop
	/* fallthru */
E(L(load_table4), X86_RET_VOID)
L(e4):
	add 	esp, raw_closure_S_FS
L(UW36):
	// cfi_adjust_cfa_offset(-raw_closure_S_FS)
	ret
L(UW37):
	// cfi_adjust_cfa_offset(raw_closure_S_FS)
E(L(load_table4), X86_RET_STRUCTPOP)
	add 	esp, raw_closure_S_FS
L(UW38):
	// cfi_adjust_cfa_offset(-raw_closure_S_FS)
	ret	4		/* pop the hidden struct-return pointer */
L(UW39):
	// cfi_adjust_cfa_offset(raw_closure_S_FS)
E(L(load_table4), X86_RET_STRUCTARG)
	jmp	L(e4)
E(L(load_table4), X86_RET_STRUCT_1B)
	movzx	eax, al
	jmp	L(e4)
E(L(load_table4), X86_RET_STRUCT_2B)
	movzx	eax, ax
	jmp	L(e4)

	/* Fill out the table so that bad values are predictable.  */
E(L(load_table4), X86_RET_UNUSED14)
	int 3
E(L(load_table4), X86_RET_UNUSED15)
	int 3

L(UW40):
	// cfi_endproc
ENDF(ffi_closure_raw_SYSV)
613
614#define raw_closure_T_FS	(16+16+8)
615
/* ffi_closure_raw_THISCALL: raw closure entry for thiscall, where the
   `this' pointer arrives in %ecx.  Rearranges the stack so that %ecx
   becomes the first stack argument, then proceeds exactly like
   ffi_closure_raw_SYSV; the ret counts at the end additionally pop
   the %ecx slot we pushed (and the hidden struct slot for STRUCTPOP).  */
ALIGN 16
PUBLIC ffi_closure_raw_THISCALL
ffi_closure_raw_THISCALL PROC C
L(UW41):
	// cfi_startproc
	/* Rearrange the stack such that %ecx is the first argument.
	   This means moving the return address.  */
	pop 	edx
L(UW42):
	// cfi_def_cfa_offset(0)
	// cfi_register(%eip, %edx)
	push	ecx
L(UW43):
	// cfi_adjust_cfa_offset(4)
	push 	edx
L(UW44):
	// cfi_adjust_cfa_offset(4)
	// cfi_rel_offset(%eip, 0)
	sub 	esp, raw_closure_T_FS
L(UW45):
	// cfi_adjust_cfa_offset(raw_closure_T_FS)
	mov 	[esp+raw_closure_T_FS-4], ebx	/* preserve %ebx */
L(UW46):
	// cfi_rel_offset(%ebx, raw_closure_T_FS-4)

	mov 	edx, [eax+FFI_TRAMPOLINE_SIZE+8]	/* load cl->user_data */
	mov 	[esp+12], edx
	lea 	edx, [esp+raw_closure_T_FS+4]		/* load raw_args */
	mov 	[esp+8], edx
	lea 	edx, [esp+16]				/* load &res */
	mov 	[esp+4], edx
	mov 	ebx, [eax+FFI_TRAMPOLINE_SIZE]		/* load cl->cif */
	mov 	[esp], ebx
	call	DWORD PTR [eax+FFI_TRAMPOLINE_SIZE+4]	/* call cl->fun */

	mov 	eax, [ebx+20]				/* load cif->flags */
	and 	eax, X86_RET_TYPE_MASK
	lea 	ecx, [L(load_table5)+eax*8]		/* 8-byte table entries */
	mov 	ebx, [esp+raw_closure_T_FS-4]		/* restore %ebx */
L(UW47):
	// cfi_restore(%ebx)
	mov 	eax, [esp+16]				/* Optimistic load */
	jmp	ecx					/* dispatch to table entry */

	ALIGN 8
L(load_table5):
E(L(load_table5), X86_RET_FLOAT)
	fld	DWORD PTR [esp +16]
	jmp	L(e5)
E(L(load_table5), X86_RET_DOUBLE)
	fld	QWORD PTR [esp +16]
	jmp	L(e5)
E(L(load_table5), X86_RET_LDOUBLE)
	fld	QWORD PTR [esp+16]
	jmp	L(e5)
E(L(load_table5), X86_RET_SINT8)
	movsx	eax, al
	jmp	L(e5)
E(L(load_table5), X86_RET_SINT16)
	movsx	eax, ax
	jmp	L(e5)
E(L(load_table5), X86_RET_UINT8)
	movzx	eax, al
	jmp	L(e5)
E(L(load_table5), X86_RET_UINT16)
	movzx	eax, ax
	jmp	L(e5)
E(L(load_table5), X86_RET_INT64)
	mov 	edx, [esp+16+4]
	jmp	L(e5)
E(L(load_table5), X86_RET_INT32)
	nop
	/* fallthru */
E(L(load_table5), X86_RET_VOID)
L(e5):
	add 	esp, raw_closure_T_FS
L(UW48):
	// cfi_adjust_cfa_offset(-raw_closure_T_FS)
	/* Remove the extra %ecx argument we pushed.  */
	ret	4
L(UW49):
	// cfi_adjust_cfa_offset(raw_closure_T_FS)
E(L(load_table5), X86_RET_STRUCTPOP)
	add 	esp, raw_closure_T_FS
L(UW50):
	// cfi_adjust_cfa_offset(-raw_closure_T_FS)
	ret	8		/* pop %ecx slot + hidden struct pointer */
L(UW51):
	// cfi_adjust_cfa_offset(raw_closure_T_FS)
E(L(load_table5), X86_RET_STRUCTARG)
	jmp	L(e5)
E(L(load_table5), X86_RET_STRUCT_1B)
	movzx	eax, al
	jmp	L(e5)
E(L(load_table5), X86_RET_STRUCT_2B)
	movzx	eax, ax
	jmp	L(e5)

	/* Fill out the table so that bad values are predictable.  */
E(L(load_table5), X86_RET_UNUSED14)
	int 3
E(L(load_table5), X86_RET_UNUSED15)
	int 3

L(UW52):
	// cfi_endproc
ENDF(ffi_closure_raw_THISCALL)
729
730#endif /* !FFI_NO_RAW_API */
731
732#ifdef X86_DARWIN
733# define COMDAT(X)							\
734        .section __TEXT,__text,coalesced,pure_instructions;		\
735        .weak_definition X;						\
736        FFI_HIDDEN(X)
737#elif defined __ELF__ && !(defined(__sun__) && defined(__svr4__))
738# define COMDAT(X)							\
739	.section .text.X,"axG",@progbits,X,comdat;			\
740	PUBLIC	X;							\
741	FFI_HIDDEN(X)
742#else
743# define COMDAT(X)
744#endif
745
746// #if defined(__PIC__)
747// 	COMDAT(C(__x86.get_pc_thunk.bx))
748// C(__x86.get_pc_thunk.bx):
749// 	movl	(%esp), %ebx
750// 	ret
751// ENDF(C(__x86.get_pc_thunk.bx))
752// # if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
753// 	COMDAT(C(__x86.get_pc_thunk.dx))
754// C(__x86.get_pc_thunk.dx):
755// 	movl	(%esp), %edx
756// 	ret
757// ENDF(C(__x86.get_pc_thunk.dx))
758// #endif /* DARWIN || HIDDEN */
759// #endif /* __PIC__ */
760
761#if 0
762/* Sadly, OSX cctools-as doesn't understand .cfi directives at all.  */
763
764#ifdef __APPLE__
765.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
766EHFrame0:
767#elif defined(X86_WIN32)
768.section .eh_frame,"r"
769#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE)
770.section .eh_frame,EH_FRAME_FLAGS,@unwind
771#else
772.section .eh_frame,EH_FRAME_FLAGS,@progbits
773#endif
774
775#ifdef HAVE_AS_X86_PCREL
776# define PCREL(X)	X - .
777#else
778# define PCREL(X)	X@rel
779#endif
780
781/* Simplify advancing between labels.  Assume DW_CFA_advance_loc1 fits.  */
782#define ADV(N, P)	.byte 2, L(N)-L(P)
783
784	.balign 4
785L(CIE):
786	.set	L(set0),L(ECIE)-L(SCIE)
787	.long	L(set0)			/* CIE Length */
788L(SCIE):
789	.long	0			/* CIE Identifier Tag */
790	.byte	1			/* CIE Version */
791	.ascii	"zR\0"			/* CIE Augmentation */
792	.byte	1			/* CIE Code Alignment Factor */
793	.byte	0x7c			/* CIE Data Alignment Factor */
794	.byte	0x8			/* CIE RA Column */
795	.byte	1			/* Augmentation size */
796	.byte	0x1b			/* FDE Encoding (pcrel sdata4) */
797	.byte	0xc, 4, 4		/* DW_CFA_def_cfa, %esp offset 4 */
798	.byte	0x80+8, 1		/* DW_CFA_offset, %eip offset 1*-4 */
799	.balign 4
800L(ECIE):
801
802	.set	L(set1),L(EFDE1)-L(SFDE1)
803	.long	L(set1)			/* FDE Length */
804L(SFDE1):
805	.long	L(SFDE1)-L(CIE)		/* FDE CIE offset */
806	.long	PCREL(L(UW0))		/* Initial location */
807	.long	L(UW5)-L(UW0)		/* Address range */
808	.byte	0			/* Augmentation size */
809	ADV(UW1, UW0)
810	.byte	0xc, 5, 8		/* DW_CFA_def_cfa, %ebp 8 */
811	.byte	0x80+5, 2		/* DW_CFA_offset, %ebp 2*-4 */
812	ADV(UW2, UW1)
813	.byte	0x80+3, 0		/* DW_CFA_offset, %ebx 0*-4 */
814	ADV(UW3, UW2)
815	.byte	0xa			/* DW_CFA_remember_state */
816	.byte	0xc, 4, 4		/* DW_CFA_def_cfa, %esp 4 */
817	.byte	0xc0+3			/* DW_CFA_restore, %ebx */
818	.byte	0xc0+5			/* DW_CFA_restore, %ebp */
819	ADV(UW4, UW3)
820	.byte	0xb			/* DW_CFA_restore_state */
821	.balign	4
822L(EFDE1):
823
824	.set	L(set2),L(EFDE2)-L(SFDE2)
825	.long	L(set2)			/* FDE Length */
826L(SFDE2):
827	.long	L(SFDE2)-L(CIE)		/* FDE CIE offset */
828	.long	PCREL(L(UW6))		/* Initial location */
829	.long	L(UW8)-L(UW6)		/* Address range */
830	.byte	0			/* Augmentation size */
831	ADV(UW7, UW6)
832	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
833	.balign	4
834L(EFDE2):
835
836	.set	L(set3),L(EFDE3)-L(SFDE3)
837	.long	L(set3)			/* FDE Length */
838L(SFDE3):
839	.long	L(SFDE3)-L(CIE)		/* FDE CIE offset */
840	.long	PCREL(L(UW9))		/* Initial location */
841	.long	L(UW11)-L(UW9)		/* Address range */
842	.byte	0			/* Augmentation size */
843	ADV(UW10, UW9)
844	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
845	.balign	4
846L(EFDE3):
847
848	.set	L(set4),L(EFDE4)-L(SFDE4)
849	.long	L(set4)			/* FDE Length */
850L(SFDE4):
851	.long	L(SFDE4)-L(CIE)		/* FDE CIE offset */
852	.long	PCREL(L(UW12))		/* Initial location */
853	.long	L(UW20)-L(UW12)		/* Address range */
854	.byte	0			/* Augmentation size */
855	ADV(UW13, UW12)
856	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
857#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
858	ADV(UW14, UW13)
859	.byte	0x80+3, (40-(closure_FS+4))/-4  /* DW_CFA_offset %ebx */
860	ADV(UW15, UW14)
861	.byte	0xc0+3			/* DW_CFA_restore %ebx */
862	ADV(UW16, UW15)
863#else
864	ADV(UW16, UW13)
865#endif
866	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
867	ADV(UW17, UW16)
868	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
869	ADV(UW18, UW17)
870	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
871	ADV(UW19, UW18)
872	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
873	.balign	4
874L(EFDE4):
875
876	.set	L(set5),L(EFDE5)-L(SFDE5)
877	.long	L(set5)			/* FDE Length */
878L(SFDE5):
879	.long	L(SFDE5)-L(CIE)		/* FDE CIE offset */
880	.long	PCREL(L(UW21))		/* Initial location */
881	.long	L(UW23)-L(UW21)		/* Address range */
882	.byte	0			/* Augmentation size */
883	ADV(UW22, UW21)
884	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
885	.balign	4
886L(EFDE5):
887
888	.set	L(set6),L(EFDE6)-L(SFDE6)
889	.long	L(set6)			/* FDE Length */
890L(SFDE6):
891	.long	L(SFDE6)-L(CIE)		/* FDE CIE offset */
892	.long	PCREL(L(UW24))		/* Initial location */
893	.long	L(UW26)-L(UW24)		/* Address range */
894	.byte	0			/* Augmentation size */
895	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
896	.byte	0x80+8, 2		/* DW_CFA_offset %eip, 2*-4 */
897	ADV(UW25, UW24)
898	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
899	.balign	4
900L(EFDE6):
901
902	.set	L(set7),L(EFDE7)-L(SFDE7)
903	.long	L(set7)			/* FDE Length */
904L(SFDE7):
905	.long	L(SFDE7)-L(CIE)		/* FDE CIE offset */
906	.long	PCREL(L(UW27))		/* Initial location */
907	.long	L(UW31)-L(UW27)		/* Address range */
908	.byte	0			/* Augmentation size */
909	ADV(UW28, UW27)
910	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
911#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
912	ADV(UW29, UW28)
913	.byte	0x80+3, (40-(closure_FS+4))/-4  /* DW_CFA_offset %ebx */
914	ADV(UW30, UW29)
915	.byte	0xc0+3			/* DW_CFA_restore %ebx */
916#endif
917	.balign	4
918L(EFDE7):
919
920#if !FFI_NO_RAW_API
921	.set	L(set8),L(EFDE8)-L(SFDE8)
922	.long	L(set8)			/* FDE Length */
923L(SFDE8):
924	.long	L(SFDE8)-L(CIE)		/* FDE CIE offset */
925	.long	PCREL(L(UW32))		/* Initial location */
926	.long	L(UW40)-L(UW32)		/* Address range */
927	.byte	0			/* Augmentation size */
928	ADV(UW33, UW32)
929	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
930	ADV(UW34, UW33)
931	.byte	0x80+3, 2		/* DW_CFA_offset %ebx 2*-4 */
932	ADV(UW35, UW34)
933	.byte	0xc0+3			/* DW_CFA_restore %ebx */
934	ADV(UW36, UW35)
935	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
936	ADV(UW37, UW36)
937	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
938	ADV(UW38, UW37)
939	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
940	ADV(UW39, UW38)
941	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
942	.balign	4
943L(EFDE8):
944
945	.set	L(set9),L(EFDE9)-L(SFDE9)
946	.long	L(set9)			/* FDE Length */
947L(SFDE9):
948	.long	L(SFDE9)-L(CIE)		/* FDE CIE offset */
949	.long	PCREL(L(UW41))		/* Initial location */
950	.long	L(UW52)-L(UW41)		/* Address range */
951	.byte	0			/* Augmentation size */
952	ADV(UW42, UW41)
953	.byte	0xe, 0			/* DW_CFA_def_cfa_offset */
954	.byte	0x9, 8, 2		/* DW_CFA_register %eip, %edx */
955	ADV(UW43, UW42)
956	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
957	ADV(UW44, UW43)
958	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
959	.byte	0x80+8, 2		/* DW_CFA_offset %eip 2*-4 */
960	ADV(UW45, UW44)
961	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
962	ADV(UW46, UW45)
963	.byte	0x80+3, 3		/* DW_CFA_offset %ebx 3*-4 */
964	ADV(UW47, UW46)
965	.byte	0xc0+3			/* DW_CFA_restore %ebx */
966	ADV(UW48, UW47)
967	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
968	ADV(UW49, UW48)
969	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
970	ADV(UW50, UW49)
971	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
972	ADV(UW51, UW50)
973	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
974	.balign	4
975L(EFDE9):
976#endif /* !FFI_NO_RAW_API */
977
978#ifdef _WIN32
979	.def	 @feat.00;
980	.scl	3;
981	.type	0;
982	.endef
983	PUBLIC	@feat.00
984@feat.00 = 1
985#endif
986
#endif /* if 0 (disabled GAS-only .eh_frame data) */
#endif /* ifdef _MSC_VER */
989
990#if defined __ELF__ && defined __linux__
991	.section	.note.GNU-stack,"",@progbits
992#endif
993#endif
994
995END