/* -----------------------------------------------------------------------
   sysv.S - Copyright (c) 2013  The Written Word, Inc.
	  - Copyright (c) 1996,1998,2001-2003,2005,2008,2010  Red Hat, Inc.

   X86 Foreign Function Interface

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   ``Software''), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   DEALINGS IN THE SOFTWARE.
   ----------------------------------------------------------------------- */

#ifndef __x86_64__

#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
#include "internal.h"

/* Token-pasting helpers: C2 pastes, C1 forces expansion of its
   arguments first so that macros like __USER_LABEL_PREFIX__ are
   expanded before pasting.  */
#define C2(X, Y)  X ## Y
#define C1(X, Y)  C2(X, Y)
/* C(X) decorates a C-visible symbol with the platform's user label
   prefix (e.g. a leading underscore on Darwin/Win32).  */
#ifdef __USER_LABEL_PREFIX__
# define C(X)     C1(__USER_LABEL_PREFIX__, X)
#else
# define C(X)     X
#endif

/* L(X) forms an assembler-local (non-exported) label: Darwin uses a
   plain "L" prefix, everything else the GAS ".L" convention.  */
#ifdef X86_DARWIN
# define L(X)     C1(L, X)
#else
# define L(X)     C1(.L, X)
#endif

/* ENDF(X) emits ELF type/size info for symbol X; a no-op elsewhere.  */
#ifdef __ELF__
# define ENDF(X)  .type	X,@function; .size X, . - X
#else
# define ENDF(X)
#endif

/* Handle win32 fastcall name mangling.  */
#ifdef X86_WIN32
# define ffi_call_i386		@ffi_call_i386@8
# define ffi_closure_inner	@ffi_closure_inner@8
#else
# define ffi_call_i386		C(ffi_call_i386)
# define ffi_closure_inner	C(ffi_closure_inner)
#endif

/* This macro allows the safe creation of jump tables without an
   actual table.  The entry points into the table are all 8 bytes.
   The use of ORG asserts that we're at the correct location.  */
/* ??? The clang assembler doesn't handle .org with symbolic expressions.  */
#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
# define E(BASE, X)	.balign 8
#else
# define E(BASE, X)	.balign 8; .org BASE + X * 8
#endif
73
	.text
	.balign	16
	.globl	ffi_call_i386
	FFI_HIDDEN(ffi_call_i386)

/* This is declared as

   void ffi_call_i386(struct call_frame *frame, char *argp)
        __attribute__((fastcall));

   Thus the arguments are present in

        ecx: frame
        edx: argp
*/

ffi_call_i386:
L(UW0):
	# cfi_startproc
#if !HAVE_FASTCALL
	/* Without fastcall support the two arguments arrive on the
	   stack instead of in %ecx/%edx; load them manually.  */
	movl	4(%esp), %ecx
	movl	8(%esp), %edx
#endif
	movl	(%esp), %eax		/* move the return address */
	movl	%ebp, (%ecx)		/* store %ebp into local frame */
	movl	%eax, 4(%ecx)		/* store retaddr into local frame */

	/* New stack frame based off ebp.  This is an itty bit of unwind
	   trickery in that the CFA *has* changed.  There is no easy way
	   to describe it correctly on entry to the function.  Fortunately,
	   it doesn't matter too much since at all points we can correctly
	   unwind back to ffi_call.  Note that the location to which we
	   moved the return address is (the new) CFA-4, so from the
	   perspective of the unwind info, it hasn't moved.  */
	movl	%ecx, %ebp
L(UW1):
	# cfi_def_cfa(%ebp, 8)
	# cfi_rel_offset(%ebp, 0)

	movl	%edx, %esp		/* set outgoing argument stack */
	movl	20+R_EAX*4(%ebp), %eax	/* set register arguments */
	movl	20+R_EDX*4(%ebp), %edx
	movl	20+R_ECX*4(%ebp), %ecx

	call	*8(%ebp)		/* call frame->fn, stored at offset 8 */

	movl	12(%ebp), %ecx		/* load return type code */
	movl	%ebx, 8(%ebp)		/* preserve %ebx */
L(UW2):
	# cfi_rel_offset(%ebx, 8)

	/* Dispatch on the X86_RET_* type code: each store_table entry is
	   exactly 8 bytes (asserted by E()), so the target address is
	   table + code*8.  */
	andl	$X86_RET_TYPE_MASK, %ecx
#ifdef __PIC__
	call	C(__x86.get_pc_thunk.bx)
L(pc1):
	leal	L(store_table)-L(pc1)(%ebx, %ecx, 8), %ebx
#else
	leal	L(store_table)(,%ecx, 8), %ebx
#endif
	movl	16(%ebp), %ecx		/* load result address */
	jmp	*%ebx

	.balign	8
L(store_table):
E(L(store_table), X86_RET_FLOAT)
	fstps	(%ecx)
	jmp	L(e1)
E(L(store_table), X86_RET_DOUBLE)
	fstpl	(%ecx)
	jmp	L(e1)
E(L(store_table), X86_RET_LDOUBLE)
	fstpt	(%ecx)
	jmp	L(e1)
E(L(store_table), X86_RET_SINT8)
	movsbl	%al, %eax
	mov	%eax, (%ecx)
	jmp	L(e1)
E(L(store_table), X86_RET_SINT16)
	movswl	%ax, %eax
	mov	%eax, (%ecx)
	jmp	L(e1)
E(L(store_table), X86_RET_UINT8)
	movzbl	%al, %eax
	mov	%eax, (%ecx)
	jmp	L(e1)
E(L(store_table), X86_RET_UINT16)
	movzwl	%ax, %eax
	mov	%eax, (%ecx)
	jmp	L(e1)
E(L(store_table), X86_RET_INT64)
	movl	%edx, 4(%ecx)
	/* fallthru */
E(L(store_table), X86_RET_INT32)
	movl	%eax, (%ecx)
	/* fallthru */
E(L(store_table), X86_RET_VOID)
L(e1):
	/* Common epilogue: restore %ebx from the frame, unwind the
	   frame pointer, and return to ffi_call.  */
	movl	8(%ebp), %ebx
	movl	%ebp, %esp
	popl	%ebp
L(UW3):
	# cfi_remember_state
	# cfi_def_cfa(%esp, 4)
	# cfi_restore(%ebx)
	# cfi_restore(%ebp)
	ret
L(UW4):
	# cfi_restore_state

	/* These table entries live after the epilogue so the unwind
	   state saved at UW3 can be restored for them at UW4.  */
E(L(store_table), X86_RET_STRUCTPOP)
	jmp	L(e1)
E(L(store_table), X86_RET_STRUCTARG)
	jmp	L(e1)
E(L(store_table), X86_RET_STRUCT_1B)
	movb	%al, (%ecx)
	jmp	L(e1)
E(L(store_table), X86_RET_STRUCT_2B)
	movw	%ax, (%ecx)
	jmp	L(e1)

	/* Fill out the table so that bad values are predictable.  */
E(L(store_table), X86_RET_UNUSED14)
	ud2
E(L(store_table), X86_RET_UNUSED15)
	ud2

L(UW5):
	# cfi_endproc
ENDF(ffi_call_i386)
203
/* The inner helper is declared as

   void ffi_closure_inner(struct closure_frame *frame, char *argp)
	__attribute__((fastcall))

   Thus the arguments are placed in

	ecx:	frame
	edx:	argp
*/

/* Macros to help setting up the closure_data structure.  */

/* closure_FS is the stack frame size reserved by each closure entry
   point; closure_CF is the offset within it of the closure_data.
   Without fastcall, 8 extra bytes at the bottom hold the two outgoing
   stack arguments for ffi_closure_inner.  Either way closure_FS+4
   (frame plus return address) is a multiple of 16, keeping the stack
   aligned at the inner call.  */
#if HAVE_FASTCALL
# define closure_FS	(40 + 4)
# define closure_CF	0
#else
# define closure_FS	(8 + 40 + 12)
# define closure_CF	8
#endif

/* Save the three incoming register arguments into closure_data.  */
#define FFI_CLOSURE_SAVE_REGS		\
	movl	%eax, closure_CF+16+R_EAX*4(%esp);	\
	movl	%edx, closure_CF+16+R_EDX*4(%esp);	\
	movl	%ecx, closure_CF+16+R_ECX*4(%esp)

/* Copy cif/fun/user_data from the ffi_closure (whose address is in
   %eax, just past the trampoline) into closure_data.  */
#define FFI_CLOSURE_COPY_TRAMP_DATA					\
	movl	FFI_TRAMPOLINE_SIZE(%eax), %edx;	/* copy cif */	\
	movl	FFI_TRAMPOLINE_SIZE+4(%eax), %ecx;	/* copy fun */	\
	movl	FFI_TRAMPOLINE_SIZE+8(%eax), %eax;	/* copy user_data */ \
	movl	%edx, closure_CF+28(%esp);				\
	movl	%ecx, closure_CF+32(%esp);				\
	movl	%eax, closure_CF+36(%esp)

/* Load the two arguments for ffi_closure_inner: in registers when
   fastcall is available, otherwise into the bottom two stack slots.  */
#if HAVE_FASTCALL
# define FFI_CLOSURE_PREP_CALL						\
	movl	%esp, %ecx;			/* load closure_data */	\
	leal	closure_FS+4(%esp), %edx;	/* load incoming stack */
#else
# define FFI_CLOSURE_PREP_CALL						\
	leal	closure_CF(%esp), %ecx;		/* load closure_data */	\
	leal	closure_FS+4(%esp), %edx;	/* load incoming stack */ \
	movl	%ecx, (%esp);						\
	movl	%edx, 4(%esp)
#endif

#define FFI_CLOSURE_CALL_INNER(UWN) \
	call	ffi_closure_inner

/* Mask the return-type code out of ffi_closure_inner's result and
   jump through load_table N.  The UW argument names the unwind label
   used by the PIC variant below; it is unused here.  */
#define FFI_CLOSURE_MASK_AND_JUMP(N, UW)				\
	andl	$X86_RET_TYPE_MASK, %eax;				\
	leal	L(C1(load_table,N))(, %eax, 8), %edx;			\
	movl	closure_CF(%esp), %eax;		/* optimistic load */	\
	jmp	*%edx

#ifdef __PIC__
# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
/* PIC without a GOT-relative call: compute the table address
   pc-relative via a thunk, clobbering only %edx.  */
#  undef FFI_CLOSURE_MASK_AND_JUMP
#  define FFI_CLOSURE_MASK_AND_JUMP(N, UW)				\
	andl	$X86_RET_TYPE_MASK, %eax;				\
	call	C(__x86.get_pc_thunk.dx);				\
L(C1(pc,N)):								\
	leal	L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx;	\
	movl	closure_CF(%esp), %eax;		/* optimistic load */	\
	jmp	*%edx
# else
/* ELF PIC with a visible ffi_closure_inner: the call must go through
   the PLT, which requires %ebx to hold the GOT pointer.  Save %ebx in
   the spare frame slot at 40(%esp) around the call; the matching
   unwind annotations use the UWN label numbers passed in.  */
#  define FFI_CLOSURE_CALL_INNER_SAVE_EBX
#  undef FFI_CLOSURE_CALL_INNER
#  define FFI_CLOSURE_CALL_INNER(UWN)					\
	movl	%ebx, 40(%esp);			/* save ebx */		\
L(C1(UW,UWN)):								\
	# cfi_rel_offset(%ebx, 40);					\
	call	C(__x86.get_pc_thunk.bx);	/* load got register */	\
	addl	$C(_GLOBAL_OFFSET_TABLE_), %ebx;			\
	call	ffi_closure_inner@PLT
#  undef FFI_CLOSURE_MASK_AND_JUMP
#  define FFI_CLOSURE_MASK_AND_JUMP(N, UWN)				\
	andl	$X86_RET_TYPE_MASK, %eax;				\
	leal	L(C1(load_table,N))@GOTOFF(%ebx, %eax, 8), %edx;	\
	movl	40(%esp), %ebx;			/* restore ebx */	\
L(C1(UW,UWN)):								\
	# cfi_restore(%ebx);						\
	movl	closure_CF(%esp), %eax;		/* optimistic load */	\
	jmp	*%edx
# endif /* DARWIN || HIDDEN */
#endif /* __PIC__ */
290
	.balign	16
	.globl	C(ffi_go_closure_EAX)
	FFI_HIDDEN(C(ffi_go_closure_EAX))
/* Go closure entry point with the ffi_go_closure pointer in %eax.
   Copies cif/fun from the go closure (the closure itself serves as
   user_data) and merges into the common i386 closure path.  */
C(ffi_go_closure_EAX):
L(UW6):
	# cfi_startproc
	subl	$closure_FS, %esp
L(UW7):
	# cfi_def_cfa_offset(closure_FS + 4)
	FFI_CLOSURE_SAVE_REGS
	movl	4(%eax), %edx			/* copy cif */
	movl	8(%eax), %ecx			/* copy fun */
	movl	%edx, closure_CF+28(%esp)
	movl	%ecx, closure_CF+32(%esp)
	movl	%eax, closure_CF+36(%esp)	/* closure is user_data */
	jmp	L(do_closure_i386)
L(UW8):
	# cfi_endproc
ENDF(C(ffi_go_closure_EAX))
310
	.balign	16
	.globl	C(ffi_go_closure_ECX)
	FFI_HIDDEN(C(ffi_go_closure_ECX))
/* As ffi_go_closure_EAX, but with the ffi_go_closure pointer arriving
   in %ecx instead of %eax.  */
C(ffi_go_closure_ECX):
L(UW9):
	# cfi_startproc
	subl	$closure_FS, %esp
L(UW10):
	# cfi_def_cfa_offset(closure_FS + 4)
	FFI_CLOSURE_SAVE_REGS
	movl	4(%ecx), %edx			/* copy cif */
	movl	8(%ecx), %eax			/* copy fun */
	movl	%edx, closure_CF+28(%esp)
	movl	%eax, closure_CF+32(%esp)
	movl	%ecx, closure_CF+36(%esp)	/* closure is user_data */
	jmp	L(do_closure_i386)
L(UW11):
	# cfi_endproc
ENDF(C(ffi_go_closure_ECX))
330
/* The closure entry points are reached from the ffi_closure trampoline.
   On entry, %eax contains the address of the ffi_closure.  */

	.balign	16
	.globl	C(ffi_closure_i386)
	FFI_HIDDEN(C(ffi_closure_i386))

C(ffi_closure_i386):
L(UW12):
	# cfi_startproc
	subl	$closure_FS, %esp
L(UW13):
	# cfi_def_cfa_offset(closure_FS + 4)

	FFI_CLOSURE_SAVE_REGS
	FFI_CLOSURE_COPY_TRAMP_DATA

	/* Entry point from preceding Go closures.  */
L(do_closure_i386):

	/* Call ffi_closure_inner, then dispatch on the returned
	   X86_RET_* code through load_table2 (8-byte entries) to load
	   the result into the proper return registers.  */
	FFI_CLOSURE_PREP_CALL
	FFI_CLOSURE_CALL_INNER(14)
	FFI_CLOSURE_MASK_AND_JUMP(2, 15)

	.balign	8
L(load_table2):
E(L(load_table2), X86_RET_FLOAT)
	flds	closure_CF(%esp)
	jmp	L(e2)
E(L(load_table2), X86_RET_DOUBLE)
	fldl	closure_CF(%esp)
	jmp	L(e2)
E(L(load_table2), X86_RET_LDOUBLE)
	fldt	closure_CF(%esp)
	jmp	L(e2)
E(L(load_table2), X86_RET_SINT8)
	movsbl	%al, %eax
	jmp	L(e2)
E(L(load_table2), X86_RET_SINT16)
	movswl	%ax, %eax
	jmp	L(e2)
E(L(load_table2), X86_RET_UINT8)
	movzbl	%al, %eax
	jmp	L(e2)
E(L(load_table2), X86_RET_UINT16)
	movzwl	%ax, %eax
	jmp	L(e2)
E(L(load_table2), X86_RET_INT64)
	movl	closure_CF+4(%esp), %edx
	jmp	L(e2)
E(L(load_table2), X86_RET_INT32)
	nop
	/* fallthru */
E(L(load_table2), X86_RET_VOID)
L(e2):
	addl	$closure_FS, %esp
L(UW16):
	# cfi_adjust_cfa_offset(-closure_FS)
	ret
L(UW17):
	# cfi_adjust_cfa_offset(closure_FS)
E(L(load_table2), X86_RET_STRUCTPOP)
	/* Struct return where the callee pops the hidden pointer.  */
	addl	$closure_FS, %esp
L(UW18):
	# cfi_adjust_cfa_offset(-closure_FS)
	ret	$4
L(UW19):
	# cfi_adjust_cfa_offset(closure_FS)
E(L(load_table2), X86_RET_STRUCTARG)
	jmp	L(e2)
E(L(load_table2), X86_RET_STRUCT_1B)
	movzbl	%al, %eax
	jmp	L(e2)
E(L(load_table2), X86_RET_STRUCT_2B)
	movzwl	%ax, %eax
	jmp	L(e2)

	/* Fill out the table so that bad values are predictable.  */
E(L(load_table2), X86_RET_UNUSED14)
	ud2
E(L(load_table2), X86_RET_UNUSED15)
	ud2

L(UW20):
	# cfi_endproc
ENDF(C(ffi_closure_i386))
417
	.balign	16
	.globl	C(ffi_go_closure_STDCALL)
	FFI_HIDDEN(C(ffi_go_closure_STDCALL))
/* Go closure entry for callee-pop conventions: as ffi_go_closure_ECX
   but merging into the STDCALL closure path below.  */
C(ffi_go_closure_STDCALL):
L(UW21):
	# cfi_startproc
	subl	$closure_FS, %esp
L(UW22):
	# cfi_def_cfa_offset(closure_FS + 4)
	FFI_CLOSURE_SAVE_REGS
	movl	4(%ecx), %edx			/* copy cif */
	movl	8(%ecx), %eax			/* copy fun */
	movl	%edx, closure_CF+28(%esp)
	movl	%eax, closure_CF+32(%esp)
	movl	%ecx, closure_CF+36(%esp)	/* closure is user_data */
	jmp	L(do_closure_STDCALL)
L(UW23):
	# cfi_endproc
ENDF(C(ffi_go_closure_STDCALL))
437
/* For REGISTER, we have no available parameter registers, and so we
   enter here having pushed the closure onto the stack.  */

	.balign	16
	.globl	C(ffi_closure_REGISTER)
	FFI_HIDDEN(C(ffi_closure_REGISTER))
C(ffi_closure_REGISTER):
L(UW24):
	# cfi_startproc
	# cfi_def_cfa(%esp, 8)
	# cfi_offset(%eip, -8)
	/* Allocate 4 bytes less than usual: the pushed closure word is
	   recycled as part of the frame once the return address is
	   moved down over it.  */
	subl	$closure_FS-4, %esp
L(UW25):
	# cfi_def_cfa_offset(closure_FS + 4)
	FFI_CLOSURE_SAVE_REGS
	movl	closure_FS-4(%esp), %ecx	/* load retaddr */
	movl	closure_FS(%esp), %eax		/* load closure */
	movl	%ecx, closure_FS(%esp)		/* move retaddr */
	jmp	L(do_closure_REGISTER)
L(UW26):
	# cfi_endproc
ENDF(C(ffi_closure_REGISTER))
460
/* For STDCALL (and others), we need to pop N bytes of arguments off
   the stack following the closure.  The amount needing to be popped
   is returned to us from ffi_closure_inner.  */

	.balign	16
	.globl	C(ffi_closure_STDCALL)
	FFI_HIDDEN(C(ffi_closure_STDCALL))
C(ffi_closure_STDCALL):
L(UW27):
	# cfi_startproc
	subl	$closure_FS, %esp
L(UW28):
	# cfi_def_cfa_offset(closure_FS + 4)

	FFI_CLOSURE_SAVE_REGS

	/* Entry point from ffi_closure_REGISTER.  */
L(do_closure_REGISTER):

	FFI_CLOSURE_COPY_TRAMP_DATA

	/* Entry point from preceding Go closure.  */
L(do_closure_STDCALL):

	FFI_CLOSURE_PREP_CALL
	FFI_CLOSURE_CALL_INNER(29)

	/* The flags word returned by ffi_closure_inner carries the
	   callee-pop byte count in its high bits; use it to compute
	   the post-return %esp and relocate the return address there.  */
	movl	%eax, %ecx
	shrl	$X86_RET_POP_SHIFT, %ecx	/* isolate pop count */
	leal	closure_FS(%esp, %ecx), %ecx	/* compute popped esp */
	movl	closure_FS(%esp), %edx		/* move return address */
	movl	%edx, (%ecx)

	/* From this point on, the value of %esp upon return is %ecx+4,
	   and we've copied the return address to %ecx to make return easy.
	   There's no point in representing this in the unwind info, as
	   there is always a window between the mov and the ret which
	   will be wrong from one point of view or another.  */

	FFI_CLOSURE_MASK_AND_JUMP(3, 30)

	.balign	8
L(load_table3):
E(L(load_table3), X86_RET_FLOAT)
	flds    closure_CF(%esp)
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_DOUBLE)
	fldl    closure_CF(%esp)
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_LDOUBLE)
	fldt    closure_CF(%esp)
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_SINT8)
	movsbl  %al, %eax
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_SINT16)
	movswl  %ax, %eax
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_UINT8)
	movzbl  %al, %eax
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_UINT16)
	movzwl  %ax, %eax
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_INT64)
	movl	closure_CF+4(%esp), %edx
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_INT32)
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_VOID)
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_STRUCTPOP)
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_STRUCTARG)
	movl	%ecx, %esp
	ret
E(L(load_table3), X86_RET_STRUCT_1B)
	movzbl	%al, %eax
	movl	%ecx, %esp
	ret
E(L(load_table3), X86_RET_STRUCT_2B)
	movzwl	%ax, %eax
	movl	%ecx, %esp
	ret

	/* Fill out the table so that bad values are predictable.  */
E(L(load_table3), X86_RET_UNUSED14)
	ud2
E(L(load_table3), X86_RET_UNUSED15)
	ud2

L(UW31):
	# cfi_endproc
ENDF(C(ffi_closure_STDCALL))
566
#if !FFI_NO_RAW_API

/* Raw closure frame: 16 bytes result + 16 bytes outgoing args/%ebx
   save area + 12 padding; with the return address this keeps the
   stack 16-byte aligned at the call to cl->fun.  */
#define raw_closure_S_FS	(16+16+12)

	.balign	16
	.globl	C(ffi_closure_raw_SYSV)
	FFI_HIDDEN(C(ffi_closure_raw_SYSV))
/* Raw SYSV closure: calls cl->fun(cif, &res, raw_args, user_data)
   directly with the incoming stack as the raw argument block,
   bypassing ffi_closure_inner.  On entry %eax holds the ffi_closure.  */
C(ffi_closure_raw_SYSV):
L(UW32):
	# cfi_startproc
	subl	$raw_closure_S_FS, %esp
L(UW33):
	# cfi_def_cfa_offset(raw_closure_S_FS + 4)
	movl	%ebx, raw_closure_S_FS-4(%esp)
L(UW34):
	# cfi_rel_offset(%ebx, raw_closure_S_FS-4)

	movl	FFI_TRAMPOLINE_SIZE+8(%eax), %edx	/* load cl->user_data */
	movl	%edx, 12(%esp)
	leal	raw_closure_S_FS+4(%esp), %edx		/* load raw_args */
	movl	%edx, 8(%esp)
	leal	16(%esp), %edx				/* load &res */
	movl	%edx, 4(%esp)
	movl	FFI_TRAMPOLINE_SIZE(%eax), %ebx		/* load cl->cif */
	movl	%ebx, (%esp)
	call	*FFI_TRAMPOLINE_SIZE+4(%eax)		/* call cl->fun */

	/* Dispatch on cif->flags' return-type code through load_table4
	   (8-byte entries) to move res into the return registers.  */
	movl	20(%ebx), %eax				/* load cif->flags */
	andl	$X86_RET_TYPE_MASK, %eax
#ifdef __PIC__
	call	C(__x86.get_pc_thunk.bx)
L(pc4):
	leal	L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx
#else
	leal	L(load_table4)(,%eax, 8), %ecx
#endif
	movl	raw_closure_S_FS-4(%esp), %ebx
L(UW35):
	# cfi_restore(%ebx)
	movl	16(%esp), %eax				/* Optimistic load */
	jmp	*%ecx

	.balign	8
L(load_table4):
E(L(load_table4), X86_RET_FLOAT)
	flds	16(%esp)
	jmp	L(e4)
E(L(load_table4), X86_RET_DOUBLE)
	fldl	16(%esp)
	jmp	L(e4)
E(L(load_table4), X86_RET_LDOUBLE)
	fldt	16(%esp)
	jmp	L(e4)
E(L(load_table4), X86_RET_SINT8)
	movsbl	%al, %eax
	jmp	L(e4)
E(L(load_table4), X86_RET_SINT16)
	movswl	%ax, %eax
	jmp	L(e4)
E(L(load_table4), X86_RET_UINT8)
	movzbl	%al, %eax
	jmp	L(e4)
E(L(load_table4), X86_RET_UINT16)
	movzwl	%ax, %eax
	jmp	L(e4)
E(L(load_table4), X86_RET_INT64)
	movl	16+4(%esp), %edx
	jmp	L(e4)
E(L(load_table4), X86_RET_INT32)
	nop
	/* fallthru */
E(L(load_table4), X86_RET_VOID)
L(e4):
	addl	$raw_closure_S_FS, %esp
L(UW36):
	# cfi_adjust_cfa_offset(-raw_closure_S_FS)
	ret
L(UW37):
	# cfi_adjust_cfa_offset(raw_closure_S_FS)
E(L(load_table4), X86_RET_STRUCTPOP)
	addl	$raw_closure_S_FS, %esp
L(UW38):
	# cfi_adjust_cfa_offset(-raw_closure_S_FS)
	ret	$4
L(UW39):
	# cfi_adjust_cfa_offset(raw_closure_S_FS)
E(L(load_table4), X86_RET_STRUCTARG)
	jmp	L(e4)
E(L(load_table4), X86_RET_STRUCT_1B)
	movzbl	%al, %eax
	jmp	L(e4)
E(L(load_table4), X86_RET_STRUCT_2B)
	movzwl	%ax, %eax
	jmp	L(e4)

	/* Fill out the table so that bad values are predictable.  */
E(L(load_table4), X86_RET_UNUSED14)
	ud2
E(L(load_table4), X86_RET_UNUSED15)
	ud2

L(UW40):
	# cfi_endproc
ENDF(C(ffi_closure_raw_SYSV))
671
#define raw_closure_T_FS	(16+16+8)

	.balign	16
	.globl	C(ffi_closure_raw_THISCALL)
	FFI_HIDDEN(C(ffi_closure_raw_THISCALL))
/* Raw THISCALL closure: like ffi_closure_raw_SYSV, but the first
   argument arrives in %ecx and must first be pushed so the raw
   argument block is contiguous on the stack.  */
C(ffi_closure_raw_THISCALL):
L(UW41):
	# cfi_startproc
	/* Rearrange the stack such that %ecx is the first argument.
	   This means moving the return address.  */
	popl	%edx
L(UW42):
	# cfi_def_cfa_offset(0)
	# cfi_register(%eip, %edx)
	pushl	%ecx
L(UW43):
	# cfi_adjust_cfa_offset(4)
	pushl	%edx
L(UW44):
	# cfi_adjust_cfa_offset(4)
	# cfi_rel_offset(%eip, 0)
	subl	$raw_closure_T_FS, %esp
L(UW45):
	# cfi_adjust_cfa_offset(raw_closure_T_FS)
	movl	%ebx, raw_closure_T_FS-4(%esp)
L(UW46):
	# cfi_rel_offset(%ebx, raw_closure_T_FS-4)

	movl	FFI_TRAMPOLINE_SIZE+8(%eax), %edx	/* load cl->user_data */
	movl	%edx, 12(%esp)
	leal	raw_closure_T_FS+4(%esp), %edx		/* load raw_args */
	movl	%edx, 8(%esp)
	leal	16(%esp), %edx				/* load &res */
	movl	%edx, 4(%esp)
	movl	FFI_TRAMPOLINE_SIZE(%eax), %ebx		/* load cl->cif */
	movl	%ebx, (%esp)
	call	*FFI_TRAMPOLINE_SIZE+4(%eax)		/* call cl->fun */

	/* Dispatch on cif->flags' return-type code through load_table5.  */
	movl	20(%ebx), %eax				/* load cif->flags */
	andl	$X86_RET_TYPE_MASK, %eax
#ifdef __PIC__
	call	C(__x86.get_pc_thunk.bx)
L(pc5):
	leal	L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx
#else
	leal	L(load_table5)(,%eax, 8), %ecx
#endif
	movl	raw_closure_T_FS-4(%esp), %ebx
L(UW47):
	# cfi_restore(%ebx)
	movl	16(%esp), %eax				/* Optimistic load */
	jmp	*%ecx

	.balign	8
L(load_table5):
E(L(load_table5), X86_RET_FLOAT)
	flds	16(%esp)
	jmp	L(e5)
E(L(load_table5), X86_RET_DOUBLE)
	fldl	16(%esp)
	jmp	L(e5)
E(L(load_table5), X86_RET_LDOUBLE)
	fldt	16(%esp)
	jmp	L(e5)
E(L(load_table5), X86_RET_SINT8)
	movsbl	%al, %eax
	jmp	L(e5)
E(L(load_table5), X86_RET_SINT16)
	movswl	%ax, %eax
	jmp	L(e5)
E(L(load_table5), X86_RET_UINT8)
	movzbl	%al, %eax
	jmp	L(e5)
E(L(load_table5), X86_RET_UINT16)
	movzwl	%ax, %eax
	jmp	L(e5)
E(L(load_table5), X86_RET_INT64)
	movl	16+4(%esp), %edx
	jmp	L(e5)
E(L(load_table5), X86_RET_INT32)
	nop
	/* fallthru */
E(L(load_table5), X86_RET_VOID)
L(e5):
	addl	$raw_closure_T_FS, %esp
L(UW48):
	# cfi_adjust_cfa_offset(-raw_closure_T_FS)
	/* Remove the extra %ecx argument we pushed.  */
	ret	$4
L(UW49):
	# cfi_adjust_cfa_offset(raw_closure_T_FS)
E(L(load_table5), X86_RET_STRUCTPOP)
	addl	$raw_closure_T_FS, %esp
L(UW50):
	# cfi_adjust_cfa_offset(-raw_closure_T_FS)
	/* Pop both the pushed %ecx and the struct-return pointer.  */
	ret	$8
L(UW51):
	# cfi_adjust_cfa_offset(raw_closure_T_FS)
E(L(load_table5), X86_RET_STRUCTARG)
	jmp	L(e5)
E(L(load_table5), X86_RET_STRUCT_1B)
	movzbl	%al, %eax
	jmp	L(e5)
E(L(load_table5), X86_RET_STRUCT_2B)
	movzwl	%ax, %eax
	jmp	L(e5)

	/* Fill out the table so that bad values are predictable.  */
E(L(load_table5), X86_RET_UNUSED14)
	ud2
E(L(load_table5), X86_RET_UNUSED15)
	ud2

L(UW52):
	# cfi_endproc
ENDF(C(ffi_closure_raw_THISCALL))

#endif /* !FFI_NO_RAW_API */
790
/* COMDAT(X) places X in a linkonce/comdat section so the get-pc
   thunks below can be duplicated across objects and merged by the
   linker; platforms without comdat support emit them plainly.  */
#ifdef X86_DARWIN
# define COMDAT(X)							\
        .section __TEXT,__textcoal_nt,coalesced,pure_instructions;	\
        .weak_definition X;						\
        .private_extern X
#elif defined __ELF__ && !(defined(__sun__) && defined(__svr4__))
# define COMDAT(X)							\
	.section .text.X,"axG",@progbits,X,comdat;			\
	.globl	X;							\
	FFI_HIDDEN(X)
#else
# define COMDAT(X)
#endif

/* Get-pc thunks: return the caller's address (the saved %eip on the
   stack) in %ebx or %edx, for pc-relative addressing in PIC code.  */
#if defined(__PIC__)
	COMDAT(C(__x86.get_pc_thunk.bx))
C(__x86.get_pc_thunk.bx):
	movl	(%esp), %ebx
	ret
ENDF(C(__x86.get_pc_thunk.bx))
# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
	COMDAT(C(__x86.get_pc_thunk.dx))
C(__x86.get_pc_thunk.dx):
	movl	(%esp), %edx
	ret
ENDF(C(__x86.get_pc_thunk.dx))
#endif /* DARWIN || HIDDEN */
#endif /* __PIC__ */
819
/* Sadly, OSX cctools-as doesn't understand .cfi directives at all.  */

/* Hand-assembled DWARF .eh_frame data: one CIE followed by one FDE
   per function above.  The FDE programs mirror the "# cfi_*" comment
   annotations embedded in the code; the L(UWn) labels bound each
   region of constant unwind state.  */
#ifdef __APPLE__
.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
EHFrame0:
#elif defined(X86_WIN32)
.section .eh_frame,"r"
#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE)
.section .eh_frame,EH_FRAME_FLAGS,@unwind
#else
.section .eh_frame,EH_FRAME_FLAGS,@progbits
#endif

#ifdef HAVE_AS_X86_PCREL
# define PCREL(X)	X - .
#else
# define PCREL(X)	X@rel
#endif

/* Simplify advancing between labels.  Assume DW_CFA_advance_loc1 fits.  */
#define ADV(N, P)	.byte 2, L(N)-L(P)

	.balign 4
L(CIE):
	.set	L(set0),L(ECIE)-L(SCIE)
	.long	L(set0)			/* CIE Length */
L(SCIE):
	.long	0			/* CIE Identifier Tag */
	.byte	1			/* CIE Version */
	.ascii	"zR\0"			/* CIE Augmentation */
	.byte	1			/* CIE Code Alignment Factor */
	.byte	0x7c			/* CIE Data Alignment Factor */
	.byte	0x8			/* CIE RA Column */
	.byte	1			/* Augmentation size */
	.byte	0x1b			/* FDE Encoding (pcrel sdata4) */
	.byte	0xc, 4, 4		/* DW_CFA_def_cfa, %esp offset 4 */
	.byte	0x80+8, 1		/* DW_CFA_offset, %eip offset 1*-4 */
	.balign 4
L(ECIE):

	/* FDE for ffi_call_i386 (UW0-UW5).  */
	.set	L(set1),L(EFDE1)-L(SFDE1)
	.long	L(set1)			/* FDE Length */
L(SFDE1):
	.long	L(SFDE1)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW0))		/* Initial location */
	.long	L(UW5)-L(UW0)		/* Address range */
	.byte	0			/* Augmentation size */
	ADV(UW1, UW0)
	.byte	0xc, 5, 8		/* DW_CFA_def_cfa, %ebp 8 */
	.byte	0x80+5, 2		/* DW_CFA_offset, %ebp 2*-4 */
	ADV(UW2, UW1)
	.byte	0x80+3, 0		/* DW_CFA_offset, %ebx 0*-4 */
	ADV(UW3, UW2)
	.byte	0xa			/* DW_CFA_remember_state */
	.byte	0xc, 4, 4		/* DW_CFA_def_cfa, %esp 4 */
	.byte	0xc0+3			/* DW_CFA_restore, %ebx */
	.byte	0xc0+5			/* DW_CFA_restore, %ebp */
	ADV(UW4, UW3)
	.byte	0xb			/* DW_CFA_restore_state */
	.balign	4
L(EFDE1):

	/* FDE for ffi_go_closure_EAX (UW6-UW8).  */
	.set	L(set2),L(EFDE2)-L(SFDE2)
	.long	L(set2)			/* FDE Length */
L(SFDE2):
	.long	L(SFDE2)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW6))		/* Initial location */
	.long	L(UW8)-L(UW6)		/* Address range */
	.byte	0			/* Augmentation size */
	ADV(UW7, UW6)
	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
	.balign	4
L(EFDE2):

	/* FDE for ffi_go_closure_ECX (UW9-UW11).  */
	.set	L(set3),L(EFDE3)-L(SFDE3)
	.long	L(set3)			/* FDE Length */
L(SFDE3):
	.long	L(SFDE3)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW9))		/* Initial location */
	.long	L(UW11)-L(UW9)		/* Address range */
	.byte	0			/* Augmentation size */
	ADV(UW10, UW9)
	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
	.balign	4
L(EFDE3):

	/* FDE for ffi_closure_i386 (UW12-UW20).  */
	.set	L(set4),L(EFDE4)-L(SFDE4)
	.long	L(set4)			/* FDE Length */
L(SFDE4):
	.long	L(SFDE4)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW12))		/* Initial location */
	.long	L(UW20)-L(UW12)		/* Address range */
	.byte	0			/* Augmentation size */
	ADV(UW13, UW12)
	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
	ADV(UW14, UW13)
	.byte	0x80+3, (40-(closure_FS+4))/-4  /* DW_CFA_offset %ebx */
	ADV(UW15, UW14)
	.byte	0xc0+3			/* DW_CFA_restore %ebx */
	ADV(UW16, UW15)
#else
	ADV(UW16, UW13)
#endif
	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
	ADV(UW17, UW16)
	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
	ADV(UW18, UW17)
	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
	ADV(UW19, UW18)
	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
	.balign	4
L(EFDE4):

	/* FDE for ffi_go_closure_STDCALL (UW21-UW23).  */
	.set	L(set5),L(EFDE5)-L(SFDE5)
	.long	L(set5)			/* FDE Length */
L(SFDE5):
	.long	L(SFDE5)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW21))		/* Initial location */
	.long	L(UW23)-L(UW21)		/* Address range */
	.byte	0			/* Augmentation size */
	ADV(UW22, UW21)
	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
	.balign	4
L(EFDE5):

	/* FDE for ffi_closure_REGISTER (UW24-UW26).  */
	.set	L(set6),L(EFDE6)-L(SFDE6)
	.long	L(set6)			/* FDE Length */
L(SFDE6):
	.long	L(SFDE6)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW24))		/* Initial location */
	.long	L(UW26)-L(UW24)		/* Address range */
	.byte	0			/* Augmentation size */
	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
	.byte	0x80+8, 2		/* DW_CFA_offset %eip, 2*-4 */
	ADV(UW25, UW24)
	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
	.balign	4
L(EFDE6):

	/* FDE for ffi_closure_STDCALL (UW27-UW31).  */
	.set	L(set7),L(EFDE7)-L(SFDE7)
	.long	L(set7)			/* FDE Length */
L(SFDE7):
	.long	L(SFDE7)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW27))		/* Initial location */
	.long	L(UW31)-L(UW27)		/* Address range */
	.byte	0			/* Augmentation size */
	ADV(UW28, UW27)
	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
	ADV(UW29, UW28)
	.byte	0x80+3, (40-(closure_FS+4))/-4  /* DW_CFA_offset %ebx */
	ADV(UW30, UW29)
	.byte	0xc0+3			/* DW_CFA_restore %ebx */
#endif
	.balign	4
L(EFDE7):

#if !FFI_NO_RAW_API
	/* FDE for ffi_closure_raw_SYSV (UW32-UW40).  */
	.set	L(set8),L(EFDE8)-L(SFDE8)
	.long	L(set8)			/* FDE Length */
L(SFDE8):
	.long	L(SFDE8)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW32))		/* Initial location */
	.long	L(UW40)-L(UW32)		/* Address range */
	.byte	0			/* Augmentation size */
	ADV(UW33, UW32)
	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
	ADV(UW34, UW33)
	.byte	0x80+3, 2		/* DW_CFA_offset %ebx 2*-4 */
	ADV(UW35, UW34)
	.byte	0xc0+3			/* DW_CFA_restore %ebx */
	ADV(UW36, UW35)
	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
	ADV(UW37, UW36)
	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
	ADV(UW38, UW37)
	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
	ADV(UW39, UW38)
	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
	.balign	4
L(EFDE8):

	/* FDE for ffi_closure_raw_THISCALL (UW41-UW52).  */
	.set	L(set9),L(EFDE9)-L(SFDE9)
	.long	L(set9)			/* FDE Length */
L(SFDE9):
	.long	L(SFDE9)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW41))		/* Initial location */
	.long	L(UW52)-L(UW41)		/* Address range */
	.byte	0			/* Augmentation size */
	ADV(UW42, UW41)
	.byte	0xe, 0			/* DW_CFA_def_cfa_offset */
	.byte	0x9, 8, 2		/* DW_CFA_register %eip, %edx */
	ADV(UW43, UW42)
	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
	ADV(UW44, UW43)
	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
	.byte	0x80+8, 2		/* DW_CFA_offset %eip 2*-4 */
	ADV(UW45, UW44)
	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
	ADV(UW46, UW45)
	.byte	0x80+3, 3		/* DW_CFA_offset %ebx 3*-4 */
	ADV(UW47, UW46)
	.byte	0xc0+3			/* DW_CFA_restore %ebx */
	ADV(UW48, UW47)
	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
	ADV(UW49, UW48)
	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
	ADV(UW50, UW49)
	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
	ADV(UW51, UW50)
	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
	.balign	4
L(EFDE9):
#endif /* !FFI_NO_RAW_API */

#endif /* ifndef __x86_64__ */

/* Mark the stack as non-executable on GNU/Linux.  */
#if defined __ELF__ && defined __linux__
	.section	.note.GNU-stack,"",@progbits
#endif
1041