#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
#include <ffi_cfi.h>

/* Syntax: GNU as, AT&T operand order, run through the C preprocessor.
   ABI:    Microsoft x64 (integer arguments in rcx, rdx, r8, r9).  */

#if defined(HAVE_AS_CFI_PSEUDO_OP)
	.cfi_sections	.debug_frame
#endif

/* Integer argument registers, in positional order.  */
#define arg0	%rcx
#define arg1	%rdx
#define arg2	%r8
#define arg3	%r9

/* NOTE(review): SYMBOL_NAME is defined here but none of the labels
   below are wrapped in it -- confirm whether the exported symbols
   were meant to be decorated when SYMBOL_UNDERSCORE is set.  */
#ifdef SYMBOL_UNDERSCORE
#define SYMBOL_NAME(name) _##name
#else
#define SYMBOL_NAME(name) name
#endif

/* Start the jump-table slot for return-type code WHICH.  Every slot is
   8 bytes wide and the table base is the nearest preceding "0:" label.
   The .org doubles as an assertion: if the previous slot overflowed
   its 8 bytes, .org would have to move backwards and the assembler
   rejects the file.  */
.macro E which
	.align	8
	.org	0b + \which * 8
.endm

	.text

/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10)

   In:   arg0 = STACK, the prepared outgoing argument area
         arg1 = FRAME, as used below:
                    0(frame)   receives the caller's %rbp
                    8(frame)   receives our return address
                   16(frame)   address of the function to call
                   24(frame)   32-bit return-type code (table index)
                   32(frame)   where the result is stored
         arg2 = value handed to the callee in %r10

   A bit of trickiness here -- FRAME is the base of the stack frame
   for this function.  This has been allocated by ffi_call.  We also
   deallocate some of the stack that has been alloca'd.  */

	.align	8
	.globl	ffi_call_win64

	.seh_proc ffi_call_win64
ffi_call_win64:
	cfi_startproc
	/* Set up the local stack frame and install it in rbp/rsp.
	   Saving rbp and the return address at the bottom of FRAME makes
	   it look like an ordinary "push %rbp" frame, which is what lets
	   the epilogue below unwind it with leave/ret.  */
	movq	(%rsp), %rax			/* our return address */
	movq	%rbp, (arg1)
	movq	%rax, 8(arg1)
	movq	arg1, %rbp
	cfi_def_cfa(%rbp, 16)
	cfi_rel_offset(%rbp, 0)
	.seh_pushreg %rbp
	.seh_setframe %rbp, 0
	.seh_endprologue
	movq	arg0, %rsp			/* switch to the argument area */

	movq	arg2, %r10

	/* Load all slots into both general and xmm registers; the first
	   four 8-byte slots are read twice so that either register class
	   holds the value the callee expects.  */
	movq	(%rsp), %rcx
	movsd	(%rsp), %xmm0
	movq	8(%rsp), %rdx
	movsd	8(%rsp), %xmm1
	movq	16(%rsp), %r8
	movsd	16(%rsp), %xmm2
	movq	24(%rsp), %r9
	movsd	24(%rsp), %xmm3

	call	*16(%rbp)

	/* Dispatch on the return-type code: target = 0f + code * 8.
	   The 32-bit load zero-extends into %rcx.  The leaq sits between
	   the cmpl and the ja on purpose; lea leaves the flags alone, so
	   the ja still tests the comparison.  */
	movl	24(%rbp), %ecx
	movq	32(%rbp), %r8			/* result address */
	leaq	0f(%rip), %r10
	cmpl	$FFI_TYPE_SMALL_STRUCT_4B, %ecx
	leaq	(%r10, %rcx, 8), %r10
	ja	99f				/* code beyond the last slot */
	jmp	*%r10

/* Below, we're space constrained most of the time.  Thus we eschew the
   modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes).
   The CFI state is saved and restored around the return so the unwind
   description stays valid for the table slots that follow.  */
.macro epilogue
	leaveq
	cfi_remember_state
	cfi_def_cfa(%rsp, 8)
	cfi_restore(%rbp)
	ret
	cfi_restore_state
.endm

	/* Return-value store table; one 8-byte slot per type code.  */
	.align	8
0:
E FFI_TYPE_VOID
	epilogue
E FFI_TYPE_INT
	movslq	%eax, %rax			/* sign-extend to 64 bits */
	movq	%rax, (%r8)
	epilogue
E FFI_TYPE_FLOAT
	movss	%xmm0, (%r8)
	epilogue
E FFI_TYPE_DOUBLE
	movsd	%xmm0, (%r8)
	epilogue
	/* Long double slot.  It is positioned as FFI_TYPE_DOUBLE + 1
	   rather than FFI_TYPE_LONGDOUBLE: if the headers alias
	   FFI_TYPE_LONGDOUBLE to FFI_TYPE_DOUBLE (presumably on targets
	   without a distinct long double -- verify against ffi.h), the
	   .org would point back into the double slot and the file would
	   not assemble.  Assumes the two codes are consecutive whenever
	   they are distinct -- confirm against ffi.h.  Written without
	   the E macro so the expression is never split as a macro
	   argument.  */
	.align	8
	.org	0b + (FFI_TYPE_DOUBLE + 1) * 8
	call	abort
E FFI_TYPE_UINT8
	movzbl	%al, %eax			/* zero-extend to 64 bits */
	movq	%rax, (%r8)
	epilogue
E FFI_TYPE_SINT8
	movsbq	%al, %rax
	jmp	98f				/* share the 64-bit store */
E FFI_TYPE_UINT16
	movzwl	%ax, %eax
	movq	%rax, (%r8)
	epilogue
E FFI_TYPE_SINT16
	movswq	%ax, %rax
	jmp	98f				/* share the 64-bit store */
E FFI_TYPE_UINT32
	movl	%eax, %eax			/* 32-bit write clears bits 63:32 */
	movq	%rax, (%r8)
	epilogue
E FFI_TYPE_SINT32
	movslq	%eax, %rax
	movq	%rax, (%r8)
	epilogue
E FFI_TYPE_UINT64
98:	movq	%rax, (%r8)
	epilogue
E FFI_TYPE_SINT64
	movq	%rax, (%r8)
	epilogue
E FFI_TYPE_STRUCT
	epilogue				/* nothing is stored here */
E FFI_TYPE_POINTER
	movq	%rax, (%r8)
	epilogue
E FFI_TYPE_COMPLEX
	call	abort
E FFI_TYPE_SMALL_STRUCT_1B
	movb	%al, (%r8)
	epilogue
E FFI_TYPE_SMALL_STRUCT_2B
	movw	%ax, (%r8)
	epilogue
E FFI_TYPE_SMALL_STRUCT_4B
	movl	%eax, (%r8)
	epilogue

	/* Return-type code above FFI_TYPE_SMALL_STRUCT_4B.  */
	.align	8
99:	call	abort

.purgem epilogue

	cfi_endproc
	.seh_endproc


/* 32 bytes of outgoing register stack space, 8 bytes of alignment,
   16 bytes of result, 32 bytes of xmm registers.  */
#define ffi_clo_FS	(32+8+16+32)
#define ffi_clo_OFF_R	(32+8)
#define ffi_clo_OFF_X	(32+8+16)

/* ffi_go_closure_win64

   Entry point that expects a closure object in %r10.  It spills the
   four integer argument registers, loads the cif from 8(%r10) and the
   function from 16(%r10), passes the closure itself as user_data, and
   then continues in the body shared with ffi_closure_win64.  */

	.align	8
	.globl	ffi_go_closure_win64

	.seh_proc ffi_go_closure_win64
ffi_go_closure_win64:
	cfi_startproc
	/* Save all integer arguments into the incoming reg stack space.  */
	movq	arg0, 8(%rsp)
	movq	arg1, 16(%rsp)
	movq	arg2, 24(%rsp)
	movq	arg3, 32(%rsp)

	movq	8(%r10), arg0			/* load cif */
	movq	16(%r10), arg1			/* load fun */
	movq	%r10, arg2			/* closure is user_data */
	jmp	0f				/* shared body, in ffi_closure_win64 */
	cfi_endproc
	.seh_endproc

/* ffi_closure_win64

   Entry point that expects %r10 to address a block whose cif, fun and
   user_data words sit FFI_TRAMPOLINE_SIZE bytes past its start.  The
   shared body at "0:" calls
       ffi_closure_win64_inner (cif, fun, user_data, result_area)
   and hands whatever was written to the result area back in both
   %rax and %xmm0.  */

	.align	8
	.globl	ffi_closure_win64

	.seh_proc ffi_closure_win64
ffi_closure_win64:
	cfi_startproc
	/* Save all integer arguments into the incoming reg stack space.  */
	movq	arg0, 8(%rsp)
	movq	arg1, 16(%rsp)
	movq	arg2, 24(%rsp)
	movq	arg3, 32(%rsp)

	movq	FFI_TRAMPOLINE_SIZE(%r10), arg0		/* load cif */
	movq	FFI_TRAMPOLINE_SIZE+8(%r10), arg1	/* load fun */
	movq	FFI_TRAMPOLINE_SIZE+16(%r10), arg2	/* load user_data */
0:
	/* Common body; ffi_go_closure_win64 joins here.  */
	subq	$ffi_clo_FS, %rsp
	cfi_adjust_cfa_offset(ffi_clo_FS)
	.seh_stackalloc ffi_clo_FS
	.seh_endprologue

	/* Save all sse arguments into the stack frame.  */
	movsd	%xmm0, ffi_clo_OFF_X(%rsp)
	movsd	%xmm1, ffi_clo_OFF_X+8(%rsp)
	movsd	%xmm2, ffi_clo_OFF_X+16(%rsp)
	movsd	%xmm3, ffi_clo_OFF_X+24(%rsp)

	leaq	ffi_clo_OFF_R(%rsp), arg3	/* fourth argument: result area */
	call	ffi_closure_win64_inner

	/* Load the result into both possible result registers.  */
	movq	ffi_clo_OFF_R(%rsp), %rax
	movsd	ffi_clo_OFF_R(%rsp), %xmm0

	addq	$ffi_clo_FS, %rsp
	cfi_adjust_cfa_offset(-ffi_clo_FS)
	ret

	cfi_endproc
	.seh_endproc