/* -----------------------------------------------------------------------
   darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc.
   derived from unix64.S

   x86-64 Foreign Function Interface for Darwin.

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   ``Software''), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
   OTHER DEALINGS IN THE SOFTWARE.
   ----------------------------------------------------------------------- */

/* Syntax: AT&T (GAS), preprocessed with cpp.  Target: x86-64 Mach-O.  */

#ifdef __x86_64__
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>

	.file "darwin64.S"
.text

/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
		    void *raddr, void (*fnaddr)());

   Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
   for this function.  This has been allocated by ffi_call.  We also
   deallocate some of the stack that has been alloca'd.

   Registers on entry, as consumed by the code below:
     %rdi  args    base of the register-argument area
     %rsi  bytes   args + bytes is the local stack frame base
     %rdx  flags   low byte = FFI_TYPE of the return value
     %rcx  raddr   where the return value is stored
     %r8   fnaddr  function to call
     %r9d          number of SSE registers; copied to %eax, and the
		   xmm registers are loaded only when it is non-zero

   Register-argument area layout (176 bytes):
       0 ..  40   %rdi, %rsi, %rdx, %rcx, %r8, %r9
      48 .. 160   %xmm0 .. %xmm7 (16 bytes each, read with movdqa)
     176 ..       start of the outgoing stack arguments

   Local frame at args + bytes (becomes %rbp):
       0  flags,  8  raddr,  16  saved %rbp,  24  relocated return address

   Only volatile registers are used as scratch (%rax, %r10, %r11); no
   callee-saved register other than %rbp (saved and restored) is
   written.  */

	.align	3
	.globl	_ffi_call_unix64

_ffi_call_unix64:
LUW0:
	movq	(%rsp), %r10		/* Load return address.  */
	leaq	(%rdi, %rsi), %rax	/* Find local stack base.  */
	movq	%rdx, (%rax)		/* Save flags.  */
	movq	%rcx, 8(%rax)		/* Save raddr.  */
	movq	%rbp, 16(%rax)		/* Save old frame pointer.  */
	movq	%r10, 24(%rax)		/* Relocate return address.  */
	movq	%rax, %rbp		/* Finalize local stack frame.  */
LUW1:
	/* %rdi has not been modified yet and %r10 is free again now that
	   the return address has been stored, so copy directly instead of
	   staging the pointer through a callee-saved register.  */
	movq	%rdi, %r10		/* Save a copy of the register area. */
	movq	%r8, %r11		/* Save a copy of the target fn.  */
	movl	%r9d, %eax		/* Set number of SSE registers.  */

	/* Load up all argument registers.  */
	movq	(%r10), %rdi
	movq	8(%r10), %rsi
	movq	16(%r10), %rdx
	movq	24(%r10), %rcx
	movq	32(%r10), %r8
	movq	40(%r10), %r9
	testl	%eax, %eax
	jnz	Lload_sse
Lret_from_load_sse:

	/* Deallocate the reg arg area.  */
	leaq	176(%r10), %rsp

	/* Call the user function.  */
	call	*%r11

	/* Deallocate stack arg area; local stack frame in redzone.  */
	leaq	24(%rbp), %rsp

	movq	0(%rbp), %rcx		/* Reload flags.  */
	movq	8(%rbp), %rdi		/* Reload raddr.  */
	movq	16(%rbp), %rbp		/* Reload old frame pointer.  */
LUW2:

	/* The first byte of the flags contains the FFI_TYPE.  Dispatch
	   through a table of 32-bit offsets relative to Lstore_table.  */
	movzbl	%cl, %r10d
	leaq	Lstore_table(%rip), %r11
	movslq	(%r11, %r10, 4), %r10
	addq	%r11, %r10
	jmp	*%r10

Lstore_table:
	.long	Lst_void-Lstore_table		/* FFI_TYPE_VOID */
	.long	Lst_sint32-Lstore_table		/* FFI_TYPE_INT */
	.long	Lst_float-Lstore_table		/* FFI_TYPE_FLOAT */
	.long	Lst_double-Lstore_table		/* FFI_TYPE_DOUBLE */
	.long	Lst_ldouble-Lstore_table	/* FFI_TYPE_LONGDOUBLE */
	.long	Lst_uint8-Lstore_table		/* FFI_TYPE_UINT8 */
	.long	Lst_sint8-Lstore_table		/* FFI_TYPE_SINT8 */
	.long	Lst_uint16-Lstore_table		/* FFI_TYPE_UINT16 */
	.long	Lst_sint16-Lstore_table		/* FFI_TYPE_SINT16 */
	.long	Lst_uint32-Lstore_table		/* FFI_TYPE_UINT32 */
	.long	Lst_sint32-Lstore_table		/* FFI_TYPE_SINT32 */
	.long	Lst_int64-Lstore_table		/* FFI_TYPE_UINT64 */
	.long	Lst_int64-Lstore_table		/* FFI_TYPE_SINT64 */
	.long	Lst_struct-Lstore_table		/* FFI_TYPE_STRUCT */
	.long	Lst_int64-Lstore_table		/* FFI_TYPE_POINTER */

	/* Store stubs.  Each one widens the value returned by the callee
	   as appropriate for its type, writes it through raddr (%rdi) and
	   returns to the caller of ffi_call_unix64.  Every stub must end in
	   its own ret: falling through into the next stub would re-extend
	   the value with the wrong signedness.  */
	.text
	.align	3
Lst_void:
	ret
	.align	3
Lst_uint8:
	movzbq	%al, %rax		/* Zero-extend 8 -> 64.  */
	movq	%rax, (%rdi)
	ret
	.align	3
Lst_sint8:
	movsbq	%al, %rax		/* Sign-extend 8 -> 64.  */
	movq	%rax, (%rdi)
	ret
	.align	3
Lst_uint16:
	movzwq	%ax, %rax		/* Zero-extend 16 -> 64.  */
	movq	%rax, (%rdi)
	ret				/* Do not fall into Lst_sint16.  */
	.align	3
Lst_sint16:
	movswq	%ax, %rax		/* Sign-extend 16 -> 64.  */
	movq	%rax, (%rdi)
	ret
	.align	3
Lst_uint32:
	movl	%eax, %eax		/* Zero-extend 32 -> 64.  */
	movq	%rax, (%rdi)
	ret				/* Do not fall into Lst_sint32.  */
	.align	3
Lst_sint32:
	cltq				/* Sign-extend 32 -> 64.  */
	movq	%rax, (%rdi)
	ret
	.align	3
Lst_int64:
	movq	%rax, (%rdi)
	ret
	.align	3
Lst_float:
	movss	%xmm0, (%rdi)
	ret
	.align	3
Lst_double:
	movsd	%xmm0, (%rdi)
	ret
	.align	3
Lst_ldouble:
	fstpt	(%rdi)			/* Pop st(0) as an 80-bit value.  */
	ret
	.align	3
Lst_struct:
	leaq	-20(%rsp), %rsi		/* Scratch area in redzone.  */

	/* We have to locate the values now, and since we don't want to
	   write too much data into the user's return value, we spill the
	   value to a 16 byte scratch area first.  Bits 8, 9, and 10
	   control where the values are located.  Only one of the three
	   bits will be set; see ffi_prep_cif_machdep for the pattern.  */
	movd	%xmm0, %r10
	movd	%xmm1, %r11
	testl	$0x100, %ecx
	cmovnz	%rax, %rdx
	cmovnz	%r10, %rax
	testl	$0x200, %ecx
	cmovnz	%r10, %rdx
	testl	$0x400, %ecx
	cmovnz	%r10, %rax
	cmovnz	%r11, %rdx
	movq	%rax, (%rsi)
	movq	%rdx, 8(%rsi)

	/* Bits 12-31 contain the true size of the structure.  Copy from
	   the scratch area to the true destination.  */
	shrl	$12, %ecx
	rep movsb
	ret

	/* Many times we can avoid loading any SSE registers at all.
	   It's not worth an indirect jump to load the exact set of
	   SSE registers needed; zero or all is a good compromise.  */
	.align	3
LUW3:
Lload_sse:
	movdqa	48(%r10), %xmm0
	movdqa	64(%r10), %xmm1
	movdqa	80(%r10), %xmm2
	movdqa	96(%r10), %xmm3
	movdqa	112(%r10), %xmm4
	movdqa	128(%r10), %xmm5
	movdqa	144(%r10), %xmm6
	movdqa	160(%r10), %xmm7
	jmp	Lret_from_load_sse

LUW4:

/* ffi_closure_unix64

   Entry point reached from a closure trampoline.  On entry the carry
   flag says whether the SSE argument registers must be saved, and %r10
   holds a value that is handed unchanged to ffi_closure_unix64_inner as
   its first argument (presumably the closure pointer loaded by the
   trampoline -- confirm against the trampoline code).

   Frame layout after the 200-byte allocation (offsets from %rsp):
       0 ..  40   saved %rdi, %rsi, %rdx, %rcx, %r8, %r9
      48 .. 160   saved %xmm0 .. %xmm7 (only when carry was set)
     176          passed as the 2nd argument; the return-value loaders
		  below read from this address (it is -24(%rsp) once the
		  frame has been released)
     208          passed as the 4th argument; this is the entry %rsp + 8,
		  i.e. the first slot above the return address

   The value returned by ffi_closure_unix64_inner in %eax selects the
   loader: low byte = FFI_TYPE, bits 8 and 9 = struct register
   classification.  */

	.align	3
	.globl	_ffi_closure_unix64

_ffi_closure_unix64:
LUW5:
	/* The carry flag is set by the trampoline iff SSE registers
	   are used.  Don't clobber it before the branch instruction.
	   (leaq and movq leave the flags untouched.)  */
	leaq	-200(%rsp), %rsp
LUW6:
	movq	%rdi, (%rsp)
	movq	%rsi, 8(%rsp)
	movq	%rdx, 16(%rsp)
	movq	%rcx, 24(%rsp)
	movq	%r8, 32(%rsp)
	movq	%r9, 40(%rsp)
	jc	Lsave_sse
Lret_from_save_sse:

	movq	%r10, %rdi
	leaq	176(%rsp), %rsi
	movq	%rsp, %rdx
	leaq	208(%rsp), %rcx
	call	_ffi_closure_unix64_inner

	/* Deallocate stack frame early; return value is now in redzone.  */
	addq	$200, %rsp
LUW7:

	/* The first byte of the return value contains the FFI_TYPE.
	   Dispatch through a table of 32-bit offsets relative to
	   Lload_table.  */
	movzbl	%al, %r10d
	leaq	Lload_table(%rip), %r11
	movslq	(%r11, %r10, 4), %r10
	addq	%r11, %r10
	jmp	*%r10

Lload_table:
	.long	Lld_void-Lload_table		/* FFI_TYPE_VOID */
	.long	Lld_int32-Lload_table		/* FFI_TYPE_INT */
	.long	Lld_float-Lload_table		/* FFI_TYPE_FLOAT */
	.long	Lld_double-Lload_table		/* FFI_TYPE_DOUBLE */
	.long	Lld_ldouble-Lload_table		/* FFI_TYPE_LONGDOUBLE */
	.long	Lld_int8-Lload_table		/* FFI_TYPE_UINT8 */
	.long	Lld_int8-Lload_table		/* FFI_TYPE_SINT8 */
	.long	Lld_int16-Lload_table		/* FFI_TYPE_UINT16 */
	.long	Lld_int16-Lload_table		/* FFI_TYPE_SINT16 */
	.long	Lld_int32-Lload_table		/* FFI_TYPE_UINT32 */
	.long	Lld_int32-Lload_table		/* FFI_TYPE_SINT32 */
	.long	Lld_int64-Lload_table		/* FFI_TYPE_UINT64 */
	.long	Lld_int64-Lload_table		/* FFI_TYPE_SINT64 */
	.long	Lld_struct-Lload_table		/* FFI_TYPE_STRUCT */
	.long	Lld_int64-Lload_table		/* FFI_TYPE_POINTER */

	/* Load stubs.  Each one moves the value at -24(%rsp) into the
	   return register for its type and returns to the closure's
	   caller.  */
	.text
	.align	3
Lld_void:
	ret
	.align	3
Lld_int8:
	movzbl	-24(%rsp), %eax
	ret
	.align	3
Lld_int16:
	movzwl	-24(%rsp), %eax
	ret
	.align	3
Lld_int32:
	movl	-24(%rsp), %eax
	ret
	.align	3
Lld_int64:
	movq	-24(%rsp), %rax
	ret
	.align	3
Lld_float:
	movss	-24(%rsp), %xmm0
	ret
	.align	3
Lld_double:
	movsd	-24(%rsp), %xmm0
	ret
	.align	3
Lld_ldouble:
	fldt	-24(%rsp)
	ret
	.align	3
Lld_struct:
	/* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
	   %rax/%xmm0, %xmm0/%xmm1.  We collapse two by always loading
	   both rdx and xmm1 with the second word.  For the remaining,
	   bit 8 set means xmm0 gets the second word, and bit 9 means
	   that rax gets the second word.  */
	movq	-24(%rsp), %rcx
	movq	-16(%rsp), %rdx
	movq	-16(%rsp), %xmm1
	testl	$0x100, %eax
	cmovnz	%rdx, %rcx
	movd	%rcx, %xmm0
	testl	$0x200, %eax
	movq	-24(%rsp), %rax		/* movq keeps the flags from testl.  */
	cmovnz	%rdx, %rax
	ret

	/* See the comment above Lload_sse; the same logic applies here.  */
	.align	3
LUW8:
Lsave_sse:
	movdqa	%xmm0, 48(%rsp)
	movdqa	%xmm1, 64(%rsp)
	movdqa	%xmm2, 80(%rsp)
	movdqa	%xmm3, 96(%rsp)
	movdqa	%xmm4, 112(%rsp)
	movdqa	%xmm5, 128(%rsp)
	movdqa	%xmm6, 144(%rsp)
	movdqa	%xmm7, 160(%rsp)
	jmp	Lret_from_save_sse

LUW9:

/* Hand-written unwind information.  One CIE shared by two FDEs; the
   LUWn labels above mark the code addresses at which the CFA rule
   changes.  */
.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
EH_frame1:
	.set	L$set$0,LECIE1-LSCIE1	/* CIE Length */
	.long	L$set$0
LSCIE1:
	.long	0x0		/* CIE Identifier Tag */
	.byte	0x1		/* CIE Version */
	.ascii	"zR\0"		/* CIE Augmentation */
	.byte	0x1		/* uleb128 0x1; CIE Code Alignment Factor */
	.byte	0x78		/* sleb128 -8; CIE Data Alignment Factor */
	.byte	0x10		/* CIE RA Column */
	.byte	0x1		/* uleb128 0x1; Augmentation size */
	.byte	0x10		/* FDE Encoding (pcrel, pointer-sized; the
				   FDEs below use .quad fields) */
	.byte	0xc		/* DW_CFA_def_cfa, %rsp offset 8 */
	.byte	0x7		/* uleb128 0x7 */
	.byte	0x8		/* uleb128 0x8 */
	.byte	0x90		/* DW_CFA_offset, column 0x10 */
	.byte	0x1
	.align	3
LECIE1:
	.globl	_ffi_call_unix64.eh
_ffi_call_unix64.eh:
LSFDE1:
	.set	L$set$1,LEFDE1-LASFDE1	/* FDE Length */
	.long	L$set$1
LASFDE1:
	.long	LASFDE1-EH_frame1	/* FDE CIE offset */
	.quad	LUW0-.			/* FDE initial location */
	.set	L$set$2,LUW4-LUW0	/* FDE address range */
	.quad	L$set$2
	.byte	0x0			/* Augmentation size */
	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$3,LUW1-LUW0
	.long	L$set$3

	/* New stack frame based off rbp.  This is an itty bit of unwind
	   trickery in that the CFA *has* changed.  There is no easy way
	   to describe it correctly on entry to the function.  Fortunately,
	   it doesn't matter too much since at all points we can correctly
	   unwind back to ffi_call.  Note that the location to which we
	   moved the return address is (the new) CFA-8, so from the
	   perspective of the unwind info, it hasn't moved.  */
	.byte	0xc			/* DW_CFA_def_cfa, %rbp offset 32 */
	.byte	0x6
	.byte	0x20
	.byte	0x80+6			/* DW_CFA_offset, %rbp offset 2*-8 */
	.byte	0x2
	.byte	0xa			/* DW_CFA_remember_state */

	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$4,LUW2-LUW1
	.long	L$set$4
	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
	.byte	0x7
	.byte	0x8
	.byte	0xc0+6			/* DW_CFA_restore, %rbp */

	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$5,LUW3-LUW2
	.long	L$set$5
	.byte	0xb			/* DW_CFA_restore_state */

	.align	3
LEFDE1:
	.globl	_ffi_closure_unix64.eh
_ffi_closure_unix64.eh:
LSFDE3:
	.set	L$set$6,LEFDE3-LASFDE3	/* FDE Length */
	.long	L$set$6
LASFDE3:
	.long	LASFDE3-EH_frame1	/* FDE CIE offset */
	.quad	LUW5-.			/* FDE initial location */
	.set	L$set$7,LUW9-LUW5	/* FDE address range */
	.quad	L$set$7
	.byte	0x0			/* Augmentation size */

	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$8,LUW6-LUW5
	.long	L$set$8
	.byte	0xe			/* DW_CFA_def_cfa_offset */
	.byte	208,1			/* uleb128 208 */
	.byte	0xa			/* DW_CFA_remember_state */

	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$9,LUW7-LUW6
	.long	L$set$9
	.byte	0xe			/* DW_CFA_def_cfa_offset */
	.byte	0x8

	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$10,LUW8-LUW7
	.long	L$set$10
	.byte	0xb			/* DW_CFA_restore_state */

	.align	3
LEFDE3:
	.subsections_via_symbols

#endif /* __x86_64__ */