1 /* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
2
3 Permission is hereby granted, free of charge, to any person obtaining
4 a copy of this software and associated documentation files (the
5 ``Software''), to deal in the Software without restriction, including
6 without limitation the rights to use, copy, modify, merge, publish,
7 distribute, sublicense, and/or sell copies of the Software, and to
8 permit persons to whom the Software is furnished to do so, subject to
9 the following conditions:
10
11 The above copyright notice and this permission notice shall be
12 included in all copies or substantial portions of the Software.
13
14 THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
15 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
21
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <stdint.h>
25 #include <ffi.h>
26 #include <ffi_common.h>
27 #include "internal.h"
28
29 /* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
30 all further uses in this file will refer to the 128-bit type. */
31 #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
32 # if FFI_TYPE_LONGDOUBLE != 4
33 # error FFI_TYPE_LONGDOUBLE out of date
34 # endif
35 #else
36 # undef FFI_TYPE_LONGDOUBLE
37 # define FFI_TYPE_LONGDOUBLE 4
38 #endif
39
/* View of one 64-bit quantity, accessible either as a whole doubleword
   or as its two 32-bit halves.  */
union _d
{
  UINT64 d;
  UINT32 s[2];
};
45
/* Image of one 128-bit AArch64 vector (Q) register, kept 16-byte
   aligned so it can be loaded/stored with q-register instructions.  */
struct _v
{
  union _d d[2] __attribute__((aligned(16)));
};
50
/* Saved images of the AArch64 argument-passing registers: the vector
   argument registers followed by the X (general) argument registers.
   N_V_ARG_REG / N_X_ARG_REG come from internal.h — presumably 8 each
   per the AAPCS64, but confirm against that header.  The layout here
   must match what the assembler stubs (sysv.S) expect.  */
struct call_context
{
  struct _v v[N_V_ARG_REG];
  UINT64 x[N_X_ARG_REG];
};
56
57 #if defined (__clang__) && defined (__APPLE__)
58 extern void sys_icache_invalidate (void *start, size_t len);
59 #endif
60
61 static inline void
ffi_clear_cache(void * start,void * end)62 ffi_clear_cache (void *start, void *end)
63 {
64 #if defined (__clang__) && defined (__APPLE__)
65 sys_icache_invalidate (start, (char *)end - (char *)start);
66 #elif defined (__GNUC__)
67 __builtin___clear_cache (start, end);
68 #else
69 #error "Missing builtin to flush instruction cache"
70 #endif
71 }
72
73 /* A subroutine of is_vfp_type. Given a structure type, return the type code
74 of the first non-structure element. Recurse for structure elements.
75 Return -1 if the structure is in fact empty, i.e. no nested elements. */
76
77 static int
is_hfa0(const ffi_type * ty)78 is_hfa0 (const ffi_type *ty)
79 {
80 ffi_type **elements = ty->elements;
81 int i, ret = -1;
82
83 if (elements != NULL)
84 for (i = 0; elements[i]; ++i)
85 {
86 ret = elements[i]->type;
87 if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX)
88 {
89 ret = is_hfa0 (elements[i]);
90 if (ret < 0)
91 continue;
92 }
93 break;
94 }
95
96 return ret;
97 }
98
99 /* A subroutine of is_vfp_type. Given a structure type, return true if all
100 of the non-structure elements are the same as CANDIDATE. */
101
102 static int
is_hfa1(const ffi_type * ty,int candidate)103 is_hfa1 (const ffi_type *ty, int candidate)
104 {
105 ffi_type **elements = ty->elements;
106 int i;
107
108 if (elements != NULL)
109 for (i = 0; elements[i]; ++i)
110 {
111 int t = elements[i]->type;
112 if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
113 {
114 if (!is_hfa1 (elements[i], candidate))
115 return 0;
116 }
117 else if (t != candidate)
118 return 0;
119 }
120
121 return 1;
122 }
123
124 /* Determine if TY may be allocated to the FP registers. This is both an
125 fp scalar type as well as an homogenous floating point aggregate (HFA).
126 That is, a structure consisting of 1 to 4 members of all the same type,
127 where that type is an fp scalar.
128
129 Returns non-zero iff TY is an HFA. The result is the AARCH64_RET_*
130 constant for the type. */
131
132 static int
is_vfp_type(const ffi_type * ty)133 is_vfp_type (const ffi_type *ty)
134 {
135 ffi_type **elements;
136 int candidate, i;
137 size_t size, ele_count;
138
139 /* Quickest tests first. */
140 candidate = ty->type;
141 switch (candidate)
142 {
143 default:
144 return 0;
145 case FFI_TYPE_FLOAT:
146 case FFI_TYPE_DOUBLE:
147 case FFI_TYPE_LONGDOUBLE:
148 ele_count = 1;
149 goto done;
150 case FFI_TYPE_COMPLEX:
151 candidate = ty->elements[0]->type;
152 switch (candidate)
153 {
154 case FFI_TYPE_FLOAT:
155 case FFI_TYPE_DOUBLE:
156 case FFI_TYPE_LONGDOUBLE:
157 ele_count = 2;
158 goto done;
159 }
160 return 0;
161 case FFI_TYPE_STRUCT:
162 break;
163 }
164
165 /* No HFA types are smaller than 4 bytes, or larger than 64 bytes. */
166 size = ty->size;
167 if (size < 4 || size > 64)
168 return 0;
169
170 /* Find the type of the first non-structure member. */
171 elements = ty->elements;
172 candidate = elements[0]->type;
173 if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX)
174 {
175 for (i = 0; ; ++i)
176 {
177 candidate = is_hfa0 (elements[i]);
178 if (candidate >= 0)
179 break;
180 }
181 }
182
183 /* If the first member is not a floating point type, it's not an HFA.
184 Also quickly re-check the size of the structure. */
185 switch (candidate)
186 {
187 case FFI_TYPE_FLOAT:
188 ele_count = size / sizeof(float);
189 if (size != ele_count * sizeof(float))
190 return 0;
191 break;
192 case FFI_TYPE_DOUBLE:
193 ele_count = size / sizeof(double);
194 if (size != ele_count * sizeof(double))
195 return 0;
196 break;
197 case FFI_TYPE_LONGDOUBLE:
198 ele_count = size / sizeof(long double);
199 if (size != ele_count * sizeof(long double))
200 return 0;
201 break;
202 default:
203 return 0;
204 }
205 if (ele_count > 4)
206 return 0;
207
208 /* Finally, make sure that all scalar elements are the same type. */
209 for (i = 0; elements[i]; ++i)
210 {
211 int t = elements[i]->type;
212 if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
213 {
214 if (!is_hfa1 (elements[i], candidate))
215 return 0;
216 }
217 else if (t != candidate)
218 return 0;
219 }
220
221 /* All tests succeeded. Encode the result. */
222 done:
223 return candidate * 4 + (4 - ele_count);
224 }
225
226 /* Representation of the procedure call argument marshalling
227 state.
228
229 The terse state variable names match the names used in the AARCH64
230 PCS. */
231
/* Representation of the procedure call argument marshalling state.

   The terse state variable names match the names used in the AARCH64
   PCS. */
struct arg_state
{
  unsigned ngrn;		/* Next general-purpose register number. */
  unsigned nsrn;		/* Next vector register number. */
  size_t nsaa;			/* Next stack offset. */

#if defined (__APPLE__)
  /* Non-zero once we are past the fixed arguments of a variadic call;
     Apple's ABI packs variadic arguments on the stack with natural
     (not 8-byte) alignment only before this point.  */
  unsigned allocating_variadic;
#endif
};
242
243 /* Initialize a procedure call argument marshalling state. */
244 static void
arg_init(struct arg_state * state)245 arg_init (struct arg_state *state)
246 {
247 state->ngrn = 0;
248 state->nsrn = 0;
249 state->nsaa = 0;
250 #if defined (__APPLE__)
251 state->allocating_variadic = 0;
252 #endif
253 }
254
255 /* Allocate an aligned slot on the stack and return a pointer to it. */
256 static void *
allocate_to_stack(struct arg_state * state,void * stack,size_t alignment,size_t size)257 allocate_to_stack (struct arg_state *state, void *stack,
258 size_t alignment, size_t size)
259 {
260 size_t nsaa = state->nsaa;
261
262 /* Round up the NSAA to the larger of 8 or the natural
263 alignment of the argument's type. */
264 #if defined (__APPLE__)
265 if (state->allocating_variadic && alignment < 8)
266 alignment = 8;
267 #else
268 if (alignment < 8)
269 alignment = 8;
270 #endif
271
272 nsaa = ALIGN (nsaa, alignment);
273 state->nsaa = nsaa + size;
274
275 return (char *)stack + nsaa;
276 }
277
278 static ffi_arg
extend_integer_type(void * source,int type)279 extend_integer_type (void *source, int type)
280 {
281 switch (type)
282 {
283 case FFI_TYPE_UINT8:
284 return *(UINT8 *) source;
285 case FFI_TYPE_SINT8:
286 return *(SINT8 *) source;
287 case FFI_TYPE_UINT16:
288 return *(UINT16 *) source;
289 case FFI_TYPE_SINT16:
290 return *(SINT16 *) source;
291 case FFI_TYPE_UINT32:
292 return *(UINT32 *) source;
293 case FFI_TYPE_INT:
294 case FFI_TYPE_SINT32:
295 return *(SINT32 *) source;
296 case FFI_TYPE_UINT64:
297 case FFI_TYPE_SINT64:
298 return *(UINT64 *) source;
299 break;
300 case FFI_TYPE_POINTER:
301 return *(uintptr_t *) source;
302 default:
303 abort();
304 }
305 }
306
307 static void
extend_hfa_type(void * dest,void * src,int h)308 extend_hfa_type (void *dest, void *src, int h)
309 {
310 int f = h - AARCH64_RET_S4;
311 void *x0;
312
313 asm volatile (
314 "adr %0, 0f\n"
315 " add %0, %0, %1\n"
316 " br %0\n"
317 "0: ldp s16, s17, [%3]\n" /* S4 */
318 " ldp s18, s19, [%3, #8]\n"
319 " b 4f\n"
320 " ldp s16, s17, [%3]\n" /* S3 */
321 " ldr s18, [%3, #8]\n"
322 " b 3f\n"
323 " ldp s16, s17, [%3]\n" /* S2 */
324 " b 2f\n"
325 " nop\n"
326 " ldr s16, [%3]\n" /* S1 */
327 " b 1f\n"
328 " nop\n"
329 " ldp d16, d17, [%3]\n" /* D4 */
330 " ldp d18, d19, [%3, #16]\n"
331 " b 4f\n"
332 " ldp d16, d17, [%3]\n" /* D3 */
333 " ldr d18, [%3, #16]\n"
334 " b 3f\n"
335 " ldp d16, d17, [%3]\n" /* D2 */
336 " b 2f\n"
337 " nop\n"
338 " ldr d16, [%3]\n" /* D1 */
339 " b 1f\n"
340 " nop\n"
341 " ldp q16, q17, [%3]\n" /* Q4 */
342 " ldp q18, q19, [%3, #16]\n"
343 " b 4f\n"
344 " ldp q16, q17, [%3]\n" /* Q3 */
345 " ldr q18, [%3, #16]\n"
346 " b 3f\n"
347 " ldp q16, q17, [%3]\n" /* Q2 */
348 " b 2f\n"
349 " nop\n"
350 " ldr q16, [%3]\n" /* Q1 */
351 " b 1f\n"
352 "4: str q19, [%2, #48]\n"
353 "3: str q18, [%2, #32]\n"
354 "2: str q17, [%2, #16]\n"
355 "1: str q16, [%2]"
356 : "=&r"(x0)
357 : "r"(f * 12), "r"(dest), "r"(src)
358 : "memory", "v16", "v17", "v18", "v19");
359 }
360
361 static void *
compress_hfa_type(void * dest,void * reg,int h)362 compress_hfa_type (void *dest, void *reg, int h)
363 {
364 switch (h)
365 {
366 case AARCH64_RET_S1:
367 if (dest == reg)
368 {
369 #ifdef __AARCH64EB__
370 dest += 12;
371 #endif
372 }
373 else
374 *(float *)dest = *(float *)reg;
375 break;
376 case AARCH64_RET_S2:
377 asm ("ldp q16, q17, [%1]\n\t"
378 "st2 { v16.s, v17.s }[0], [%0]"
379 : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
380 break;
381 case AARCH64_RET_S3:
382 asm ("ldp q16, q17, [%1]\n\t"
383 "ldr q18, [%1, #32]\n\t"
384 "st3 { v16.s, v17.s, v18.s }[0], [%0]"
385 : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
386 break;
387 case AARCH64_RET_S4:
388 asm ("ldp q16, q17, [%1]\n\t"
389 "ldp q18, q19, [%1, #32]\n\t"
390 "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]"
391 : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
392 break;
393
394 case AARCH64_RET_D1:
395 if (dest == reg)
396 {
397 #ifdef __AARCH64EB__
398 dest += 8;
399 #endif
400 }
401 else
402 *(double *)dest = *(double *)reg;
403 break;
404 case AARCH64_RET_D2:
405 asm ("ldp q16, q17, [%1]\n\t"
406 "st2 { v16.d, v17.d }[0], [%0]"
407 : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
408 break;
409 case AARCH64_RET_D3:
410 asm ("ldp q16, q17, [%1]\n\t"
411 "ldr q18, [%1, #32]\n\t"
412 "st3 { v16.d, v17.d, v18.d }[0], [%0]"
413 : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
414 break;
415 case AARCH64_RET_D4:
416 asm ("ldp q16, q17, [%1]\n\t"
417 "ldp q18, q19, [%1, #32]\n\t"
418 "st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]"
419 : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
420 break;
421
422 default:
423 if (dest != reg)
424 return memcpy (dest, reg, 16 * (4 - (h & 3)));
425 break;
426 }
427 return dest;
428 }
429
430 /* Either allocate an appropriate register for the argument type, or if
431 none are available, allocate a stack slot and return a pointer
432 to the allocated space. */
433
434 static void *
allocate_int_to_reg_or_stack(struct call_context * context,struct arg_state * state,void * stack,size_t size)435 allocate_int_to_reg_or_stack (struct call_context *context,
436 struct arg_state *state,
437 void *stack, size_t size)
438 {
439 if (state->ngrn < N_X_ARG_REG)
440 return &context->x[state->ngrn++];
441
442 state->ngrn = N_X_ARG_REG;
443 return allocate_to_stack (state, stack, size, size);
444 }
445
446 ffi_status
ffi_prep_cif_machdep(ffi_cif * cif)447 ffi_prep_cif_machdep (ffi_cif *cif)
448 {
449 ffi_type *rtype = cif->rtype;
450 size_t bytes = cif->bytes;
451 int flags, i, n;
452
453 switch (rtype->type)
454 {
455 case FFI_TYPE_VOID:
456 flags = AARCH64_RET_VOID;
457 break;
458 case FFI_TYPE_UINT8:
459 flags = AARCH64_RET_UINT8;
460 break;
461 case FFI_TYPE_UINT16:
462 flags = AARCH64_RET_UINT16;
463 break;
464 case FFI_TYPE_UINT32:
465 flags = AARCH64_RET_UINT32;
466 break;
467 case FFI_TYPE_SINT8:
468 flags = AARCH64_RET_SINT8;
469 break;
470 case FFI_TYPE_SINT16:
471 flags = AARCH64_RET_SINT16;
472 break;
473 case FFI_TYPE_INT:
474 case FFI_TYPE_SINT32:
475 flags = AARCH64_RET_SINT32;
476 break;
477 case FFI_TYPE_SINT64:
478 case FFI_TYPE_UINT64:
479 flags = AARCH64_RET_INT64;
480 break;
481 case FFI_TYPE_POINTER:
482 flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64);
483 break;
484
485 case FFI_TYPE_FLOAT:
486 case FFI_TYPE_DOUBLE:
487 case FFI_TYPE_LONGDOUBLE:
488 case FFI_TYPE_STRUCT:
489 case FFI_TYPE_COMPLEX:
490 flags = is_vfp_type (rtype);
491 if (flags == 0)
492 {
493 size_t s = rtype->size;
494 if (s > 16)
495 {
496 flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
497 bytes += 8;
498 }
499 else if (s == 16)
500 flags = AARCH64_RET_INT128;
501 else if (s == 8)
502 flags = AARCH64_RET_INT64;
503 else
504 flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
505 }
506 break;
507
508 default:
509 abort();
510 }
511
512 for (i = 0, n = cif->nargs; i < n; i++)
513 if (is_vfp_type (cif->arg_types[i]))
514 {
515 flags |= AARCH64_FLAG_ARG_V;
516 break;
517 }
518
519 /* Round the stack up to a multiple of the stack alignment requirement. */
520 cif->bytes = ALIGN(bytes, 16);
521 cif->flags = flags;
522 #if defined (__APPLE__)
523 cif->aarch64_nfixedargs = 0;
524 #endif
525
526 return FFI_OK;
527 }
528
529 #if defined (__APPLE__)
/* Perform Apple-specific cif processing for variadic calls: record how
   many leading arguments are fixed, so marshalling can switch to the
   Apple variadic (stack-packed) convention after them.  ntotalargs is
   unused here.  Note the order matters: ffi_prep_cif_machdep zeroes
   aarch64_nfixedargs, so we must assign it afterwards.  */
ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
				    unsigned int nfixedargs,
				    unsigned int ntotalargs)
{
  ffi_status status = ffi_prep_cif_machdep (cif);
  cif->aarch64_nfixedargs = nfixedargs;
  return status;
}
539 #endif /* __APPLE__ */
540
541 extern void ffi_call_SYSV (struct call_context *context, void *frame,
542 void (*fn)(void), void *rvalue, int flags,
543 void *closure) FFI_HIDDEN;
544
545 /* Call a function with the provided arguments and capture the return
546 value. */
547 static void
ffi_call_int(ffi_cif * cif,void (* fn)(void),void * orig_rvalue,void ** avalue,void * closure)548 ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
549 void **avalue, void *closure)
550 {
551 struct call_context *context;
552 void *stack, *frame, *rvalue;
553 struct arg_state state;
554 size_t stack_bytes, rtype_size, rsize;
555 int i, nargs, flags;
556 ffi_type *rtype;
557
558 flags = cif->flags;
559 rtype = cif->rtype;
560 rtype_size = rtype->size;
561 stack_bytes = cif->bytes;
562
563 /* If the target function returns a structure via hidden pointer,
564 then we cannot allow a null rvalue. Otherwise, mash a null
565 rvalue to void return type. */
566 rsize = 0;
567 if (flags & AARCH64_RET_IN_MEM)
568 {
569 if (orig_rvalue == NULL)
570 rsize = rtype_size;
571 }
572 else if (orig_rvalue == NULL)
573 flags &= AARCH64_FLAG_ARG_V;
574 else if (flags & AARCH64_RET_NEED_COPY)
575 rsize = 16;
576
577 /* Allocate consectutive stack for everything we'll need. */
578 context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
579 stack = context + 1;
580 frame = stack + stack_bytes;
581 rvalue = (rsize ? frame + 32 : orig_rvalue);
582
583 arg_init (&state);
584 for (i = 0, nargs = cif->nargs; i < nargs; i++)
585 {
586 ffi_type *ty = cif->arg_types[i];
587 size_t s = ty->size;
588 void *a = avalue[i];
589 int h, t;
590
591 t = ty->type;
592 switch (t)
593 {
594 case FFI_TYPE_VOID:
595 FFI_ASSERT (0);
596 break;
597
598 /* If the argument is a basic type the argument is allocated to an
599 appropriate register, or if none are available, to the stack. */
600 case FFI_TYPE_INT:
601 case FFI_TYPE_UINT8:
602 case FFI_TYPE_SINT8:
603 case FFI_TYPE_UINT16:
604 case FFI_TYPE_SINT16:
605 case FFI_TYPE_UINT32:
606 case FFI_TYPE_SINT32:
607 case FFI_TYPE_UINT64:
608 case FFI_TYPE_SINT64:
609 case FFI_TYPE_POINTER:
610 do_pointer:
611 {
612 ffi_arg ext = extend_integer_type (a, t);
613 if (state.ngrn < N_X_ARG_REG)
614 context->x[state.ngrn++] = ext;
615 else
616 {
617 void *d = allocate_to_stack (&state, stack, ty->alignment, s);
618 state.ngrn = N_X_ARG_REG;
619 /* Note that the default abi extends each argument
620 to a full 64-bit slot, while the iOS abi allocates
621 only enough space. */
622 #ifdef __APPLE__
623 memcpy(d, a, s);
624 #else
625 *(ffi_arg *)d = ext;
626 #endif
627 }
628 }
629 break;
630
631 case FFI_TYPE_FLOAT:
632 case FFI_TYPE_DOUBLE:
633 case FFI_TYPE_LONGDOUBLE:
634 case FFI_TYPE_STRUCT:
635 case FFI_TYPE_COMPLEX:
636 {
637 void *dest;
638
639 h = is_vfp_type (ty);
640 if (h)
641 {
642 int elems = 4 - (h & 3);
643 if (state.nsrn + elems <= N_V_ARG_REG)
644 {
645 dest = &context->v[state.nsrn];
646 state.nsrn += elems;
647 extend_hfa_type (dest, a, h);
648 break;
649 }
650 state.nsrn = N_V_ARG_REG;
651 dest = allocate_to_stack (&state, stack, ty->alignment, s);
652 }
653 else if (s > 16)
654 {
655 /* If the argument is a composite type that is larger than 16
656 bytes, then the argument has been copied to memory, and
657 the argument is replaced by a pointer to the copy. */
658 a = &avalue[i];
659 t = FFI_TYPE_POINTER;
660 goto do_pointer;
661 }
662 else
663 {
664 size_t n = (s + 7) / 8;
665 if (state.ngrn + n <= N_X_ARG_REG)
666 {
667 /* If the argument is a composite type and the size in
668 double-words is not more than the number of available
669 X registers, then the argument is copied into
670 consecutive X registers. */
671 dest = &context->x[state.ngrn];
672 state.ngrn += n;
673 }
674 else
675 {
676 /* Otherwise, there are insufficient X registers. Further
677 X register allocations are prevented, the NSAA is
678 adjusted and the argument is copied to memory at the
679 adjusted NSAA. */
680 state.ngrn = N_X_ARG_REG;
681 dest = allocate_to_stack (&state, stack, ty->alignment, s);
682 }
683 }
684 memcpy (dest, a, s);
685 }
686 break;
687
688 default:
689 abort();
690 }
691
692 #if defined (__APPLE__)
693 if (i + 1 == cif->aarch64_nfixedargs)
694 {
695 state.ngrn = N_X_ARG_REG;
696 state.nsrn = N_V_ARG_REG;
697 state.allocating_variadic = 1;
698 }
699 #endif
700 }
701
702 ffi_call_SYSV (context, frame, fn, rvalue, flags, closure);
703
704 if (flags & AARCH64_RET_NEED_COPY)
705 memcpy (orig_rvalue, rvalue, rtype_size);
706 }
707
708 void
ffi_call(ffi_cif * cif,void (* fn)(void),void * rvalue,void ** avalue)709 ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
710 {
711 ffi_call_int (cif, fn, rvalue, avalue, NULL);
712 }
713
714 #ifdef FFI_GO_CLOSURES
/* Public entry point for Go closures: like ffi_call, but forwards the
   Go closure pointer to the asm stub (delivered in x18 — presumably;
   see sysv.S).  */
void
ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
	     void **avalue, void *closure)
{
  ffi_call_int (cif, fn, rvalue, avalue, closure);
}
721 #endif /* FFI_GO_CLOSURES */
722
723 /* Build a trampoline. */
724
725 extern void ffi_closure_SYSV (void) FFI_HIDDEN;
726 extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;
727
728 #if FFI_EXEC_TRAMPOLINE_TABLE
729
730 #include <mach/mach.h>
731 #include <pthread.h>
732 #include <stdio.h>
733 #include <stdlib.h>
734
735 extern void *ffi_closure_trampoline_table_page;
736
737 typedef struct ffi_trampoline_table ffi_trampoline_table;
738 typedef struct ffi_trampoline_table_entry ffi_trampoline_table_entry;
739
/* One allocated trampoline table: a writable config page immediately
   followed in the address space by an executable page remapped from
   the prebuilt trampoline code, plus a free list of its entries.
   Tables are kept on a doubly-linked list headed by
   ffi_trampoline_tables.  */
struct ffi_trampoline_table
{
  /* contiguous writable and executable pages */
  vm_address_t config_page;
  vm_address_t trampoline_page;

  /* free list tracking */
  uint16_t free_count;
  ffi_trampoline_table_entry *free_list;
  ffi_trampoline_table_entry *free_list_pool;

  ffi_trampoline_table *prev;
  ffi_trampoline_table *next;
};
754
/* One trampoline slot: its executable entry point within the table's
   trampoline page, and the free-list link used while unallocated.  */
struct ffi_trampoline_table_entry
{
  void *(*trampoline) ();
  ffi_trampoline_table_entry *next;
};
760
/* The trampoline configuration is placed one page prior to the
   trampoline's entry point, in the config page that precedes the
   remapped trampoline page.  (Fixed: the original macro carried a
   stray trailing semicolon in its expansion — harmless in statement
   context but a syntax error in any expression context — and left the
   parameter unparenthesized.)  */
#define FFI_TRAMPOLINE_CODELOC_CONFIG(codeloc) \
  ((void **) (((uint8_t *) (codeloc)) - PAGE_SIZE))
763
764 /* Total number of trampolines that fit in one trampoline table */
765 #define FFI_TRAMPOLINE_COUNT (PAGE_SIZE / FFI_TRAMPOLINE_SIZE)
766
/* Lock guarding the trampoline table list, and the list head.  The
   free/alloc code keeps a table with free entries at the head.  */
static pthread_mutex_t ffi_trampoline_lock = PTHREAD_MUTEX_INITIALIZER;
static ffi_trampoline_table *ffi_trampoline_tables = NULL;
769
770 static ffi_trampoline_table *
ffi_trampoline_table_alloc()771 ffi_trampoline_table_alloc ()
772 {
773 ffi_trampoline_table *table = NULL;
774
775 /* Loop until we can allocate two contiguous pages */
776 while (table == NULL)
777 {
778 vm_address_t config_page = 0x0;
779 kern_return_t kt;
780
781 /* Try to allocate two pages */
782 kt =
783 vm_allocate (mach_task_self (), &config_page, PAGE_SIZE * 2,
784 VM_FLAGS_ANYWHERE);
785 if (kt != KERN_SUCCESS)
786 {
787 fprintf (stderr, "vm_allocate() failure: %d at %s:%d\n", kt,
788 __FILE__, __LINE__);
789 break;
790 }
791
792 /* Now drop the second half of the allocation to make room for the trampoline table */
793 vm_address_t trampoline_page = config_page + PAGE_SIZE;
794 kt = vm_deallocate (mach_task_self (), trampoline_page, PAGE_SIZE);
795 if (kt != KERN_SUCCESS)
796 {
797 fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
798 __FILE__, __LINE__);
799 break;
800 }
801
802 /* Remap the trampoline table to directly follow the config page */
803 vm_prot_t cur_prot;
804 vm_prot_t max_prot;
805
806 kt =
807 vm_remap (mach_task_self (), &trampoline_page, PAGE_SIZE, 0x0, FALSE,
808 mach_task_self (),
809 (vm_address_t) & ffi_closure_trampoline_table_page, FALSE,
810 &cur_prot, &max_prot, VM_INHERIT_SHARE);
811
812 /* If we lost access to the destination trampoline page, drop our config allocation mapping and retry */
813 if (kt != KERN_SUCCESS)
814 {
815 /* Log unexpected failures */
816 if (kt != KERN_NO_SPACE)
817 {
818 fprintf (stderr, "vm_remap() failure: %d at %s:%d\n", kt,
819 __FILE__, __LINE__);
820 }
821
822 vm_deallocate (mach_task_self (), config_page, PAGE_SIZE);
823 continue;
824 }
825
826 /* We have valid trampoline and config pages */
827 table = calloc (1, sizeof (ffi_trampoline_table));
828 table->free_count = FFI_TRAMPOLINE_COUNT;
829 table->config_page = config_page;
830 table->trampoline_page = trampoline_page;
831
832 /* Create and initialize the free list */
833 table->free_list_pool =
834 calloc (FFI_TRAMPOLINE_COUNT, sizeof (ffi_trampoline_table_entry));
835
836 uint16_t i;
837 for (i = 0; i < table->free_count; i++)
838 {
839 ffi_trampoline_table_entry *entry = &table->free_list_pool[i];
840 entry->trampoline =
841 (void *) (table->trampoline_page + (i * FFI_TRAMPOLINE_SIZE));
842
843 if (i < table->free_count - 1)
844 entry->next = &table->free_list_pool[i + 1];
845 }
846
847 table->free_list = table->free_list_pool;
848 }
849
850 return table;
851 }
852
853 void *
ffi_closure_alloc(size_t size,void ** code)854 ffi_closure_alloc (size_t size, void **code)
855 {
856 /* Create the closure */
857 ffi_closure *closure = malloc (size);
858 if (closure == NULL)
859 return NULL;
860
861 pthread_mutex_lock (&ffi_trampoline_lock);
862
863 /* Check for an active trampoline table with available entries. */
864 ffi_trampoline_table *table = ffi_trampoline_tables;
865 if (table == NULL || table->free_list == NULL)
866 {
867 table = ffi_trampoline_table_alloc ();
868 if (table == NULL)
869 {
870 free (closure);
871 return NULL;
872 }
873
874 /* Insert the new table at the top of the list */
875 table->next = ffi_trampoline_tables;
876 if (table->next != NULL)
877 table->next->prev = table;
878
879 ffi_trampoline_tables = table;
880 }
881
882 /* Claim the free entry */
883 ffi_trampoline_table_entry *entry = ffi_trampoline_tables->free_list;
884 ffi_trampoline_tables->free_list = entry->next;
885 ffi_trampoline_tables->free_count--;
886 entry->next = NULL;
887
888 pthread_mutex_unlock (&ffi_trampoline_lock);
889
890 /* Initialize the return values */
891 *code = entry->trampoline;
892 closure->trampoline_table = table;
893 closure->trampoline_table_entry = entry;
894
895 return closure;
896 }
897
898 void
ffi_closure_free(void * ptr)899 ffi_closure_free (void *ptr)
900 {
901 ffi_closure *closure = ptr;
902
903 pthread_mutex_lock (&ffi_trampoline_lock);
904
905 /* Fetch the table and entry references */
906 ffi_trampoline_table *table = closure->trampoline_table;
907 ffi_trampoline_table_entry *entry = closure->trampoline_table_entry;
908
909 /* Return the entry to the free list */
910 entry->next = table->free_list;
911 table->free_list = entry;
912 table->free_count++;
913
914 /* If all trampolines within this table are free, and at least one other table exists, deallocate
915 * the table */
916 if (table->free_count == FFI_TRAMPOLINE_COUNT
917 && ffi_trampoline_tables != table)
918 {
919 /* Remove from the list */
920 if (table->prev != NULL)
921 table->prev->next = table->next;
922
923 if (table->next != NULL)
924 table->next->prev = table->prev;
925
926 /* Deallocate pages */
927 kern_return_t kt;
928 kt = vm_deallocate (mach_task_self (), table->config_page, PAGE_SIZE);
929 if (kt != KERN_SUCCESS)
930 fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
931 __FILE__, __LINE__);
932
933 kt =
934 vm_deallocate (mach_task_self (), table->trampoline_page, PAGE_SIZE);
935 if (kt != KERN_SUCCESS)
936 fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
937 __FILE__, __LINE__);
938
939 /* Deallocate free list */
940 free (table->free_list_pool);
941 free (table);
942 }
943 else if (ffi_trampoline_tables != table)
944 {
945 /* Otherwise, bump this table to the top of the list */
946 table->prev = NULL;
947 table->next = ffi_trampoline_tables;
948 if (ffi_trampoline_tables != NULL)
949 ffi_trampoline_tables->prev = table;
950
951 ffi_trampoline_tables = table;
952 }
953
954 pthread_mutex_unlock (&ffi_trampoline_lock);
955
956 /* Free the closure */
957 free (closure);
958 }
959
960 #endif
961
962 ffi_status
ffi_prep_closure_loc(ffi_closure * closure,ffi_cif * cif,void (* fun)(ffi_cif *,void *,void **,void *),void * user_data,void * codeloc)963 ffi_prep_closure_loc (ffi_closure *closure,
964 ffi_cif* cif,
965 void (*fun)(ffi_cif*,void*,void**,void*),
966 void *user_data,
967 void *codeloc)
968 {
969 if (cif->abi != FFI_SYSV)
970 return FFI_BAD_ABI;
971
972 void (*start)(void);
973
974 if (cif->flags & AARCH64_FLAG_ARG_V)
975 start = ffi_closure_SYSV_V;
976 else
977 start = ffi_closure_SYSV;
978
979 #if FFI_EXEC_TRAMPOLINE_TABLE
980 void **config = FFI_TRAMPOLINE_CODELOC_CONFIG (codeloc);
981 config[0] = closure;
982 config[1] = start;
983 #else
984 static const unsigned char trampoline[16] = {
985 0x90, 0x00, 0x00, 0x58, /* ldr x16, tramp+16 */
986 0xf1, 0xff, 0xff, 0x10, /* adr x17, tramp+0 */
987 0x00, 0x02, 0x1f, 0xd6 /* br x16 */
988 };
989 char *tramp = closure->tramp;
990
991 memcpy (tramp, trampoline, sizeof(trampoline));
992
993 *(UINT64 *)(tramp + 16) = (uintptr_t)start;
994
995 ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE);
996 #endif
997
998 closure->cif = cif;
999 closure->fun = fun;
1000 closure->user_data = user_data;
1001
1002 return FFI_OK;
1003 }
1004
1005 #ifdef FFI_GO_CLOSURES
1006 extern void ffi_go_closure_SYSV (void) FFI_HIDDEN;
1007 extern void ffi_go_closure_SYSV_V (void) FFI_HIDDEN;
1008
1009 ffi_status
ffi_prep_go_closure(ffi_go_closure * closure,ffi_cif * cif,void (* fun)(ffi_cif *,void *,void **,void *))1010 ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif* cif,
1011 void (*fun)(ffi_cif*,void*,void**,void*))
1012 {
1013 void (*start)(void);
1014
1015 if (cif->abi != FFI_SYSV)
1016 return FFI_BAD_ABI;
1017
1018 if (cif->flags & AARCH64_FLAG_ARG_V)
1019 start = ffi_go_closure_SYSV_V;
1020 else
1021 start = ffi_go_closure_SYSV;
1022
1023 closure->tramp = start;
1024 closure->cif = cif;
1025 closure->fun = fun;
1026
1027 return FFI_OK;
1028 }
1029 #endif /* FFI_GO_CLOSURES */
1030
1031 /* Primary handler to setup and invoke a function within a closure.
1032
1033 A closure when invoked enters via the assembler wrapper
1034 ffi_closure_SYSV(). The wrapper allocates a call context on the
1035 stack, saves the interesting registers (from the perspective of
1036 the calling convention) into the context then passes control to
1037 ffi_closure_SYSV_inner() passing the saved context and a pointer to
1038 the stack at the point ffi_closure_SYSV() was invoked.
1039
1040 On the return path the assembler wrapper will reload call context
1041 registers.
1042
1043 ffi_closure_SYSV_inner() marshalls the call context into ffi value
1044 descriptors, invokes the wrapped function, then marshalls the return
1045 value back into the call context. */
1046
1047 int FFI_HIDDEN
ffi_closure_SYSV_inner(ffi_cif * cif,void (* fun)(ffi_cif *,void *,void **,void *),void * user_data,struct call_context * context,void * stack,void * rvalue,void * struct_rvalue)1048 ffi_closure_SYSV_inner (ffi_cif *cif,
1049 void (*fun)(ffi_cif*,void*,void**,void*),
1050 void *user_data,
1051 struct call_context *context,
1052 void *stack, void *rvalue, void *struct_rvalue)
1053 {
1054 void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
1055 int i, h, nargs, flags;
1056 struct arg_state state;
1057
1058 arg_init (&state);
1059
1060 for (i = 0, nargs = cif->nargs; i < nargs; i++)
1061 {
1062 ffi_type *ty = cif->arg_types[i];
1063 int t = ty->type;
1064 size_t n, s = ty->size;
1065
1066 switch (t)
1067 {
1068 case FFI_TYPE_VOID:
1069 FFI_ASSERT (0);
1070 break;
1071
1072 case FFI_TYPE_INT:
1073 case FFI_TYPE_UINT8:
1074 case FFI_TYPE_SINT8:
1075 case FFI_TYPE_UINT16:
1076 case FFI_TYPE_SINT16:
1077 case FFI_TYPE_UINT32:
1078 case FFI_TYPE_SINT32:
1079 case FFI_TYPE_UINT64:
1080 case FFI_TYPE_SINT64:
1081 case FFI_TYPE_POINTER:
1082 avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s);
1083 break;
1084
1085 case FFI_TYPE_FLOAT:
1086 case FFI_TYPE_DOUBLE:
1087 case FFI_TYPE_LONGDOUBLE:
1088 case FFI_TYPE_STRUCT:
1089 case FFI_TYPE_COMPLEX:
1090 h = is_vfp_type (ty);
1091 if (h)
1092 {
1093 n = 4 - (h & 3);
1094 if (state.nsrn + n <= N_V_ARG_REG)
1095 {
1096 void *reg = &context->v[state.nsrn];
1097 state.nsrn += n;
1098
1099 /* Eeek! We need a pointer to the structure, however the
1100 homogeneous float elements are being passed in individual
1101 registers, therefore for float and double the structure
1102 is not represented as a contiguous sequence of bytes in
1103 our saved register context. We don't need the original
1104 contents of the register storage, so we reformat the
1105 structure into the same memory. */
1106 avalue[i] = compress_hfa_type (reg, reg, h);
1107 }
1108 else
1109 {
1110 state.nsrn = N_V_ARG_REG;
1111 avalue[i] = allocate_to_stack (&state, stack,
1112 ty->alignment, s);
1113 }
1114 }
1115 else if (s > 16)
1116 {
1117 /* Replace Composite type of size greater than 16 with a
1118 pointer. */
1119 avalue[i] = *(void **)
1120 allocate_int_to_reg_or_stack (context, &state, stack,
1121 sizeof (void *));
1122 }
1123 else
1124 {
1125 n = (s + 7) / 8;
1126 if (state.ngrn + n <= N_X_ARG_REG)
1127 {
1128 avalue[i] = &context->x[state.ngrn];
1129 state.ngrn += n;
1130 }
1131 else
1132 {
1133 state.ngrn = N_X_ARG_REG;
1134 avalue[i] = allocate_to_stack (&state, stack,
1135 ty->alignment, s);
1136 }
1137 }
1138 break;
1139
1140 default:
1141 abort();
1142 }
1143 }
1144
1145 flags = cif->flags;
1146 if (flags & AARCH64_RET_IN_MEM)
1147 rvalue = struct_rvalue;
1148
1149 fun (cif, rvalue, avalue, user_data);
1150
1151 return flags;
1152 }
1153