/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <ffi.h>
#include <ffi_common.h>
#include "internal.h"

/* Force FFI_TYPE_LONGDOUBLE to be different from FFI_TYPE_DOUBLE;
   all further uses in this file will refer to the 128-bit type.  */
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
# if FFI_TYPE_LONGDOUBLE != 4
#  error FFI_TYPE_LONGDOUBLE out of date
# endif
#else
# undef FFI_TYPE_LONGDOUBLE
# define FFI_TYPE_LONGDOUBLE 4
#endif

union _d
{
  UINT64 d;
  UINT32 s[2];
};

struct _v
{
  union _d d[2] __attribute__((aligned(16)));
};

struct call_context
{
  struct _v v[N_V_ARG_REG];
  UINT64 x[N_X_ARG_REG];
};
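/* Note: per AAPCS64 both argument register files are eight entries deep
   (x0-x7 and v0-v7), so N_X_ARG_REG and N_V_ARG_REG from internal.h are
   expected to be 8; each struct _v slot corresponds to one 16-byte
   q register.  */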

#if defined (__clang__) && defined (__APPLE__)
extern void sys_icache_invalidate (void *start, size_t len);
#endif

static inline void
ffi_clear_cache (void *start, void *end)
{
#if defined (__clang__) && defined (__APPLE__)
  sys_icache_invalidate (start, (char *)end - (char *)start);
#elif defined (__GNUC__)
  __builtin___clear_cache (start, end);
#else
#error "Missing builtin to flush instruction cache"
#endif
}

/* A subroutine of is_vfp_type.  Given a structure type, return the type code
   of the first non-structure element.  Recurse for structure elements.
   Return -1 if the structure is in fact empty, i.e. no nested elements.  */

static int
is_hfa0 (const ffi_type *ty)
{
  ffi_type **elements = ty->elements;
  int i, ret = -1;

  if (elements != NULL)
    for (i = 0; elements[i]; ++i)
      {
        ret = elements[i]->type;
        if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX)
          {
            ret = is_hfa0 (elements[i]);
            if (ret < 0)
              continue;
          }
        break;
      }

  return ret;
}
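/* As an illustration: given
     struct a { struct { double d; } x; float f; };
   is_hfa0 returns FFI_TYPE_DOUBLE, the type code of the first leaf
   element found, and -1 for a struct with no leaf elements at all.  */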

/* A subroutine of is_vfp_type.  Given a structure type, return true if all
   of the non-structure elements are the same as CANDIDATE.  */

static int
is_hfa1 (const ffi_type *ty, int candidate)
{
  ffi_type **elements = ty->elements;
  int i;

  if (elements != NULL)
    for (i = 0; elements[i]; ++i)
      {
        int t = elements[i]->type;
        if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
          {
            if (!is_hfa1 (elements[i], candidate))
              return 0;
          }
        else if (t != candidate)
          return 0;
      }

  return 1;
}

/* Determine if TY may be allocated to the FP registers.  This is true for
   fp scalar types as well as homogeneous floating-point aggregates (HFAs):
   structures consisting of 1 to 4 members, all of the same fp scalar type.

   Returns non-zero iff TY may go in FP registers; the result is the
   AARCH64_RET_* constant for the type.  */

static int
is_vfp_type (const ffi_type *ty)
{
  ffi_type **elements;
  int candidate, i;
  size_t size, ele_count;

  /* Quickest tests first.  */
  candidate = ty->type;
  switch (candidate)
    {
    default:
      return 0;
    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
    case FFI_TYPE_LONGDOUBLE:
      ele_count = 1;
      goto done;
    case FFI_TYPE_COMPLEX:
      candidate = ty->elements[0]->type;
      switch (candidate)
        {
        case FFI_TYPE_FLOAT:
        case FFI_TYPE_DOUBLE:
        case FFI_TYPE_LONGDOUBLE:
          ele_count = 2;
          goto done;
        }
      return 0;
    case FFI_TYPE_STRUCT:
      break;
    }

  /* No HFA types are smaller than 4 bytes or larger than 64 bytes.  */
  size = ty->size;
  if (size < 4 || size > 64)
    return 0;

  /* Find the type of the first non-structure member.  */
  elements = ty->elements;
  candidate = elements[0]->type;
  if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX)
    {
      for (i = 0; ; ++i)
        {
          candidate = is_hfa0 (elements[i]);
          if (candidate >= 0)
            break;
        }
    }

  /* If the first member is not a floating point type, it's not an HFA.
     Also quickly re-check the size of the structure.  */
  switch (candidate)
    {
    case FFI_TYPE_FLOAT:
      ele_count = size / sizeof(float);
      if (size != ele_count * sizeof(float))
        return 0;
      break;
    case FFI_TYPE_DOUBLE:
      ele_count = size / sizeof(double);
      if (size != ele_count * sizeof(double))
        return 0;
      break;
    case FFI_TYPE_LONGDOUBLE:
      ele_count = size / sizeof(long double);
      if (size != ele_count * sizeof(long double))
        return 0;
      break;
    default:
      return 0;
    }
  if (ele_count > 4)
    return 0;

  /* Finally, make sure that all scalar elements are the same type.  */
  for (i = 0; elements[i]; ++i)
    {
      int t = elements[i]->type;
      if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
        {
          if (!is_hfa1 (elements[i], candidate))
            return 0;
        }
      else if (t != candidate)
        return 0;
    }

  /* All tests succeeded.  Encode the result.  */
 done:
  return candidate * 4 + (4 - ele_count);
}
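/* A worked example of the encoding above, assuming the AARCH64_RET_*
   numbering in internal.h (AARCH64_RET_S4 == FFI_TYPE_FLOAT * 4): a
   struct of three floats gives candidate == FFI_TYPE_FLOAT (2) and
   ele_count == 3, so the result is 2*4 + (4-3) == AARCH64_RET_S3.  */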

/* Representation of the procedure call argument marshalling
   state.

   The terse state variable names match the names used in the AARCH64
   PCS.  */

struct arg_state
{
  unsigned ngrn;                /* Next general-purpose register number.  */
  unsigned nsrn;                /* Next vector register number.  */
  size_t nsaa;                  /* Next stack offset.  */

#if defined (__APPLE__)
  unsigned allocating_variadic;
#endif
};

/* Initialize a procedure call argument marshalling state.  */
static void
arg_init (struct arg_state *state)
{
  state->ngrn = 0;
  state->nsrn = 0;
  state->nsaa = 0;
#if defined (__APPLE__)
  state->allocating_variadic = 0;
#endif
}

/* Allocate an aligned slot on the stack and return a pointer to it.  */
static void *
allocate_to_stack (struct arg_state *state, void *stack,
                   size_t alignment, size_t size)
{
  size_t nsaa = state->nsaa;

  /* Round up the NSAA to the larger of 8 or the natural
     alignment of the argument's type.  */
#if defined (__APPLE__)
  if (state->allocating_variadic && alignment < 8)
    alignment = 8;
#else
  if (alignment < 8)
    alignment = 8;
#endif

  nsaa = ALIGN (nsaa, alignment);
  state->nsaa = nsaa + size;

  return (char *)stack + nsaa;
}
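/* For example, if NSAA is currently 2 and the next argument is a 2-byte
   short, it is placed at offset 8, not 2, because AAPCS64 stack slots are
   rounded up to 8-byte alignment; on Apple targets that rounding applies
   only while allocating variadic arguments, so the same short would be
   placed at offset 2 when passed as a fixed argument.  */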

static ffi_arg
extend_integer_type (void *source, int type)
{
  switch (type)
    {
    case FFI_TYPE_UINT8:
      return *(UINT8 *) source;
    case FFI_TYPE_SINT8:
      return *(SINT8 *) source;
    case FFI_TYPE_UINT16:
      return *(UINT16 *) source;
    case FFI_TYPE_SINT16:
      return *(SINT16 *) source;
    case FFI_TYPE_UINT32:
      return *(UINT32 *) source;
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      return *(SINT32 *) source;
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      return *(UINT64 *) source;
    case FFI_TYPE_POINTER:
      return *(uintptr_t *) source;
    default:
      abort();
    }
}
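/* E.g. a SINT8 argument holding -1 widens to an ffi_arg of
   0xffffffffffffffff, while a UINT8 holding 0xff widens to 0xff, so the
   callee always sees a properly extended 64-bit register value.  */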

static void
extend_hfa_type (void *dest, void *src, int h)
{
  int f = h - AARCH64_RET_S4;
  void *x0;

  asm volatile (
	"adr	%0, 0f\n"
"	add	%0, %0, %1\n"
"	br	%0\n"
"0:	ldp	s16, s17, [%3]\n"	/* S4 */
"	ldp	s18, s19, [%3, #8]\n"
"	b	4f\n"
"	ldp	s16, s17, [%3]\n"	/* S3 */
"	ldr	s18, [%3, #8]\n"
"	b	3f\n"
"	ldp	s16, s17, [%3]\n"	/* S2 */
"	b	2f\n"
"	nop\n"
"	ldr	s16, [%3]\n"		/* S1 */
"	b	1f\n"
"	nop\n"
"	ldp	d16, d17, [%3]\n"	/* D4 */
"	ldp	d18, d19, [%3, #16]\n"
"	b	4f\n"
"	ldp	d16, d17, [%3]\n"	/* D3 */
"	ldr	d18, [%3, #16]\n"
"	b	3f\n"
"	ldp	d16, d17, [%3]\n"	/* D2 */
"	b	2f\n"
"	nop\n"
"	ldr	d16, [%3]\n"		/* D1 */
"	b	1f\n"
"	nop\n"
"	ldp	q16, q17, [%3]\n"	/* Q4 */
"	ldp	q18, q19, [%3, #16]\n"
"	b	4f\n"
"	ldp	q16, q17, [%3]\n"	/* Q3 */
"	ldr	q18, [%3, #16]\n"
"	b	3f\n"
"	ldp	q16, q17, [%3]\n"	/* Q2 */
"	b	2f\n"
"	nop\n"
"	ldr	q16, [%3]\n"		/* Q1 */
"	b	1f\n"
"4:	str	q19, [%2, #48]\n"
"3:	str	q18, [%2, #32]\n"
"2:	str	q17, [%2, #16]\n"
"1:	str	q16, [%2]"
    : "=&r"(x0)
    : "r"(f * 12), "r"(dest), "r"(src)
    : "memory", "v16", "v17", "v18", "v19");
}
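/* The asm above is a jump table: each entry is 12 bytes (three A64
   instructions, nop-padded where needed), selected by f = h -
   AARCH64_RET_S4.  Assuming the AARCH64_RET_* numbering in internal.h,
   h == AARCH64_RET_D2 gives f == 6, an offset of 72 bytes, which lands
   on the D2 entry that loads two doubles.  */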

static void *
compress_hfa_type (void *dest, void *reg, int h)
{
  switch (h)
    {
    case AARCH64_RET_S1:
      if (dest == reg)
        {
#ifdef __AARCH64EB__
          dest = (char *) dest + 12;
#endif
        }
      else
        *(float *)dest = *(float *)reg;
      break;
    case AARCH64_RET_S2:
      asm ("ldp q16, q17, [%1]\n\t"
           "st2 { v16.s, v17.s }[0], [%0]"
           : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
      break;
    case AARCH64_RET_S3:
      asm ("ldp q16, q17, [%1]\n\t"
           "ldr q18, [%1, #32]\n\t"
           "st3 { v16.s, v17.s, v18.s }[0], [%0]"
           : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
      break;
    case AARCH64_RET_S4:
      asm ("ldp q16, q17, [%1]\n\t"
           "ldp q18, q19, [%1, #32]\n\t"
           "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]"
           : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
      break;

    case AARCH64_RET_D1:
      if (dest == reg)
        {
#ifdef __AARCH64EB__
          dest = (char *) dest + 8;
#endif
        }
      else
        *(double *)dest = *(double *)reg;
      break;
    case AARCH64_RET_D2:
      asm ("ldp q16, q17, [%1]\n\t"
           "st2 { v16.d, v17.d }[0], [%0]"
           : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
      break;
    case AARCH64_RET_D3:
      asm ("ldp q16, q17, [%1]\n\t"
           "ldr q18, [%1, #32]\n\t"
           "st3 { v16.d, v17.d, v18.d }[0], [%0]"
           : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
      break;
    case AARCH64_RET_D4:
      asm ("ldp q16, q17, [%1]\n\t"
           "ldp q18, q19, [%1, #32]\n\t"
           "st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]"
           : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
      break;

    default:
      if (dest != reg)
        return memcpy (dest, reg, 16 * (4 - (h & 3)));
      break;
    }
  return dest;
}
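/* The st2/st3/st4 instructions above store lane 0 of each source
   register, so values that arrived one-per-q-register are packed back
   into contiguous memory: e.g. for AARCH64_RET_S2, the floats in s16
   and s17 end up as two adjacent floats at dest.  */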

/* Either allocate an appropriate register for the argument type, or if
   none are available, allocate a stack slot and return a pointer
   to the allocated space.  */

static void *
allocate_int_to_reg_or_stack (struct call_context *context,
                              struct arg_state *state,
                              void *stack, size_t size)
{
  if (state->ngrn < N_X_ARG_REG)
    return &context->x[state->ngrn++];

  state->ngrn = N_X_ARG_REG;
  return allocate_to_stack (state, stack, size, size);
}

ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
  ffi_type *rtype = cif->rtype;
  size_t bytes = cif->bytes;
  int flags, i, n;

  switch (rtype->type)
    {
    case FFI_TYPE_VOID:
      flags = AARCH64_RET_VOID;
      break;
    case FFI_TYPE_UINT8:
      flags = AARCH64_RET_UINT8;
      break;
    case FFI_TYPE_UINT16:
      flags = AARCH64_RET_UINT16;
      break;
    case FFI_TYPE_UINT32:
      flags = AARCH64_RET_UINT32;
      break;
    case FFI_TYPE_SINT8:
      flags = AARCH64_RET_SINT8;
      break;
    case FFI_TYPE_SINT16:
      flags = AARCH64_RET_SINT16;
      break;
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      flags = AARCH64_RET_SINT32;
      break;
    case FFI_TYPE_SINT64:
    case FFI_TYPE_UINT64:
      flags = AARCH64_RET_INT64;
      break;
    case FFI_TYPE_POINTER:
      flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64);
      break;

    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
    case FFI_TYPE_LONGDOUBLE:
    case FFI_TYPE_STRUCT:
    case FFI_TYPE_COMPLEX:
      flags = is_vfp_type (rtype);
      if (flags == 0)
        {
          size_t s = rtype->size;
          if (s > 16)
            {
              flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
              bytes += 8;
            }
          else if (s == 16)
            flags = AARCH64_RET_INT128;
          else if (s == 8)
            flags = AARCH64_RET_INT64;
          else
            flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
        }
      break;

    default:
      abort();
    }

  for (i = 0, n = cif->nargs; i < n; i++)
    if (is_vfp_type (cif->arg_types[i]))
      {
        flags |= AARCH64_FLAG_ARG_V;
        break;
      }

  /* Round the stack up to a multiple of the stack alignment requirement.  */
  cif->bytes = ALIGN(bytes, 16);
  cif->flags = flags;
#if defined (__APPLE__)
  cif->aarch64_nfixedargs = 0;
#endif

  return FFI_OK;
}
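/* For instance, a cif whose rtype is a struct of four doubles gets
   flags == AARCH64_RET_D4, while a 24-byte non-HFA struct is returned
   through a hidden pointer (AARCH64_RET_VOID | AARCH64_RET_IN_MEM)
   with 8 extra bytes reserved in cif->bytes.  */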

#if defined (__APPLE__)
/* Perform Apple-specific cif processing for variadic calls.  */
ffi_status
ffi_prep_cif_machdep_var (ffi_cif *cif,
                          unsigned int nfixedargs,
                          unsigned int ntotalargs)
{
  ffi_status status = ffi_prep_cif_machdep (cif);
  cif->aarch64_nfixedargs = nfixedargs;
  return status;
}
#endif /* __APPLE__ */

extern void ffi_call_SYSV (struct call_context *context, void *frame,
                           void (*fn)(void), void *rvalue, int flags,
                           void *closure) FFI_HIDDEN;

/* Call a function with the provided arguments and capture the return
   value.  */
static void
ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
              void **avalue, void *closure)
{
  struct call_context *context;
  void *stack, *frame, *rvalue;
  struct arg_state state;
  size_t stack_bytes, rtype_size, rsize;
  int i, nargs, flags;
  ffi_type *rtype;

  flags = cif->flags;
  rtype = cif->rtype;
  rtype_size = rtype->size;
  stack_bytes = cif->bytes;

  /* If the target function returns a structure via hidden pointer,
     then we cannot allow a null rvalue.  Otherwise, mash a null
     rvalue to void return type.  */
  rsize = 0;
  if (flags & AARCH64_RET_IN_MEM)
    {
      if (orig_rvalue == NULL)
        rsize = rtype_size;
    }
  else if (orig_rvalue == NULL)
    flags &= AARCH64_FLAG_ARG_V;
  else if (flags & AARCH64_RET_NEED_COPY)
    rsize = 16;

  /* Allocate consecutive stack for everything we'll need.  */
  context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
  stack = context + 1;
  frame = (char *) stack + stack_bytes;
  rvalue = (rsize ? (char *) frame + 32 : orig_rvalue);
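  /* The alloca'd block is laid out as: the register context, then
     cif->bytes of outgoing stack arguments, then a 32-byte frame for
     ffi_call_SYSV, and finally, when needed, a 16-byte or rtype-sized
     scratch area that stands in for a missing rvalue buffer.  */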

  arg_init (&state);
  for (i = 0, nargs = cif->nargs; i < nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];
      size_t s = ty->size;
      void *a = avalue[i];
      int h, t;

      t = ty->type;
      switch (t)
        {
        case FFI_TYPE_VOID:
          FFI_ASSERT (0);
          break;

        /* If the argument is a basic type the argument is allocated to an
           appropriate register, or if none are available, to the stack.  */
        case FFI_TYPE_INT:
        case FFI_TYPE_UINT8:
        case FFI_TYPE_SINT8:
        case FFI_TYPE_UINT16:
        case FFI_TYPE_SINT16:
        case FFI_TYPE_UINT32:
        case FFI_TYPE_SINT32:
        case FFI_TYPE_UINT64:
        case FFI_TYPE_SINT64:
        case FFI_TYPE_POINTER:
        do_pointer:
          {
            ffi_arg ext = extend_integer_type (a, t);
            if (state.ngrn < N_X_ARG_REG)
              context->x[state.ngrn++] = ext;
            else
              {
                void *d = allocate_to_stack (&state, stack, ty->alignment, s);
                state.ngrn = N_X_ARG_REG;
                /* Note that the default ABI extends each argument
                   to a full 64-bit slot, while the iOS ABI allocates
                   only enough space.  */
#ifdef __APPLE__
                memcpy (d, a, s);
#else
                *(ffi_arg *)d = ext;
#endif
              }
          }
          break;

        case FFI_TYPE_FLOAT:
        case FFI_TYPE_DOUBLE:
        case FFI_TYPE_LONGDOUBLE:
        case FFI_TYPE_STRUCT:
        case FFI_TYPE_COMPLEX:
          {
            void *dest;

            h = is_vfp_type (ty);
            if (h)
              {
                int elems = 4 - (h & 3);
                if (state.nsrn + elems <= N_V_ARG_REG)
                  {
                    dest = &context->v[state.nsrn];
                    state.nsrn += elems;
                    extend_hfa_type (dest, a, h);
                    break;
                  }
                state.nsrn = N_V_ARG_REG;
                dest = allocate_to_stack (&state, stack, ty->alignment, s);
              }
            else if (s > 16)
              {
                /* If the argument is a composite type that is larger than 16
                   bytes, then the argument has been copied to memory, and
                   the argument is replaced by a pointer to the copy.  */
                a = &avalue[i];
                t = FFI_TYPE_POINTER;
                goto do_pointer;
              }
            else
              {
                size_t n = (s + 7) / 8;
                if (state.ngrn + n <= N_X_ARG_REG)
                  {
                    /* If the argument is a composite type and the size in
                       double-words is not more than the number of available
                       X registers, then the argument is copied into
                       consecutive X registers.  */
                    dest = &context->x[state.ngrn];
                    state.ngrn += n;
                  }
                else
                  {
                    /* Otherwise, there are insufficient X registers.  Further
                       X register allocations are prevented, the NSAA is
                       adjusted and the argument is copied to memory at the
                       adjusted NSAA.  */
                    state.ngrn = N_X_ARG_REG;
                    dest = allocate_to_stack (&state, stack, ty->alignment, s);
                  }
              }
            memcpy (dest, a, s);
          }
          break;

        default:
          abort();
        }

#if defined (__APPLE__)
      if (i + 1 == cif->aarch64_nfixedargs)
        {
          state.ngrn = N_X_ARG_REG;
          state.nsrn = N_V_ARG_REG;
          state.allocating_variadic = 1;
        }
#endif
    }

  ffi_call_SYSV (context, frame, fn, rvalue, flags, closure);

  if (flags & AARCH64_RET_NEED_COPY)
    memcpy (orig_rvalue, rvalue, rtype_size);
}

void
ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
{
  ffi_call_int (cif, fn, rvalue, avalue, NULL);
}

#ifdef FFI_GO_CLOSURES
void
ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
             void **avalue, void *closure)
{
  ffi_call_int (cif, fn, rvalue, avalue, closure);
}
#endif /* FFI_GO_CLOSURES */

/* Build a trampoline.  */

extern void ffi_closure_SYSV (void) FFI_HIDDEN;
extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;

#if FFI_EXEC_TRAMPOLINE_TABLE

#include <mach/mach.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

extern void *ffi_closure_trampoline_table_page;

typedef struct ffi_trampoline_table ffi_trampoline_table;
typedef struct ffi_trampoline_table_entry ffi_trampoline_table_entry;

struct ffi_trampoline_table
{
  /* contiguous writable and executable pages */
  vm_address_t config_page;
  vm_address_t trampoline_page;

  /* free list tracking */
  uint16_t free_count;
  ffi_trampoline_table_entry *free_list;
  ffi_trampoline_table_entry *free_list_pool;

  ffi_trampoline_table *prev;
  ffi_trampoline_table *next;
};

struct ffi_trampoline_table_entry
{
  void *(*trampoline) ();
  ffi_trampoline_table_entry *next;
};

/* The trampoline configuration is placed one page prior to the trampoline's
   entry point.  */
#define FFI_TRAMPOLINE_CODELOC_CONFIG(codeloc) \
  ((void **) (((uint8_t *) codeloc) - PAGE_SIZE))

/* Total number of trampolines that fit in one trampoline table.  */
#define FFI_TRAMPOLINE_COUNT (PAGE_SIZE / FFI_TRAMPOLINE_SIZE)
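/* As an illustration only (the real values come from PAGE_SIZE and
   FFI_TRAMPOLINE_SIZE): with 4 KiB pages and 16-byte trampolines, one
   table would hold 256 trampolines, each paired with a two-pointer
   config slot on the page mapped immediately before the code page.  */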

static pthread_mutex_t ffi_trampoline_lock = PTHREAD_MUTEX_INITIALIZER;
static ffi_trampoline_table *ffi_trampoline_tables = NULL;

static ffi_trampoline_table *
ffi_trampoline_table_alloc (void)
{
  ffi_trampoline_table *table = NULL;

  /* Loop until we can allocate two contiguous pages.  */
  while (table == NULL)
    {
      vm_address_t config_page = 0x0;
      kern_return_t kt;

      /* Try to allocate two pages.  */
      kt = vm_allocate (mach_task_self (), &config_page, PAGE_SIZE * 2,
                        VM_FLAGS_ANYWHERE);
      if (kt != KERN_SUCCESS)
        {
          fprintf (stderr, "vm_allocate() failure: %d at %s:%d\n", kt,
                   __FILE__, __LINE__);
          break;
        }

      /* Now drop the second half of the allocation to make room for the
         trampoline table.  */
      vm_address_t trampoline_page = config_page + PAGE_SIZE;
      kt = vm_deallocate (mach_task_self (), trampoline_page, PAGE_SIZE);
      if (kt != KERN_SUCCESS)
        {
          fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
                   __FILE__, __LINE__);
          break;
        }

      /* Remap the trampoline table to directly follow the config page.  */
      vm_prot_t cur_prot;
      vm_prot_t max_prot;

      kt = vm_remap (mach_task_self (), &trampoline_page, PAGE_SIZE, 0x0,
                     FALSE, mach_task_self (),
                     (vm_address_t) &ffi_closure_trampoline_table_page, FALSE,
                     &cur_prot, &max_prot, VM_INHERIT_SHARE);

      /* If we lost access to the destination trampoline page, drop our
         config allocation mapping and retry.  */
      if (kt != KERN_SUCCESS)
        {
          /* Log unexpected failures.  */
          if (kt != KERN_NO_SPACE)
            {
              fprintf (stderr, "vm_remap() failure: %d at %s:%d\n", kt,
                       __FILE__, __LINE__);
            }

          vm_deallocate (mach_task_self (), config_page, PAGE_SIZE);
          continue;
        }

      /* We have valid trampoline and config pages.  */
      table = calloc (1, sizeof (ffi_trampoline_table));
      table->free_count = FFI_TRAMPOLINE_COUNT;
      table->config_page = config_page;
      table->trampoline_page = trampoline_page;

      /* Create and initialize the free list.  */
      table->free_list_pool =
        calloc (FFI_TRAMPOLINE_COUNT, sizeof (ffi_trampoline_table_entry));

      uint16_t i;
      for (i = 0; i < table->free_count; i++)
        {
          ffi_trampoline_table_entry *entry = &table->free_list_pool[i];
          entry->trampoline =
            (void *) (table->trampoline_page + (i * FFI_TRAMPOLINE_SIZE));

          if (i < table->free_count - 1)
            entry->next = &table->free_list_pool[i + 1];
        }

      table->free_list = table->free_list_pool;
    }

  return table;
}

void *
ffi_closure_alloc (size_t size, void **code)
{
  /* Create the closure.  */
  ffi_closure *closure = malloc (size);
  if (closure == NULL)
    return NULL;

  pthread_mutex_lock (&ffi_trampoline_lock);

  /* Check for an active trampoline table with available entries.  */
  ffi_trampoline_table *table = ffi_trampoline_tables;
  if (table == NULL || table->free_list == NULL)
    {
      table = ffi_trampoline_table_alloc ();
      if (table == NULL)
        {
          /* Don't leave the lock held on the failure path.  */
          pthread_mutex_unlock (&ffi_trampoline_lock);
          free (closure);
          return NULL;
        }

      /* Insert the new table at the top of the list.  */
      table->next = ffi_trampoline_tables;
      if (table->next != NULL)
        table->next->prev = table;

      ffi_trampoline_tables = table;
    }

  /* Claim the free entry.  */
  ffi_trampoline_table_entry *entry = table->free_list;
  table->free_list = entry->next;
  table->free_count--;
  entry->next = NULL;

  pthread_mutex_unlock (&ffi_trampoline_lock);

  /* Initialize the return values.  */
  *code = entry->trampoline;
  closure->trampoline_table = table;
  closure->trampoline_table_entry = entry;

  return closure;
}

void
ffi_closure_free (void *ptr)
{
  ffi_closure *closure = ptr;

  pthread_mutex_lock (&ffi_trampoline_lock);

  /* Fetch the table and entry references.  */
  ffi_trampoline_table *table = closure->trampoline_table;
  ffi_trampoline_table_entry *entry = closure->trampoline_table_entry;

  /* Return the entry to the free list.  */
  entry->next = table->free_list;
  table->free_list = entry;
  table->free_count++;

  /* If all trampolines within this table are free, and at least one other
     table exists, deallocate the table.  */
  if (table->free_count == FFI_TRAMPOLINE_COUNT
      && ffi_trampoline_tables != table)
    {
      /* Remove from the list.  */
      if (table->prev != NULL)
        table->prev->next = table->next;

      if (table->next != NULL)
        table->next->prev = table->prev;

      /* Deallocate pages.  */
      kern_return_t kt;
      kt = vm_deallocate (mach_task_self (), table->config_page, PAGE_SIZE);
      if (kt != KERN_SUCCESS)
        fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
                 __FILE__, __LINE__);

      kt = vm_deallocate (mach_task_self (), table->trampoline_page,
                          PAGE_SIZE);
      if (kt != KERN_SUCCESS)
        fprintf (stderr, "vm_deallocate() failure: %d at %s:%d\n", kt,
                 __FILE__, __LINE__);

      /* Deallocate the free list.  */
      free (table->free_list_pool);
      free (table);
    }
  else if (ffi_trampoline_tables != table)
    {
      /* Otherwise, bump this table to the top of the list.  */
      table->prev = NULL;
      table->next = ffi_trampoline_tables;
      if (ffi_trampoline_tables != NULL)
        ffi_trampoline_tables->prev = table;

      ffi_trampoline_tables = table;
    }

  pthread_mutex_unlock (&ffi_trampoline_lock);

  /* Free the closure.  */
  free (closure);
}

#endif

ffi_status
ffi_prep_closure_loc (ffi_closure *closure,
                      ffi_cif *cif,
                      void (*fun)(ffi_cif*, void*, void**, void*),
                      void *user_data,
                      void *codeloc)
{
  if (cif->abi != FFI_SYSV)
    return FFI_BAD_ABI;

  void (*start)(void);

  if (cif->flags & AARCH64_FLAG_ARG_V)
    start = ffi_closure_SYSV_V;
  else
    start = ffi_closure_SYSV;

#if FFI_EXEC_TRAMPOLINE_TABLE
  void **config = FFI_TRAMPOLINE_CODELOC_CONFIG (codeloc);
  config[0] = closure;
  config[1] = start;
#else
  static const unsigned char trampoline[16] = {
    0x90, 0x00, 0x00, 0x58,	/* ldr	x16, tramp+16	*/
    0xf1, 0xff, 0xff, 0x10,	/* adr	x17, tramp+0	*/
    0x00, 0x02, 0x1f, 0xd6	/* br	x16		*/
  };
  char *tramp = closure->tramp;

  /* Copy the code and plant the entry-point address immediately after it;
     the ldr above loads that address pc-relative into x16.  */
  memcpy (tramp, trampoline, sizeof(trampoline));

  *(UINT64 *)(tramp + 16) = (uintptr_t)start;

  ffi_clear_cache (tramp, tramp + FFI_TRAMPOLINE_SIZE);
#endif

  closure->cif = cif;
  closure->fun = fun;
  closure->user_data = user_data;

  return FFI_OK;
}
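/* Typical closure usage, as a minimal sketch; `cif', `handler' and
   `result' are assumed to be set up elsewhere (e.g. via ffi_prep_cif):

     void *code;
     ffi_closure *c = ffi_closure_alloc (sizeof (ffi_closure), &code);
     if (c != NULL
         && ffi_prep_closure_loc (c, &cif, handler, NULL, code) == FFI_OK)
       result = ((int (*)(int)) code) (42);
     ffi_closure_free (c);
*/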

#ifdef FFI_GO_CLOSURES
extern void ffi_go_closure_SYSV (void) FFI_HIDDEN;
extern void ffi_go_closure_SYSV_V (void) FFI_HIDDEN;

ffi_status
ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif *cif,
                     void (*fun)(ffi_cif*, void*, void**, void*))
{
  void (*start)(void);

  if (cif->abi != FFI_SYSV)
    return FFI_BAD_ABI;

  if (cif->flags & AARCH64_FLAG_ARG_V)
    start = ffi_go_closure_SYSV_V;
  else
    start = ffi_go_closure_SYSV;

  closure->tramp = start;
  closure->cif = cif;
  closure->fun = fun;

  return FFI_OK;
}
#endif /* FFI_GO_CLOSURES */

/* Primary handler to setup and invoke a function within a closure.

   A closure when invoked enters via the assembler wrapper
   ffi_closure_SYSV().  The wrapper allocates a call context on the
   stack, saves the interesting registers (from the perspective of
   the calling convention) into the context then passes control to
   ffi_closure_SYSV_inner() passing the saved context and a pointer to
   the stack at the point ffi_closure_SYSV() was invoked.

   On the return path the assembler wrapper will reload call context
   registers.

   ffi_closure_SYSV_inner() marshals the call context into ffi value
   descriptors, invokes the wrapped function, then marshals the return
   value back into the call context.  */

int FFI_HIDDEN
ffi_closure_SYSV_inner (ffi_cif *cif,
                        void (*fun)(ffi_cif*, void*, void**, void*),
                        void *user_data,
                        struct call_context *context,
                        void *stack, void *rvalue, void *struct_rvalue)
{
  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
  int i, h, nargs, flags;
  struct arg_state state;

  arg_init (&state);

  for (i = 0, nargs = cif->nargs; i < nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];
      int t = ty->type;
      size_t n, s = ty->size;

      switch (t)
        {
        case FFI_TYPE_VOID:
          FFI_ASSERT (0);
          break;

        case FFI_TYPE_INT:
        case FFI_TYPE_UINT8:
        case FFI_TYPE_SINT8:
        case FFI_TYPE_UINT16:
        case FFI_TYPE_SINT16:
        case FFI_TYPE_UINT32:
        case FFI_TYPE_SINT32:
        case FFI_TYPE_UINT64:
        case FFI_TYPE_SINT64:
        case FFI_TYPE_POINTER:
          avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s);
          break;

        case FFI_TYPE_FLOAT:
        case FFI_TYPE_DOUBLE:
        case FFI_TYPE_LONGDOUBLE:
        case FFI_TYPE_STRUCT:
        case FFI_TYPE_COMPLEX:
          h = is_vfp_type (ty);
          if (h)
            {
              n = 4 - (h & 3);
              if (state.nsrn + n <= N_V_ARG_REG)
                {
                  void *reg = &context->v[state.nsrn];
                  state.nsrn += n;

                  /* Eeek! We need a pointer to the structure, however the
                     homogeneous float elements are being passed in individual
                     registers, therefore for float and double the structure
                     is not represented as a contiguous sequence of bytes in
                     our saved register context.  We don't need the original
                     contents of the register storage, so we reformat the
                     structure into the same memory.  */
                  avalue[i] = compress_hfa_type (reg, reg, h);
                }
              else
                {
                  state.nsrn = N_V_ARG_REG;
                  avalue[i] = allocate_to_stack (&state, stack,
                                                 ty->alignment, s);
                }
            }
          else if (s > 16)
            {
              /* Replace a composite type of size greater than 16 bytes
                 with a pointer.  */
              avalue[i] = *(void **)
                allocate_int_to_reg_or_stack (context, &state, stack,
                                              sizeof (void *));
            }
          else
            {
              n = (s + 7) / 8;
              if (state.ngrn + n <= N_X_ARG_REG)
                {
                  avalue[i] = &context->x[state.ngrn];
                  state.ngrn += n;
                }
              else
                {
                  state.ngrn = N_X_ARG_REG;
                  avalue[i] = allocate_to_stack (&state, stack,
                                                 ty->alignment, s);
                }
            }
          break;

        default:
          abort();
        }
    }

  flags = cif->flags;
  if (flags & AARCH64_RET_IN_MEM)
    rvalue = struct_rvalue;

  fun (cif, rvalue, avalue, user_data);

  return flags;
}