1 /* -----------------------------------------------------------------------
2    ffi.c - Copyright (c) 2002  Bo Thorsen <bo@suse.de>
3 
4    x86-64 Foreign Function Interface
5 
6    Permission is hereby granted, free of charge, to any person obtaining
7    a copy of this software and associated documentation files (the
8    ``Software''), to deal in the Software without restriction, including
9    without limitation the rights to use, copy, modify, merge, publish,
10    distribute, sublicense, and/or sell copies of the Software, and to
11    permit persons to whom the Software is furnished to do so, subject to
12    the following conditions:
13 
14    The above copyright notice and this permission notice shall be included
15    in all copies or substantial portions of the Software.
16 
17    THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
18    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20    IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21    OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22    ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23    OTHER DEALINGS IN THE SOFTWARE.
24    ----------------------------------------------------------------------- */
25 
26 #include <ffi.h>
27 #include <ffi_common.h>
28 
29 #include <stdlib.h>
30 #include <stdarg.h>
31 
32 #ifdef __x86_64__
33 
34 #define MAX_GPR_REGS 6
35 #define MAX_SSE_REGS 8
36 
37 struct register_args
38 {
39   /* Registers for argument passing.  */
40   UINT64 gpr[MAX_GPR_REGS];
41   __int128_t sse[MAX_SSE_REGS];
42 };
43 
/* Call stub defined outside this file.  ffi_call hands it the argument
   buffer (a struct register_args immediately followed by the stack
   arguments), the size in bytes of that register image plus the stack
   arguments, the cif flags, the address for the return value and the
   function to call.  */
extern void ffi_call_unix64 (void *args,
			     unsigned long bytes,
			     unsigned flags,
			     void *raddr,
			     void (*fnaddr)());
46 
/* All references to register classes here are identical to the code in
   gcc/config/i386/i386.c.  Do *not* change one without the other.  */
49 
/* Register class used for passing a given 64-bit part of an argument.
   These are the classes documented by the psABI, with the exception of
   the SSESF and SSEDF classes.  Those are basically the SSE class; gcc
   merely uses an SFmode or DFmode move instead of a DImode one to avoid
   reformatting penalties.

   Similarly, INTEGERSI_CLASS exists so that cheaper SImode moves can be
   used whenever possible (the upper half then contains only padding).

   The order of the enumerators matters: SSE_CLASS_P relies on the SSE
   classes forming one contiguous range.  */
enum x86_64_reg_class
{
  X86_64_NO_CLASS,		/* Nothing classified in this word yet.  */
  X86_64_INTEGER_CLASS,		/* General purpose register, 64-bit move.  */
  X86_64_INTEGERSI_CLASS,	/* General purpose register, 32-bit move.  */
  X86_64_SSE_CLASS,		/* SSE register.  */
  X86_64_SSESF_CLASS,		/* SSE register holding a single float.  */
  X86_64_SSEDF_CLASS,		/* SSE register holding a double.  */
  X86_64_SSEUP_CLASS,		/* Upper part of an SSE register.  */
  X86_64_X87_CLASS,		/* First word of a long double.  */
  X86_64_X87UP_CLASS,		/* Second word of a long double.  */
  X86_64_COMPLEX_X87_CLASS,
  X86_64_MEMORY_CLASS		/* Passed in memory.  */
};
71 
/* Capacity of the class arrays used to describe a single argument.  */
#define MAX_CLASSES 4

/* Nonzero iff X is one of the SSE register classes (SSE, SSESF, SSEDF
   or SSEUP).  Both uses of X are parenthesized so that an argument
   containing a low-precedence operator is still tested as a whole.
   X is evaluated twice, so it must not have side effects.  */
#define SSE_CLASS_P(X) \
  ((X) >= X86_64_SSE_CLASS && (X) <= X86_64_SSEUP_CLASS)
75 
/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each 8 bytes of an incoming argument
   by register class and to assign registers accordingly.  */
79 
80 /* Return the union class of CLASS1 and CLASS2.
81    See the x86-64 PS ABI for details.  */
82 
83 static enum x86_64_reg_class
merge_classes(enum x86_64_reg_class class1,enum x86_64_reg_class class2)84 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
85 {
86   /* Rule #1: If both classes are equal, this is the resulting class.  */
87   if (class1 == class2)
88     return class1;
89 
90   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
91      the other class.  */
92   if (class1 == X86_64_NO_CLASS)
93     return class2;
94   if (class2 == X86_64_NO_CLASS)
95     return class1;
96 
97   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
98   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
99     return X86_64_MEMORY_CLASS;
100 
101   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
102   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
103       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
104     return X86_64_INTEGERSI_CLASS;
105   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
106       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
107     return X86_64_INTEGER_CLASS;
108 
109   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
110      MEMORY is used.  */
111   if (class1 == X86_64_X87_CLASS
112       || class1 == X86_64_X87UP_CLASS
113       || class1 == X86_64_COMPLEX_X87_CLASS
114       || class2 == X86_64_X87_CLASS
115       || class2 == X86_64_X87UP_CLASS
116       || class2 == X86_64_COMPLEX_X87_CLASS)
117     return X86_64_MEMORY_CLASS;
118 
119   /* Rule #6: Otherwise class SSE is used.  */
120   return X86_64_SSE_CLASS;
121 }
122 
123 /* Classify the argument of type TYPE and mode MODE.
124    CLASSES will be filled by the register class used to pass each word
125    of the operand.  The number of words is returned.  In case the parameter
126    should be passed in memory, 0 is returned. As a special case for zero
127    sized containers, classes[0] will be NO_CLASS and 1 is returned.
128 
129    See the x86-64 PS ABI for details.
130 */
131 static int
classify_argument(ffi_type * type,enum x86_64_reg_class classes[],size_t byte_offset)132 classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
133 		   size_t byte_offset)
134 {
135   switch (type->type)
136     {
137     case FFI_TYPE_UINT8:
138     case FFI_TYPE_SINT8:
139     case FFI_TYPE_UINT16:
140     case FFI_TYPE_SINT16:
141     case FFI_TYPE_UINT32:
142     case FFI_TYPE_SINT32:
143     case FFI_TYPE_UINT64:
144     case FFI_TYPE_SINT64:
145     case FFI_TYPE_POINTER:
146       if (byte_offset + type->size <= 4)
147 	classes[0] = X86_64_INTEGERSI_CLASS;
148       else
149 	classes[0] = X86_64_INTEGER_CLASS;
150       return 1;
151     case FFI_TYPE_FLOAT:
152       if (byte_offset == 0)
153 	classes[0] = X86_64_SSESF_CLASS;
154       else
155 	classes[0] = X86_64_SSE_CLASS;
156       return 1;
157     case FFI_TYPE_DOUBLE:
158       classes[0] = X86_64_SSEDF_CLASS;
159       return 1;
160     case FFI_TYPE_LONGDOUBLE:
161       classes[0] = X86_64_X87_CLASS;
162       classes[1] = X86_64_X87UP_CLASS;
163       return 2;
164     case FFI_TYPE_STRUCT:
165       {
166 	const int UNITS_PER_WORD = 8;
167 	int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
168 	ffi_type **ptr;
169 	int i;
170 	enum x86_64_reg_class subclasses[MAX_CLASSES];
171 
172 	/* If the struct is larger than 16 bytes, pass it on the stack.  */
173 	if (type->size > 16)
174 	  return 0;
175 
176 	for (i = 0; i < words; i++)
177 	  classes[i] = X86_64_NO_CLASS;
178 
179 	/* Merge the fields of structure.  */
180 	for (ptr = type->elements; *ptr != NULL; ptr++)
181 	  {
182 	    int num;
183 
184 	    byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
185 
186 	    num = classify_argument (*ptr, subclasses, byte_offset % 8);
187 	    if (num == 0)
188 	      return 0;
189 	    for (i = 0; i < num; i++)
190 	      {
191 		int pos = byte_offset / 8;
192 		classes[i + pos] =
193 		  merge_classes (subclasses[i], classes[i + pos]);
194 	      }
195 
196 	    byte_offset += (*ptr)->size;
197 	  }
198 
199 	/* Final merger cleanup.  */
200 	for (i = 0; i < words; i++)
201 	  {
202 	    /* If one class is MEMORY, everything should be passed in
203 	       memory.  */
204 	    if (classes[i] == X86_64_MEMORY_CLASS)
205 	      return 0;
206 
207 	    /* The X86_64_SSEUP_CLASS should be always preceded by
208 	       X86_64_SSE_CLASS.  */
209 	    if (classes[i] == X86_64_SSEUP_CLASS
210 		&& (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
211 	      classes[i] = X86_64_SSE_CLASS;
212 
213 	    /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
214 	    if (classes[i] == X86_64_X87UP_CLASS
215 		&& (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
216 	      classes[i] = X86_64_SSE_CLASS;
217 	  }
218 	return words;
219       }
220 
221     default:
222       FFI_ASSERT(0);
223     }
224   return 0; /* Never reached.  */
225 }
226 
227 /* Examine the argument and return set number of register required in each
228    class.  Return zero iff parameter should be passed in memory, otherwise
229    the number of registers.  */
230 
231 static int
examine_argument(ffi_type * type,enum x86_64_reg_class classes[MAX_CLASSES],_Bool in_return,int * pngpr,int * pnsse)232 examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
233 		  _Bool in_return, int *pngpr, int *pnsse)
234 {
235   int i, n, ngpr, nsse;
236 
237   n = classify_argument (type, classes, 0);
238   if (n == 0)
239     return 0;
240 
241   ngpr = nsse = 0;
242   for (i = 0; i < n; ++i)
243     switch (classes[i])
244       {
245       case X86_64_INTEGER_CLASS:
246       case X86_64_INTEGERSI_CLASS:
247 	ngpr++;
248 	break;
249       case X86_64_SSE_CLASS:
250       case X86_64_SSESF_CLASS:
251       case X86_64_SSEDF_CLASS:
252 	nsse++;
253 	break;
254       case X86_64_NO_CLASS:
255       case X86_64_SSEUP_CLASS:
256 	break;
257       case X86_64_X87_CLASS:
258       case X86_64_X87UP_CLASS:
259       case X86_64_COMPLEX_X87_CLASS:
260 	return in_return != 0;
261       default:
262 	abort ();
263       }
264 
265   *pngpr = ngpr;
266   *pnsse = nsse;
267 
268   return n;
269 }
270 
271 /* Perform machine dependent cif processing.  */
272 
273 ffi_status
ffi_prep_cif_machdep(ffi_cif * cif)274 ffi_prep_cif_machdep (ffi_cif *cif)
275 {
276   int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
277   enum x86_64_reg_class classes[MAX_CLASSES];
278   size_t bytes;
279 
280   gprcount = ssecount = 0;
281 
282   flags = cif->rtype->type;
283   if (flags != FFI_TYPE_VOID)
284     {
285       n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
286       if (n == 0)
287 	{
288 	  /* The return value is passed in memory.  A pointer to that
289 	     memory is the first argument.  Allocate a register for it.  */
290 	  gprcount++;
291 	  /* We don't have to do anything in asm for the return.  */
292 	  flags = FFI_TYPE_VOID;
293 	}
294       else if (flags == FFI_TYPE_STRUCT)
295 	{
296 	  /* Mark which registers the result appears in.  */
297 	  _Bool sse0 = SSE_CLASS_P (classes[0]);
298 	  _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
299 	  if (sse0 && !sse1)
300 	    flags |= 1 << 8;
301 	  else if (!sse0 && sse1)
302 	    flags |= 1 << 9;
303 	  else if (sse0 && sse1)
304 	    flags |= 1 << 10;
305 	  /* Mark the true size of the structure.  */
306 	  flags |= cif->rtype->size << 11;
307 	}
308     }
309   cif->flags = flags;
310 
311   /* Go over all arguments and determine the way they should be passed.
312      If it's in a register and there is space for it, let that be so. If
313      not, add it's size to the stack byte count.  */
314   for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
315     {
316       if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
317 	  || gprcount + ngpr > MAX_GPR_REGS
318 	  || ssecount + nsse > MAX_SSE_REGS)
319 	{
320 	  long align = cif->arg_types[i]->alignment;
321 
322 	  if (align < 8)
323 	    align = 8;
324 
325 	  bytes = ALIGN(bytes, align);
326 	  bytes += cif->arg_types[i]->size;
327 	}
328       else
329 	{
330 	  gprcount += ngpr;
331 	  ssecount += nsse;
332 	}
333     }
334   cif->bytes = bytes;
335 
336   return FFI_OK;
337 }
338 
339 void
ffi_call(ffi_cif * cif,void (* fn)(),void * rvalue,void ** avalue)340 ffi_call (ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue)
341 {
342   enum x86_64_reg_class classes[MAX_CLASSES];
343   char *stack, *argp;
344   ffi_type **arg_types;
345   int gprcount, ssecount, ngpr, nsse, i, avn;
346   _Bool ret_in_memory;
347   struct register_args *reg_args;
348 
349   /* Can't call 32-bit mode from 64-bit mode.  */
350   FFI_ASSERT (cif->abi == FFI_UNIX64);
351 
352   /* If the return value is a struct and we don't have a return value
353      address then we need to make one.  Note the setting of flags to
354      VOID above in ffi_prep_cif_machdep.  */
355   ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
356 		   && cif->flags == FFI_TYPE_VOID);
357   if (rvalue == NULL && ret_in_memory)
358     rvalue = alloca (cif->rtype->size);
359 
360   /* Allocate the space for the arguments, plus 4 words of temp space.  */
361   stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
362   reg_args = (struct register_args *) stack;
363   argp = stack + sizeof (struct register_args);
364 
365   gprcount = ssecount = 0;
366 
367   /* If the return value is passed in memory, add the pointer as the
368      first integer argument.  */
369   if (ret_in_memory)
370     reg_args->gpr[gprcount++] = (long) rvalue;
371 
372   avn = cif->nargs;
373   arg_types = cif->arg_types;
374 
375   for (i = 0; i < avn; ++i)
376     {
377       size_t size = arg_types[i]->size;
378       int n;
379 
380       n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
381       if (n == 0
382 	  || gprcount + ngpr > MAX_GPR_REGS
383 	  || ssecount + nsse > MAX_SSE_REGS)
384 	{
385 	  long align = arg_types[i]->alignment;
386 
387 	  /* Stack arguments are *always* at least 8 byte aligned.  */
388 	  if (align < 8)
389 	    align = 8;
390 
391 	  /* Pass this argument in memory.  */
392 	  argp = (void *) ALIGN (argp, align);
393 	  memcpy (argp, avalue[i], size);
394 	  argp += size;
395 	}
396       else
397 	{
398 	  /* The argument is passed entirely in registers.  */
399 	  char *a = (char *) avalue[i];
400 	  int j;
401 
402 	  for (j = 0; j < n; j++, a += 8, size -= 8)
403 	    {
404 	      switch (classes[j])
405 		{
406 		case X86_64_INTEGER_CLASS:
407 		case X86_64_INTEGERSI_CLASS:
408 		  reg_args->gpr[gprcount] = 0;
409 		  memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
410 		  gprcount++;
411 		  break;
412 		case X86_64_SSE_CLASS:
413 		case X86_64_SSEDF_CLASS:
414 		  reg_args->sse[ssecount++] = *(UINT64 *) a;
415 		  break;
416 		case X86_64_SSESF_CLASS:
417 		  reg_args->sse[ssecount++] = *(UINT32 *) a;
418 		  break;
419 		default:
420 		  abort();
421 		}
422 	    }
423 	}
424     }
425 
426   ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
427 		   cif->flags, rvalue, fn);
428 }
429 
430 
431 extern void ffi_closure_unix64(void);
432 
433 ffi_status
ffi_prep_closure(ffi_closure * closure,ffi_cif * cif,void (* fun)(ffi_cif *,void *,void **,void *),void * user_data)434 ffi_prep_closure (ffi_closure* closure,
435 		  ffi_cif* cif,
436 		  void (*fun)(ffi_cif*, void*, void**, void*),
437 		  void *user_data)
438 {
439   volatile unsigned short *tramp;
440 
441   tramp = (volatile unsigned short *) &closure->tramp[0];
442   tramp[0] = 0xbb49;		/* mov <code>, %r11	*/
443   tramp[5] = 0xba49;		/* mov <data>, %r10	*/
444   tramp[10] = 0xff49;		/* jmp *%r11	*/
445   tramp[11] = 0x00e3;
446   *(void * volatile *) &tramp[1] = ffi_closure_unix64;
447   *(void * volatile *) &tramp[6] = closure;
448 
449   closure->cif = cif;
450   closure->fun = fun;
451   closure->user_data = user_data;
452 
453   return FFI_OK;
454 }
455 
456 int
ffi_closure_unix64_inner(ffi_closure * closure,void * rvalue,struct register_args * reg_args,char * argp)457 ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
458 			 struct register_args *reg_args, char *argp)
459 {
460   ffi_cif *cif;
461   void **avalue;
462   ffi_type **arg_types;
463   long i, avn;
464   int gprcount, ssecount, ngpr, nsse;
465   int ret;
466 
467   cif = closure->cif;
468   avalue = alloca(cif->nargs * sizeof(void *));
469   gprcount = ssecount = 0;
470 
471   ret = cif->rtype->type;
472   if (ret != FFI_TYPE_VOID)
473     {
474       enum x86_64_reg_class classes[MAX_CLASSES];
475       int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
476       if (n == 0)
477 	{
478 	  /* The return value goes in memory.  Arrange for the closure
479 	     return value to go directly back to the original caller.  */
480 	  rvalue = (void *) reg_args->gpr[gprcount++];
481 	  /* We don't have to do anything in asm for the return.  */
482 	  ret = FFI_TYPE_VOID;
483 	}
484       else if (ret == FFI_TYPE_STRUCT && n == 2)
485 	{
486 	  /* Mark which register the second word of the structure goes in.  */
487 	  _Bool sse0 = SSE_CLASS_P (classes[0]);
488 	  _Bool sse1 = SSE_CLASS_P (classes[1]);
489 	  if (!sse0 && sse1)
490 	    ret |= 1 << 8;
491 	  else if (sse0 && !sse1)
492 	    ret |= 1 << 9;
493 	}
494     }
495 
496   avn = cif->nargs;
497   arg_types = cif->arg_types;
498 
499   for (i = 0; i < avn; ++i)
500     {
501       enum x86_64_reg_class classes[MAX_CLASSES];
502       int n;
503 
504       n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
505       if (n == 0
506 	  || gprcount + ngpr > MAX_GPR_REGS
507 	  || ssecount + nsse > MAX_SSE_REGS)
508 	{
509 	  long align = arg_types[i]->alignment;
510 
511 	  /* Stack arguments are *always* at least 8 byte aligned.  */
512 	  if (align < 8)
513 	    align = 8;
514 
515 	  /* Pass this argument in memory.  */
516 	  argp = (void *) ALIGN (argp, align);
517 	  avalue[i] = argp;
518 	  argp += arg_types[i]->size;
519 	}
520       /* If the argument is in a single register, or two consecutive
521 	 registers, then we can use that address directly.  */
522       else if (n == 1
523 	       || (n == 2
524 		   && SSE_CLASS_P (classes[0]) == SSE_CLASS_P (classes[1])))
525 	{
526 	  /* The argument is in a single register.  */
527 	  if (SSE_CLASS_P (classes[0]))
528 	    {
529 	      avalue[i] = &reg_args->sse[ssecount];
530 	      ssecount += n;
531 	    }
532 	  else
533 	    {
534 	      avalue[i] = &reg_args->gpr[gprcount];
535 	      gprcount += n;
536 	    }
537 	}
538       /* Otherwise, allocate space to make them consecutive.  */
539       else
540 	{
541 	  char *a = alloca (16);
542 	  int j;
543 
544 	  avalue[i] = a;
545 	  for (j = 0; j < n; j++, a += 8)
546 	    {
547 	      if (SSE_CLASS_P (classes[j]))
548 		memcpy (a, &reg_args->sse[ssecount++], 8);
549 	      else
550 		memcpy (a, &reg_args->gpr[gprcount++], 8);
551 	    }
552 	}
553     }
554 
555   /* Invoke the closure.  */
556   closure->fun (cif, rvalue, avalue, closure->user_data);
557 
558   /* Tell assembly how to perform return type promotions.  */
559   return ret;
560 }
561 
562 #endif /* __x86_64__ */
563