/* -----------------------------------------------------------------------
   ffi64.c - Copyright (c) 2011  Anthony Green
             Copyright (c) 2008, 2010  Red Hat, Inc.
             Copyright (c) 2002, 2007  Bo Thorsen <bo@suse.de>

   x86-64 Foreign Function Interface

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   ``Software''), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   DEALINGS IN THE SOFTWARE.
   ----------------------------------------------------------------------- */
28
29 #include <ffi.h>
30 #include <ffi_common.h>
31
32 #include <stdlib.h>
33 #include <stdarg.h>
34
35 #ifdef __x86_64__
36
/* Number of general-purpose and SSE registers available for argument
   passing.  They size the gpr[] and sse[] arrays of struct register_args
   and bound the register allocation done in this file.  */
#define MAX_GPR_REGS 6
#define MAX_SSE_REGS 8

/* 128-bit storage type used for one SSE register slot.  The Intel
   compiler spells it __m128, which is only declared once <xmmintrin.h>
   has been included.  */
#ifdef __INTEL_COMPILER
#include <xmmintrin.h>
#define UINT128 __m128
#else
#define UINT128 __int128_t
#endif
45
46 struct register_args
47 {
48 /* Registers for argument passing. */
49 UINT64 gpr[MAX_GPR_REGS];
50 UINT128 sse[MAX_SSE_REGS];
51 };
52
/* Low-level call helper, defined outside this file.  ffi_call passes it:
   ARGS   - the block holding a struct register_args followed by the
            stack-passed arguments,
   BYTES  - the size of that block (cif->bytes plus the register area),
   FLAGS  - cif->flags as computed by ffi_prep_cif_machdep,
   RADDR  - where the return value is to be stored,
   FNADDR - the function to call,
   SSECOUNT - the number of SSE register slots that were filled in.  */
extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
                             void *raddr, void (*fnaddr)(void),
                             unsigned ssecount);
55
/* All references to register classes here are identical to the code in
   gcc/config/i386/i386.c.  Do *not* change one without the other.  */
58
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the
   exception of the SSESF and SSEDF classes, which are basically the SSE
   class; gcc just uses an SFmode or DFmode move instead of DImode to
   avoid reformatting penalties.

   Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half then contains only padding).

   The order of the enumerators matters: SSE_CLASS_P relies on the four
   SSE classes being contiguous.  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

/* Maximum number of eightbytes (and hence classes) of one argument.  */
#define MAX_CLASSES 4

/* Nonzero iff X is one of the SSE classes (SSE, SSESF, SSEDF, SSEUP).
   Both uses of X are parenthesized so that an argument containing a
   low-precedence operator (e.g. ?:) is evaluated as a unit.  X is still
   evaluated twice, so it must not have side effects.  */
#define SSE_CLASS_P(X) \
  ((X) >= X86_64_SSE_CLASS && (X) <= X86_64_SSEUP_CLASS)
85
/* x86-64 register passing implementation.  See the x86-64 psABI for
   details.  The goal of this code is to classify each eightbyte of an
   incoming argument by register class and to assign registers
   accordingly.  */
89
90 /* Return the union class of CLASS1 and CLASS2.
91 See the x86-64 PS ABI for details. */
92
93 static enum x86_64_reg_class
merge_classes(enum x86_64_reg_class class1,enum x86_64_reg_class class2)94 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
95 {
96 /* Rule #1: If both classes are equal, this is the resulting class. */
97 if (class1 == class2)
98 return class1;
99
100 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
101 the other class. */
102 if (class1 == X86_64_NO_CLASS)
103 return class2;
104 if (class2 == X86_64_NO_CLASS)
105 return class1;
106
107 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
108 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
109 return X86_64_MEMORY_CLASS;
110
111 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
112 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
113 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
114 return X86_64_INTEGERSI_CLASS;
115 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
116 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
117 return X86_64_INTEGER_CLASS;
118
119 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
120 MEMORY is used. */
121 if (class1 == X86_64_X87_CLASS
122 || class1 == X86_64_X87UP_CLASS
123 || class1 == X86_64_COMPLEX_X87_CLASS
124 || class2 == X86_64_X87_CLASS
125 || class2 == X86_64_X87UP_CLASS
126 || class2 == X86_64_COMPLEX_X87_CLASS)
127 return X86_64_MEMORY_CLASS;
128
129 /* Rule #6: Otherwise class SSE is used. */
130 return X86_64_SSE_CLASS;
131 }
132
133 /* Classify the argument of type TYPE and mode MODE.
134 CLASSES will be filled by the register class used to pass each word
135 of the operand. The number of words is returned. In case the parameter
136 should be passed in memory, 0 is returned. As a special case for zero
137 sized containers, classes[0] will be NO_CLASS and 1 is returned.
138
139 See the x86-64 PS ABI for details.
140 */
141 static int
classify_argument(ffi_type * type,enum x86_64_reg_class classes[],size_t byte_offset)142 classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
143 size_t byte_offset)
144 {
145 switch (type->type)
146 {
147 case FFI_TYPE_UINT8:
148 case FFI_TYPE_SINT8:
149 case FFI_TYPE_UINT16:
150 case FFI_TYPE_SINT16:
151 case FFI_TYPE_UINT32:
152 case FFI_TYPE_SINT32:
153 case FFI_TYPE_UINT64:
154 case FFI_TYPE_SINT64:
155 case FFI_TYPE_POINTER:
156 {
157 int size = byte_offset + type->size;
158
159 if (size <= 4)
160 {
161 classes[0] = X86_64_INTEGERSI_CLASS;
162 return 1;
163 }
164 else if (size <= 8)
165 {
166 classes[0] = X86_64_INTEGER_CLASS;
167 return 1;
168 }
169 else if (size <= 12)
170 {
171 classes[0] = X86_64_INTEGER_CLASS;
172 classes[1] = X86_64_INTEGERSI_CLASS;
173 return 2;
174 }
175 else if (size <= 16)
176 {
177 classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
178 return 2;
179 }
180 else
181 FFI_ASSERT (0);
182 }
183 case FFI_TYPE_FLOAT:
184 if (!(byte_offset % 8))
185 classes[0] = X86_64_SSESF_CLASS;
186 else
187 classes[0] = X86_64_SSE_CLASS;
188 return 1;
189 case FFI_TYPE_DOUBLE:
190 classes[0] = X86_64_SSEDF_CLASS;
191 return 1;
192 case FFI_TYPE_LONGDOUBLE:
193 classes[0] = X86_64_X87_CLASS;
194 classes[1] = X86_64_X87UP_CLASS;
195 return 2;
196 case FFI_TYPE_STRUCT:
197 {
198 const int UNITS_PER_WORD = 8;
199 int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
200 ffi_type **ptr;
201 int i;
202 enum x86_64_reg_class subclasses[MAX_CLASSES];
203
204 /* If the struct is larger than 32 bytes, pass it on the stack. */
205 if (type->size > 32)
206 return 0;
207
208 for (i = 0; i < words; i++)
209 classes[i] = X86_64_NO_CLASS;
210
211 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
212 signalize memory class, so handle it as special case. */
213 if (!words)
214 {
215 classes[0] = X86_64_NO_CLASS;
216 return 1;
217 }
218
219 /* Merge the fields of structure. */
220 for (ptr = type->elements; *ptr != NULL; ptr++)
221 {
222 int num;
223
224 byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
225
226 num = classify_argument (*ptr, subclasses, byte_offset % 8);
227 if (num == 0)
228 return 0;
229 for (i = 0; i < num; i++)
230 {
231 int pos = byte_offset / 8;
232 classes[i + pos] =
233 merge_classes (subclasses[i], classes[i + pos]);
234 }
235
236 byte_offset += (*ptr)->size;
237 }
238
239 if (words > 2)
240 {
241 /* When size > 16 bytes, if the first one isn't
242 X86_64_SSE_CLASS or any other ones aren't
243 X86_64_SSEUP_CLASS, everything should be passed in
244 memory. */
245 if (classes[0] != X86_64_SSE_CLASS)
246 return 0;
247
248 for (i = 1; i < words; i++)
249 if (classes[i] != X86_64_SSEUP_CLASS)
250 return 0;
251 }
252
253 /* Final merger cleanup. */
254 for (i = 0; i < words; i++)
255 {
256 /* If one class is MEMORY, everything should be passed in
257 memory. */
258 if (classes[i] == X86_64_MEMORY_CLASS)
259 return 0;
260
261 /* The X86_64_SSEUP_CLASS should be always preceded by
262 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
263 if (classes[i] == X86_64_SSEUP_CLASS
264 && classes[i - 1] != X86_64_SSE_CLASS
265 && classes[i - 1] != X86_64_SSEUP_CLASS)
266 {
267 /* The first one should never be X86_64_SSEUP_CLASS. */
268 FFI_ASSERT (i != 0);
269 classes[i] = X86_64_SSE_CLASS;
270 }
271
272 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
273 everything should be passed in memory. */
274 if (classes[i] == X86_64_X87UP_CLASS
275 && (classes[i - 1] != X86_64_X87_CLASS))
276 {
277 /* The first one should never be X86_64_X87UP_CLASS. */
278 FFI_ASSERT (i != 0);
279 return 0;
280 }
281 }
282 return words;
283 }
284
285 default:
286 FFI_ASSERT(0);
287 }
288 return 0; /* Never reached. */
289 }
290
291 /* Examine the argument and return set number of register required in each
292 class. Return zero iff parameter should be passed in memory, otherwise
293 the number of registers. */
294
295 static int
examine_argument(ffi_type * type,enum x86_64_reg_class classes[MAX_CLASSES],_Bool in_return,int * pngpr,int * pnsse)296 examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
297 _Bool in_return, int *pngpr, int *pnsse)
298 {
299 int i, n, ngpr, nsse;
300
301 n = classify_argument (type, classes, 0);
302 if (n == 0)
303 return 0;
304
305 ngpr = nsse = 0;
306 for (i = 0; i < n; ++i)
307 switch (classes[i])
308 {
309 case X86_64_INTEGER_CLASS:
310 case X86_64_INTEGERSI_CLASS:
311 ngpr++;
312 break;
313 case X86_64_SSE_CLASS:
314 case X86_64_SSESF_CLASS:
315 case X86_64_SSEDF_CLASS:
316 nsse++;
317 break;
318 case X86_64_NO_CLASS:
319 case X86_64_SSEUP_CLASS:
320 break;
321 case X86_64_X87_CLASS:
322 case X86_64_X87UP_CLASS:
323 case X86_64_COMPLEX_X87_CLASS:
324 return in_return != 0;
325 default:
326 abort ();
327 }
328
329 *pngpr = ngpr;
330 *pnsse = nsse;
331
332 return n;
333 }
334
335 /* Perform machine dependent cif processing. */
336
337 ffi_status
ffi_prep_cif_machdep(ffi_cif * cif)338 ffi_prep_cif_machdep (ffi_cif *cif)
339 {
340 int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
341 enum x86_64_reg_class classes[MAX_CLASSES];
342 size_t bytes;
343
344 gprcount = ssecount = 0;
345
346 flags = cif->rtype->type;
347 if (flags != FFI_TYPE_VOID)
348 {
349 n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
350 if (n == 0)
351 {
352 /* The return value is passed in memory. A pointer to that
353 memory is the first argument. Allocate a register for it. */
354 gprcount++;
355 /* We don't have to do anything in asm for the return. */
356 flags = FFI_TYPE_VOID;
357 }
358 else if (flags == FFI_TYPE_STRUCT)
359 {
360 /* Mark which registers the result appears in. */
361 _Bool sse0 = SSE_CLASS_P (classes[0]);
362 _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
363 if (sse0 && !sse1)
364 flags |= 1 << 8;
365 else if (!sse0 && sse1)
366 flags |= 1 << 9;
367 else if (sse0 && sse1)
368 flags |= 1 << 10;
369 /* Mark the true size of the structure. */
370 flags |= cif->rtype->size << 12;
371 }
372 }
373
374 /* Go over all arguments and determine the way they should be passed.
375 If it's in a register and there is space for it, let that be so. If
376 not, add it's size to the stack byte count. */
377 for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
378 {
379 if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
380 || gprcount + ngpr > MAX_GPR_REGS
381 || ssecount + nsse > MAX_SSE_REGS)
382 {
383 long align = cif->arg_types[i]->alignment;
384
385 if (align < 8)
386 align = 8;
387
388 bytes = ALIGN (bytes, align);
389 bytes += cif->arg_types[i]->size;
390 }
391 else
392 {
393 gprcount += ngpr;
394 ssecount += nsse;
395 }
396 }
397 if (ssecount)
398 flags |= 1 << 11;
399 cif->flags = flags;
400 cif->bytes = ALIGN (bytes, 8);
401
402 return FFI_OK;
403 }
404
405 void
ffi_call(ffi_cif * cif,void (* fn)(void),void * rvalue,void ** avalue)406 ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
407 {
408 enum x86_64_reg_class classes[MAX_CLASSES];
409 char *stack, *argp;
410 ffi_type **arg_types;
411 int gprcount, ssecount, ngpr, nsse, i, avn;
412 _Bool ret_in_memory;
413 struct register_args *reg_args;
414
415 /* Can't call 32-bit mode from 64-bit mode. */
416 FFI_ASSERT (cif->abi == FFI_UNIX64);
417
418 /* If the return value is a struct and we don't have a return value
419 address then we need to make one. Note the setting of flags to
420 VOID above in ffi_prep_cif_machdep. */
421 ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
422 && (cif->flags & 0xff) == FFI_TYPE_VOID);
423 if (rvalue == NULL && ret_in_memory)
424 rvalue = alloca (cif->rtype->size);
425
426 /* Allocate the space for the arguments, plus 4 words of temp space. */
427 stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
428 reg_args = (struct register_args *) stack;
429 argp = stack + sizeof (struct register_args);
430
431 gprcount = ssecount = 0;
432
433 /* If the return value is passed in memory, add the pointer as the
434 first integer argument. */
435 if (ret_in_memory)
436 reg_args->gpr[gprcount++] = (unsigned long) rvalue;
437
438 avn = cif->nargs;
439 arg_types = cif->arg_types;
440
441 for (i = 0; i < avn; ++i)
442 {
443 size_t size = arg_types[i]->size;
444 int n;
445
446 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
447 if (n == 0
448 || gprcount + ngpr > MAX_GPR_REGS
449 || ssecount + nsse > MAX_SSE_REGS)
450 {
451 long align = arg_types[i]->alignment;
452
453 /* Stack arguments are *always* at least 8 byte aligned. */
454 if (align < 8)
455 align = 8;
456
457 /* Pass this argument in memory. */
458 argp = (void *) ALIGN (argp, align);
459 memcpy (argp, avalue[i], size);
460 argp += size;
461 }
462 else
463 {
464 /* The argument is passed entirely in registers. */
465 char *a = (char *) avalue[i];
466 int j;
467
468 for (j = 0; j < n; j++, a += 8, size -= 8)
469 {
470 switch (classes[j])
471 {
472 case X86_64_INTEGER_CLASS:
473 case X86_64_INTEGERSI_CLASS:
474 reg_args->gpr[gprcount] = 0;
475 memcpy (®_args->gpr[gprcount], a, size < 8 ? size : 8);
476 gprcount++;
477 break;
478 case X86_64_SSE_CLASS:
479 case X86_64_SSEDF_CLASS:
480 reg_args->sse[ssecount++] = *(UINT64 *) a;
481 break;
482 case X86_64_SSESF_CLASS:
483 reg_args->sse[ssecount++] = *(UINT32 *) a;
484 break;
485 default:
486 abort();
487 }
488 }
489 }
490 }
491
492 ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
493 cif->flags, rvalue, fn, ssecount);
494 }
495
496
/* Closure entry point, defined outside this file.  Its address is
   embedded in every trampoline written by ffi_prep_closure_loc.  */
extern void ffi_closure_unix64 (void);
498
499 ffi_status
ffi_prep_closure_loc(ffi_closure * closure,ffi_cif * cif,void (* fun)(ffi_cif *,void *,void **,void *),void * user_data,void * codeloc)500 ffi_prep_closure_loc (ffi_closure* closure,
501 ffi_cif* cif,
502 void (*fun)(ffi_cif*, void*, void**, void*),
503 void *user_data,
504 void *codeloc)
505 {
506 volatile unsigned short *tramp;
507
508 /* Sanity check on the cif ABI. */
509 {
510 int abi = cif->abi;
511 if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI)))
512 return FFI_BAD_ABI;
513 }
514
515 tramp = (volatile unsigned short *) &closure->tramp[0];
516
517 tramp[0] = 0xbb49; /* mov <code>, %r11 */
518 *((unsigned long long * volatile) &tramp[1])
519 = (unsigned long) ffi_closure_unix64;
520 tramp[5] = 0xba49; /* mov <data>, %r10 */
521 *((unsigned long long * volatile) &tramp[6])
522 = (unsigned long) codeloc;
523
524 /* Set the carry bit iff the function uses any sse registers.
525 This is clc or stc, together with the first byte of the jmp. */
526 tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
527
528 tramp[11] = 0xe3ff; /* jmp *%r11 */
529
530 closure->cif = cif;
531 closure->fun = fun;
532 closure->user_data = user_data;
533
534 return FFI_OK;
535 }
536
537 int
ffi_closure_unix64_inner(ffi_closure * closure,void * rvalue,struct register_args * reg_args,char * argp)538 ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
539 struct register_args *reg_args, char *argp)
540 {
541 ffi_cif *cif;
542 void **avalue;
543 ffi_type **arg_types;
544 long i, avn;
545 int gprcount, ssecount, ngpr, nsse;
546 int ret;
547
548 cif = closure->cif;
549 avalue = alloca(cif->nargs * sizeof(void *));
550 gprcount = ssecount = 0;
551
552 ret = cif->rtype->type;
553 if (ret != FFI_TYPE_VOID)
554 {
555 enum x86_64_reg_class classes[MAX_CLASSES];
556 int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
557 if (n == 0)
558 {
559 /* The return value goes in memory. Arrange for the closure
560 return value to go directly back to the original caller. */
561 rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++];
562 /* We don't have to do anything in asm for the return. */
563 ret = FFI_TYPE_VOID;
564 }
565 else if (ret == FFI_TYPE_STRUCT && n == 2)
566 {
567 /* Mark which register the second word of the structure goes in. */
568 _Bool sse0 = SSE_CLASS_P (classes[0]);
569 _Bool sse1 = SSE_CLASS_P (classes[1]);
570 if (!sse0 && sse1)
571 ret |= 1 << 8;
572 else if (sse0 && !sse1)
573 ret |= 1 << 9;
574 }
575 }
576
577 avn = cif->nargs;
578 arg_types = cif->arg_types;
579
580 for (i = 0; i < avn; ++i)
581 {
582 enum x86_64_reg_class classes[MAX_CLASSES];
583 int n;
584
585 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
586 if (n == 0
587 || gprcount + ngpr > MAX_GPR_REGS
588 || ssecount + nsse > MAX_SSE_REGS)
589 {
590 long align = arg_types[i]->alignment;
591
592 /* Stack arguments are *always* at least 8 byte aligned. */
593 if (align < 8)
594 align = 8;
595
596 /* Pass this argument in memory. */
597 argp = (void *) ALIGN (argp, align);
598 avalue[i] = argp;
599 argp += arg_types[i]->size;
600 }
601 /* If the argument is in a single register, or two consecutive
602 integer registers, then we can use that address directly. */
603 else if (n == 1
604 || (n == 2 && !(SSE_CLASS_P (classes[0])
605 || SSE_CLASS_P (classes[1]))))
606 {
607 /* The argument is in a single register. */
608 if (SSE_CLASS_P (classes[0]))
609 {
610 avalue[i] = ®_args->sse[ssecount];
611 ssecount += n;
612 }
613 else
614 {
615 avalue[i] = ®_args->gpr[gprcount];
616 gprcount += n;
617 }
618 }
619 /* Otherwise, allocate space to make them consecutive. */
620 else
621 {
622 char *a = alloca (16);
623 int j;
624
625 avalue[i] = a;
626 for (j = 0; j < n; j++, a += 8)
627 {
628 if (SSE_CLASS_P (classes[j]))
629 memcpy (a, ®_args->sse[ssecount++], 8);
630 else
631 memcpy (a, ®_args->gpr[gprcount++], 8);
632 }
633 }
634 }
635
636 /* Invoke the closure. */
637 closure->fun (cif, rvalue, avalue, closure->user_data);
638
639 /* Tell assembly how to perform return type promotions. */
640 return ret;
641 }
642
643 #endif /* __x86_64__ */
644