1 #if defined(__ppc__) || defined(__ppc64__)
2 
3 /* -----------------------------------------------------------------------
4    ffi.c - Copyright (c) 1998 Geoffrey Keating
5 
6    PowerPC Foreign Function Interface
7 
8    Darwin ABI support (c) 2001 John Hornkvist
9    AIX ABI support (c) 2002 Free Software Foundation, Inc.
10 
11    Permission is hereby granted, free of charge, to any person obtaining
12    a copy of this software and associated documentation files (the
13    ``Software''), to deal in the Software without restriction, including
14    without limitation the rights to use, copy, modify, merge, publish,
15    distribute, sublicense, and/or sell copies of the Software, and to
16    permit persons to whom the Software is furnished to do so, subject to
17    the following conditions:
18 
19    The above copyright notice and this permission notice shall be included
20    in all copies or substantial portions of the Software.
21 
22    THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
23    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25    IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
26    OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27    ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28    OTHER DEALINGS IN THE SOFTWARE.
29    ----------------------------------------------------------------------- */
30 
31 #include <ffi.h>
32 #include <ffi_common.h>
33 
34 #include <stdbool.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <ppc-darwin.h>
38 #include <architecture/ppc/mode_independent_asm.h>
39 
/*	sys_icache_invalidate() would normally come from
	<libkern/OSCacheControl.h> (on POWERPC_DARWIN builds). That header is
	deliberately not included; the function is declared by hand below and
	marked weak instead.  */
#pragma weak sys_icache_invalidate
extern void sys_icache_invalidate(void *start, size_t len);


/*	Assembly-language routine whose address is stored into every closure
	trampoline built by ffi_prep_closure().  */
extern void ffi_closure_ASM(void);
54 
/*	Layout of a function descriptor.  Per the original note, a C function
	pointer really points to one of these (on the AIX ABI).
	ffi_prep_closure() copies both words out of the descriptor found at
	ffi_closure_ASM.  */
struct aix_fd_struct {
	void	*code_pointer;	/* first word of the descriptor */
	void	*toc;			/* second word of the descriptor */
};

typedef struct aix_fd_struct aix_fd;
61 
62 /* ffi_prep_args is called by the assembly routine once stack space
63    has been allocated for the function's arguments.
64 
65    The stack layout we want looks like this:
66 
67    |   Return address from ffi_call_DARWIN      |	higher addresses
68    |--------------------------------------------|
69    |   Previous backchain pointer      4/8      |	    stack pointer here
70    |--------------------------------------------|-\ <<< on entry to
71    |   Saved r28-r31                 (4/8)*4    | |	    ffi_call_DARWIN
72    |--------------------------------------------| |
73    |   Parameters      (at least 8*(4/8)=32/64) | | (176) +112 - +288
74    |--------------------------------------------| |
75    |   Space for GPR2                  4/8      | |
76    |--------------------------------------------| |	stack	|
   |   Reserved                      (4/8)*2    | |	grows	|
78    |--------------------------------------------| |	down	V
79    |   Space for callee's LR           4/8      | |
80    |--------------------------------------------| |	lower addresses
81    |   Saved CR                        4/8      | |
82    |--------------------------------------------| |     stack pointer here
83    |   Current backchain pointer       4/8      | |     during
84    |--------------------------------------------|-/ <<< ffi_call_DARWIN
85 
86 	Note: ppc64 CR is saved in the low word of a long on the stack.
87 */
88 
89 /*@-exportheader@*/
90 void
ffi_prep_args(extended_cif * inEcif,unsigned * const stack)91 ffi_prep_args(
92 	extended_cif*	inEcif,
93 	unsigned *const	stack)
94 /*@=exportheader@*/
95 {
96 	/*	Copy the ecif to a local var so we can trample the arg.
97 		BC note: test this with GP later for possible problems...	*/
98 	volatile extended_cif*	ecif	= inEcif;
99 
100 	const unsigned bytes	= ecif->cif->bytes;
101 	const unsigned flags	= ecif->cif->flags;
102 
103 	/*	Cast the stack arg from int* to long*. sizeof(long) == 4 in 32-bit mode
104 		and 8 in 64-bit mode.	*/
105 	unsigned long *const longStack	= (unsigned long *const)stack;
106 
107 	/* 'stacktop' points at the previous backchain pointer.	*/
108 #if defined(__ppc64__)
109 	//	In ppc-darwin.s, an extra 96 bytes is reserved for the linkage area,
110 	//	saved registers, and an extra FPR.
111 	unsigned long *const stacktop	=
112 		(unsigned long *)(unsigned long)((char*)longStack + bytes + 96);
113 #elif defined(__ppc__)
114 	unsigned long *const stacktop	= longStack + (bytes / sizeof(long));
115 #else
116 #error undefined architecture
117 #endif
118 
119 	/* 'fpr_base' points at the space for fpr1, and grows upwards as
120 		we use FPR registers.  */
121 	double*		fpr_base = (double*)(stacktop - ASM_NEEDS_REGISTERS) -
122 		NUM_FPR_ARG_REGISTERS;
123 
124 #if defined(__ppc64__)
125 	//	64-bit saves an extra register, and uses an extra FPR. Knock fpr_base
126 	//	down a couple pegs.
127 	fpr_base -= 2;
128 #endif
129 
130 	unsigned int	fparg_count = 0;
131 
132 	/* 'next_arg' grows up as we put parameters in it.  */
133 	unsigned long*	next_arg = longStack + 6; /* 6 reserved positions.  */
134 
135 	int				i;
136 	double			double_tmp;
137 	void**			p_argv = ecif->avalue;
138 	unsigned long	gprvalue;
139 	ffi_type**		ptr = ecif->cif->arg_types;
140 
141 	/* Check that everything starts aligned properly.  */
142 	FFI_ASSERT(stack == SF_ROUND(stack));
143 	FFI_ASSERT(stacktop == SF_ROUND(stacktop));
144 	FFI_ASSERT(bytes == SF_ROUND(bytes));
145 
146 	/*	Deal with return values that are actually pass-by-reference.
147 		Rule:
148 		Return values are referenced by r3, so r4 is the first parameter.  */
149 
150 	if (flags & FLAG_RETVAL_REFERENCE)
151 		*next_arg++ = (unsigned long)(char*)ecif->rvalue;
152 
153 	/* Now for the arguments.  */
154 	for (i = ecif->cif->nargs; i > 0; i--, ptr++, p_argv++)
155     {
156 		switch ((*ptr)->type)
157 		{
158 			/*	If a floating-point parameter appears before all of the general-
159 				purpose registers are filled, the corresponding GPRs that match
160 				the size of the floating-point parameter are shadowed for the
161 				benefit of vararg and pre-ANSI functions.	*/
162 			case FFI_TYPE_FLOAT:
163 				double_tmp = *(float*)*p_argv;
164 
165 				if (fparg_count < NUM_FPR_ARG_REGISTERS)
166 					*fpr_base++ = double_tmp;
167 
168 				*(double*)next_arg = double_tmp;
169 
170 				next_arg++;
171 				fparg_count++;
172 				FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
173 
174 				break;
175 
176 			case FFI_TYPE_DOUBLE:
177 				double_tmp = *(double*)*p_argv;
178 
179 				if (fparg_count < NUM_FPR_ARG_REGISTERS)
180 					*fpr_base++ = double_tmp;
181 
182 				*(double*)next_arg = double_tmp;
183 
184 				next_arg += MODE_CHOICE(2,1);
185 				fparg_count++;
186 				FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
187 
188 				break;
189 
190 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
191 			case FFI_TYPE_LONGDOUBLE:
192 #if defined(__ppc64__)
193 				if (fparg_count < NUM_FPR_ARG_REGISTERS)
194 					*(long double*)fpr_base	= *(long double*)*p_argv;
195 #elif defined(__ppc__)
196 				if (fparg_count < NUM_FPR_ARG_REGISTERS - 1)
197 					*(long double*)fpr_base	= *(long double*)*p_argv;
198 				else if (fparg_count == NUM_FPR_ARG_REGISTERS - 1)
199 					*(double*)fpr_base	= *(double*)*p_argv;
200 #else
201 #error undefined architecture
202 #endif
203 
204 				*(long double*)next_arg	= *(long double*)*p_argv;
205 				fparg_count += 2;
206 				fpr_base += 2;
207 				next_arg += MODE_CHOICE(4,2);
208 				FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
209 
210 				break;
211 #endif	//	FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
212 
213 			case FFI_TYPE_UINT64:
214 			case FFI_TYPE_SINT64:
215 #if defined(__ppc64__)
216 				gprvalue = *(long long*)*p_argv;
217 				goto putgpr;
218 #elif defined(__ppc__)
219 				*(long long*)next_arg = *(long long*)*p_argv;
220 				next_arg += 2;
221 				break;
222 #else
223 #error undefined architecture
224 #endif
225 
226 			case FFI_TYPE_POINTER:
227 				gprvalue = *(unsigned long*)*p_argv;
228 				goto putgpr;
229 
230 			case FFI_TYPE_UINT8:
231 				gprvalue = *(unsigned char*)*p_argv;
232 				goto putgpr;
233 
234 			case FFI_TYPE_SINT8:
235 				gprvalue = *(signed char*)*p_argv;
236 				goto putgpr;
237 
238 			case FFI_TYPE_UINT16:
239 				gprvalue = *(unsigned short*)*p_argv;
240 				goto putgpr;
241 
242 			case FFI_TYPE_SINT16:
243 				gprvalue = *(signed short*)*p_argv;
244 				goto putgpr;
245 
246 			case FFI_TYPE_STRUCT:
247 			{
248 #if defined(__ppc64__)
249 				unsigned int	gprSize = 0;
250 				unsigned int	fprSize = 0;
251 
252 				ffi64_struct_to_reg_form(*ptr, (char*)*p_argv, NULL, &fparg_count,
253 					(char*)next_arg, &gprSize, (char*)fpr_base, &fprSize);
254 				next_arg += gprSize / sizeof(long);
255 				fpr_base += fprSize / sizeof(double);
256 
257 #elif defined(__ppc__)
258 				char*	dest_cpy = (char*)next_arg;
259 
260 			/*	Structures that match the basic modes (QI 1 byte, HI 2 bytes,
261 				SI 4 bytes) are aligned as if they were those modes.
262 				Structures with 3 byte in size are padded upwards.  */
263 				unsigned size_al = (*ptr)->size;
264 
265 			/*	If the first member of the struct is a double, then align
266 				the struct to double-word.  */
267 				if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE)
268 					size_al = ALIGN((*ptr)->size, 8);
269 
270 				if (ecif->cif->abi == FFI_DARWIN)
271 				{
272 					if (size_al < 3)
273 						dest_cpy += 4 - size_al;
274 				}
275 
276 				memcpy((char*)dest_cpy, (char*)*p_argv, size_al);
277 				next_arg += (size_al + 3) / 4;
278 #else
279 #error undefined architecture
280 #endif
281 				break;
282 			}
283 
284 			case FFI_TYPE_INT:
285 			case FFI_TYPE_UINT32:
286 			case FFI_TYPE_SINT32:
287 				gprvalue = *(unsigned*)*p_argv;
288 
289 putgpr:
290 				*next_arg++ = gprvalue;
291 				break;
292 
293 			default:
294 				break;
295 		}
296 	}
297 
298   /* Check that we didn't overrun the stack...  */
299   //FFI_ASSERT(gpr_base <= stacktop - ASM_NEEDS_REGISTERS);
300   //FFI_ASSERT((unsigned *)fpr_base
301   //	     <= stacktop - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS);
302   //FFI_ASSERT(flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4);
303 }
304 
305 #if defined(__ppc64__)
306 
307 bool
ffi64_struct_contains_fp(const ffi_type * inType)308 ffi64_struct_contains_fp(
309 	const ffi_type*	inType)
310 {
311 	bool			containsFP	= false;
312 	unsigned int	i;
313 
314 	for (i = 0; inType->elements[i] != NULL && !containsFP; i++)
315 	{
316 		if (inType->elements[i]->type == FFI_TYPE_FLOAT		||
317 			inType->elements[i]->type == FFI_TYPE_DOUBLE	||
318 			inType->elements[i]->type == FFI_TYPE_LONGDOUBLE)
319 			containsFP = true;
320 		else if (inType->elements[i]->type == FFI_TYPE_STRUCT)
321 			containsFP = ffi64_struct_contains_fp(inType->elements[i]);
322 	}
323 
324 	return containsFP;
325 }
326 
327 #endif	// defined(__ppc64__)
328 
329 /* Perform machine dependent cif processing.  */
330 ffi_status
ffi_prep_cif_machdep(ffi_cif * cif)331 ffi_prep_cif_machdep(
332 	ffi_cif*	cif)
333 {
334 	/* All this is for the DARWIN ABI.  */
335 	int				i;
336 	ffi_type**		ptr;
337 	int				intarg_count = 0;
338 	int				fparg_count = 0;
339 	unsigned int	flags = 0;
340 	unsigned int	size_al = 0;
341 
342 	/*	All the machine-independent calculation of cif->bytes will be wrong.
343 		Redo the calculation for DARWIN.  */
344 
345 	/*	Space for the frame pointer, callee's LR, CR, etc, and for
346 		the asm's temp regs.  */
347 	unsigned int	bytes = (6 + ASM_NEEDS_REGISTERS) * sizeof(long);
348 
349 	/*	Return value handling.  The rules are as follows:
350 		- 32-bit (or less) integer values are returned in gpr3;
351 		- Structures of size <= 4 bytes also returned in gpr3;
352 		- 64-bit integer values and structures between 5 and 8 bytes are
353 			returned in gpr3 and gpr4;
354 		- Single/double FP values are returned in fpr1;
355 		- Long double FP (if not equivalent to double) values are returned in
356 			fpr1 and fpr2;
357 		- Larger structures values are allocated space and a pointer is passed
358 			as the first argument.  */
359 	switch (cif->rtype->type)
360 	{
361 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
362 		case FFI_TYPE_LONGDOUBLE:
363 			flags |= FLAG_RETURNS_128BITS;
364 			flags |= FLAG_RETURNS_FP;
365 			break;
366 #endif	// FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
367 
368 		case FFI_TYPE_DOUBLE:
369 			flags |= FLAG_RETURNS_64BITS;
370 			/* Fall through.  */
371 		case FFI_TYPE_FLOAT:
372 			flags |= FLAG_RETURNS_FP;
373 			break;
374 
375 #if defined(__ppc64__)
376 		case FFI_TYPE_POINTER:
377 #endif
378 		case FFI_TYPE_UINT64:
379 		case FFI_TYPE_SINT64:
380 			flags |= FLAG_RETURNS_64BITS;
381 			break;
382 
383 		case FFI_TYPE_STRUCT:
384 		{
385 #if defined(__ppc64__)
386 
387 			if (ffi64_stret_needs_ptr(cif->rtype, NULL, NULL))
388 			{
389 				flags |= FLAG_RETVAL_REFERENCE;
390 				flags |= FLAG_RETURNS_NOTHING;
391 				intarg_count++;
392 			}
393 			else
394 			{
395 				flags |= FLAG_RETURNS_STRUCT;
396 
397 				if (ffi64_struct_contains_fp(cif->rtype))
398 					flags |= FLAG_STRUCT_CONTAINS_FP;
399 			}
400 
401 #elif defined(__ppc__)
402 
403 			flags |= FLAG_RETVAL_REFERENCE;
404 			flags |= FLAG_RETURNS_NOTHING;
405 			intarg_count++;
406 
407 #else
408 #error undefined architecture
409 #endif
410 			break;
411 		}
412 
413 		case FFI_TYPE_VOID:
414 			flags |= FLAG_RETURNS_NOTHING;
415 			break;
416 
417 		default:
418 			/* Returns 32-bit integer, or similar.  Nothing to do here.  */
419 			break;
420 	}
421 
422 	/*	The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the
423 		first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest
424 		goes on the stack.  Structures are passed as a pointer to a copy of
425 		the structure. Stuff on the stack needs to keep proper alignment.  */
426 	for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
427 	{
428 		switch ((*ptr)->type)
429 		{
430 			case FFI_TYPE_FLOAT:
431 			case FFI_TYPE_DOUBLE:
432 				fparg_count++;
433 				/*	If this FP arg is going on the stack, it must be
434 					8-byte-aligned.  */
435 				if (fparg_count > NUM_FPR_ARG_REGISTERS
436 					&& intarg_count % 2 != 0)
437 					intarg_count++;
438 				break;
439 
440 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
441 			case FFI_TYPE_LONGDOUBLE:
442 				fparg_count += 2;
443 				/*	If this FP arg is going on the stack, it must be
444 					8-byte-aligned.  */
445 
446 				if (
447 #if defined(__ppc64__)
448 					fparg_count > NUM_FPR_ARG_REGISTERS + 1
449 #elif defined(__ppc__)
450 					fparg_count > NUM_FPR_ARG_REGISTERS
451 #else
452 #error undefined architecture
453 #endif
454 					&& intarg_count % 2 != 0)
455 					intarg_count++;
456 
457 				intarg_count += 2;
458 				break;
459 #endif	// FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
460 
461 			case FFI_TYPE_UINT64:
462 			case FFI_TYPE_SINT64:
463 				/*	'long long' arguments are passed as two words, but
464 					either both words must fit in registers or both go
465 					on the stack.  If they go on the stack, they must
466 					be 8-byte-aligned.  */
467 				if (intarg_count == NUM_GPR_ARG_REGISTERS - 1
468 					|| (intarg_count >= NUM_GPR_ARG_REGISTERS
469 					&& intarg_count % 2 != 0))
470 					intarg_count++;
471 
472 				intarg_count += MODE_CHOICE(2,1);
473 
474 				break;
475 
476 			case FFI_TYPE_STRUCT:
477 				size_al = (*ptr)->size;
478 				/*	If the first member of the struct is a double, then align
479 					the struct to double-word.  */
480 				if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE)
481 					size_al = ALIGN((*ptr)->size, 8);
482 
483 #if defined(__ppc64__)
484 				// Look for FP struct members.
485 				unsigned int	j;
486 
487 				for (j = 0; (*ptr)->elements[j] != NULL; j++)
488 				{
489 					if ((*ptr)->elements[j]->type == FFI_TYPE_FLOAT	||
490 						(*ptr)->elements[j]->type == FFI_TYPE_DOUBLE)
491 					{
492 						fparg_count++;
493 
494 						if (fparg_count > NUM_FPR_ARG_REGISTERS)
495 							intarg_count++;
496 					}
497 					else if ((*ptr)->elements[j]->type == FFI_TYPE_LONGDOUBLE)
498 					{
499 						fparg_count += 2;
500 
501 						if (fparg_count > NUM_FPR_ARG_REGISTERS + 1)
502 							intarg_count += 2;
503 					}
504 					else
505 						intarg_count++;
506 				}
507 #elif defined(__ppc__)
508 				intarg_count += (size_al + 3) / 4;
509 #else
510 #error undefined architecture
511 #endif
512 
513 				break;
514 
515 			default:
516 				/*	Everything else is passed as a 4/8-byte word in a GPR, either
517 					the object itself or a pointer to it.  */
518 				intarg_count++;
519 				break;
520 		}
521 	}
522 
523 	/* Space for the FPR registers, if needed.  */
524 	if (fparg_count != 0)
525 	{
526 		flags |= FLAG_FP_ARGUMENTS;
527 #if defined(__ppc64__)
528 		bytes += (NUM_FPR_ARG_REGISTERS + 1) * sizeof(double);
529 #elif defined(__ppc__)
530 		bytes += NUM_FPR_ARG_REGISTERS * sizeof(double);
531 #else
532 #error undefined architecture
533 #endif
534 	}
535 
536 	/* Stack space.  */
537 #if defined(__ppc64__)
538 	if ((intarg_count + fparg_count) > NUM_GPR_ARG_REGISTERS)
539 		bytes += (intarg_count + fparg_count) * sizeof(long);
540 #elif defined(__ppc__)
541 	if ((intarg_count + 2 * fparg_count) > NUM_GPR_ARG_REGISTERS)
542 		bytes += (intarg_count + 2 * fparg_count) * sizeof(long);
543 #else
544 #error undefined architecture
545 #endif
546 	else
547 		bytes += NUM_GPR_ARG_REGISTERS * sizeof(long);
548 
549 	/* The stack space allocated needs to be a multiple of 16/32 bytes.  */
550 	bytes = SF_ROUND(bytes);
551 
552 	cif->flags = flags;
553 	cif->bytes = bytes;
554 
555 	return FFI_OK;
556 }
557 
558 /*@-declundef@*/
559 /*@-exportheader@*/
560 extern void
561 ffi_call_AIX(
562 /*@out@*/	extended_cif*,
563 			unsigned,
564 			unsigned,
565 /*@out@*/	unsigned*,
566 			void (*fn)(void),
567 			void (*fn2)(extended_cif*, unsigned *const));
568 
569 extern void
570 ffi_call_DARWIN(
571 /*@out@*/	extended_cif*,
572 			unsigned long,
573 			unsigned,
574 /*@out@*/	unsigned*,
575 			void (*fn)(void),
576 			void (*fn2)(extended_cif*, unsigned *const));
577 /*@=declundef@*/
578 /*@=exportheader@*/
579 
580 void
ffi_call(ffi_cif * cif,void (* fn)(void),void * rvalue,void ** avalue)581 ffi_call(
582 /*@dependent@*/	ffi_cif*	cif,
583 				void		(*fn)(void),
584 /*@out@*/		void*		rvalue,
585 /*@dependent@*/	void**		avalue)
586 {
587 	extended_cif ecif;
588 
589 	ecif.cif = cif;
590 	ecif.avalue = avalue;
591 
592 	/*	If the return value is a struct and we don't have a return
593 		value address then we need to make one.  */
594 	if ((rvalue == NULL) &&
595 		(cif->rtype->type == FFI_TYPE_STRUCT))
596 	{
597 		/*@-sysunrecog@*/
598 		ecif.rvalue = alloca(cif->rtype->size);
599 		/*@=sysunrecog@*/
600 	}
601 	else
602 		ecif.rvalue = rvalue;
603 
604 	switch (cif->abi)
605 	{
606 		case FFI_AIX:
607 			/*@-usedef@*/
608 			ffi_call_AIX(&ecif, -cif->bytes,
609 				cif->flags, ecif.rvalue, fn, ffi_prep_args);
610 			/*@=usedef@*/
611 			break;
612 
613 		case FFI_DARWIN:
614 			/*@-usedef@*/
615 			ffi_call_DARWIN(&ecif, -(long)cif->bytes,
616 				cif->flags, ecif.rvalue, fn, ffi_prep_args);
617 			/*@=usedef@*/
618 			break;
619 
620 		default:
621 			FFI_ASSERT(0);
622 			break;
623     }
624 }
625 
/* The stack frame layout used in darwin_closure.S and aix_closure.S is
   shown below.
628 
629    SP previous -> +---------------------------------------+ <--- child frame
630 		  | back chain to caller 4                |
631 		  +---------------------------------------+ 4
632 		  | saved CR 4                            |
633 		  +---------------------------------------+ 8
634 		  | saved LR 4                            |
635 		  +---------------------------------------+ 12
636 		  | reserved for compilers 4              |
637 		  +---------------------------------------+ 16
638 		  | reserved for binders 4                |
639 		  +---------------------------------------+ 20
640 		  | saved TOC pointer 4                   |
641 		  +---------------------------------------+ 24
642 		  | always reserved 8*4=32 (previous GPRs)|
643 		  | according to the linkage convention   |
644 		  | from AIX                              |
645 		  +---------------------------------------+ 56
646 		  | our FPR area 13*8=104                 |
647 		  | f1                                    |
648 		  | .                                     |
649 		  | f13                                   |
650 		  +---------------------------------------+ 160
651 		  | result area 8                         |
652 		  +---------------------------------------+ 168
		  | alignment to the next multiple of 16  |
654 SP current -->    +---------------------------------------+ 176 <- parent frame
655 		  | back chain to caller 4                |
656 		  +---------------------------------------+ 180
657 		  | saved CR 4                            |
658 		  +---------------------------------------+ 184
659 		  | saved LR 4                            |
660 		  +---------------------------------------+ 188
661 		  | reserved for compilers 4              |
662 		  +---------------------------------------+ 192
663 		  | reserved for binders 4                |
664 		  +---------------------------------------+ 196
665 		  | saved TOC pointer 4                   |
666 		  +---------------------------------------+ 200
667 		  | always reserved 8*4=32  we store our  |
668 		  | GPRs here                             |
669 		  | r3                                    |
670 		  | .                                     |
671 		  | r10                                   |
672 		  +---------------------------------------+ 232
673 		  | overflow part                         |
674 		  +---------------------------------------+ xxx
675 		  | ????                                  |
676 		  +---------------------------------------+ xxx
677 */
678 
679 #if !defined(POWERPC_DARWIN)
680 
/*	Step, in bytes, used by flush_range() when walking a region.  */
#define MIN_LINE_SIZE 32

/*	flush_icache

	Issue the dcbf / sync / icbi / sync / isync instruction sequence for the
	single address 'addr'. Compiles to an empty function when _AIX is
	defined.  */
static void
flush_icache(
	char*	addr)
{
#if !defined(_AIX)
	__asm__ volatile (
		"dcbf 0,%0\n"
		"sync\n"
		"icbi 0,%0\n"
		"sync\n"
		"isync"
		: : "r" (addr) : "memory");
#endif
}
697 
698 static void
flush_range(char * addr,int size)699 flush_range(
700 	char*	addr,
701 	int		size)
702 {
703 	int i;
704 
705 	for (i = 0; i < size; i += MIN_LINE_SIZE)
706 		flush_icache(addr + i);
707 
708 	flush_icache(addr + size - 1);
709 }
710 
711 #endif	// !defined(POWERPC_DARWIN)
712 
713 ffi_status
ffi_prep_closure(ffi_closure * closure,ffi_cif * cif,void (* fun)(ffi_cif *,void *,void **,void *),void * user_data)714 ffi_prep_closure(
715 	ffi_closure*	closure,
716 	ffi_cif*		cif,
717 	void			(*fun)(ffi_cif*, void*, void**, void*),
718 	void*			user_data)
719 {
720 	switch (cif->abi)
721 	{
722 		case FFI_DARWIN:
723 		{
724 			FFI_ASSERT (cif->abi == FFI_DARWIN);
725 
726 			unsigned int*	tramp = (unsigned int*)&closure->tramp[0];
727 
728 #if defined(__ppc64__)
729 			tramp[0] = 0x7c0802a6;	//	mflr	r0
730 			tramp[1] = 0x429f0005;	//	bcl		20,31,+0x8
731 			tramp[2] = 0x7d6802a6;	//	mflr	r11
732 			tramp[3] = 0x7c0803a6;	//	mtlr	r0
733 			tramp[4] = 0xe98b0018;	//	ld		r12,24(r11)
734 			tramp[5] = 0x7d8903a6;	//	mtctr	r12
735 			tramp[6] = 0xe96b0020;	//	ld		r11,32(r11)
736 			tramp[7] = 0x4e800420;	//	bctr
737 			*(unsigned long*)&tramp[8] = (unsigned long)ffi_closure_ASM;
738 			*(unsigned long*)&tramp[10] = (unsigned long)closure;
739 #elif defined(__ppc__)
740 			tramp[0] = 0x7c0802a6;	//	mflr	r0
741 			tramp[1] = 0x429f0005;	//	bcl		20,31,+0x8
742 			tramp[2] = 0x7d6802a6;	//	mflr	r11
743 			tramp[3] = 0x7c0803a6;	//	mtlr	r0
744 			tramp[4] = 0x818b0018;	//	lwz		r12,24(r11)
745 			tramp[5] = 0x7d8903a6;	//	mtctr	r12
746 			tramp[6] = 0x816b001c;	//	lwz		r11,28(r11)
747 			tramp[7] = 0x4e800420;	//	bctr
748 			tramp[8] = (unsigned long)ffi_closure_ASM;
749 			tramp[9] = (unsigned long)closure;
750 #else
751 #error undefined architecture
752 #endif
753 
754 			closure->cif = cif;
755 			closure->fun = fun;
756 			closure->user_data = user_data;
757 
758 			// Flush the icache. Only necessary on Darwin.
759 #if defined(POWERPC_DARWIN)
760 			sys_icache_invalidate(closure->tramp, FFI_TRAMPOLINE_SIZE);
761 #else
762 			flush_range(closure->tramp, FFI_TRAMPOLINE_SIZE);
763 #endif
764 
765 			break;
766 		}
767 
768 		case FFI_AIX:
769 		{
770 			FFI_ASSERT (cif->abi == FFI_AIX);
771 
772 			ffi_aix_trampoline_struct*	tramp_aix =
773 				(ffi_aix_trampoline_struct*)(closure->tramp);
774 			aix_fd*	fd = (aix_fd*)(void*)ffi_closure_ASM;
775 
776 			tramp_aix->code_pointer = fd->code_pointer;
777 			tramp_aix->toc = fd->toc;
778 			tramp_aix->static_chain = closure;
779 			closure->cif = cif;
780 			closure->fun = fun;
781 			closure->user_data = user_data;
782 			break;
783 		}
784 
785 		default:
786 			return FFI_BAD_ABI;
787 	}
788 
789 	return FFI_OK;
790 }
791 
#if defined(__ppc__)

/*	The two double-sized halves of a long double.  */
typedef double ldbits[2];

/*	Lets a long double be assembled from, or read as, its two halves.
	ffi_closure_helper_DARWIN() uses it when one half arrives in an FPR
	slot and the other in the GPR/stack area.  */
typedef union
{
	ldbits		lb;
	long double	ld;
} ldu;

#endif	// defined(__ppc__)

/*	One saved-FPR slot, readable as either a float or a double.  */
typedef union
{
	float	f;
	double	d;
} ffi_dblfl;
807 
808 /*	The trampoline invokes ffi_closure_ASM, and on entry, r11 holds the
809 	address of the closure. After storing the registers that could possibly
810 	contain parameters to be passed into the stack frame and setting up space
811 	for a return value, ffi_closure_ASM invokes the following helper function
812 	to do most of the work.  */
813 int
ffi_closure_helper_DARWIN(ffi_closure * closure,void * rvalue,unsigned long * pgr,ffi_dblfl * pfr)814 ffi_closure_helper_DARWIN(
815 	ffi_closure*	closure,
816 	void*			rvalue,
817 	unsigned long*	pgr,
818 	ffi_dblfl*		pfr)
819 {
820 	/*	rvalue is the pointer to space for return value in closure assembly
821 		pgr is the pointer to where r3-r10 are stored in ffi_closure_ASM
822 		pfr is the pointer to where f1-f13 are stored in ffi_closure_ASM.  */
823 
824 #if defined(__ppc__)
825 	ldu	temp_ld;
826 #endif
827 
828 	double				temp;
829 	unsigned int		i;
830 	unsigned int		nf = 0;	/* number of FPRs already used.  */
831 	unsigned int		ng = 0;	/* number of GPRs already used.  */
832 	ffi_cif*			cif = closure->cif;
833 	long				avn = cif->nargs;
834 	void**				avalue = alloca(cif->nargs * sizeof(void*));
835 	ffi_type**			arg_types = cif->arg_types;
836 
837 	/*	Copy the caller's structure return value address so that the closure
838 		returns the data directly to the caller.  */
839 #if defined(__ppc64__)
840 	if (cif->rtype->type == FFI_TYPE_STRUCT &&
841 		ffi64_stret_needs_ptr(cif->rtype, NULL, NULL))
842 #elif defined(__ppc__)
843 	if (cif->rtype->type == FFI_TYPE_STRUCT)
844 #else
845 #error undefined architecture
846 #endif
847 	{
848 		rvalue = (void*)*pgr;
849 		pgr++;
850 		ng++;
851 	}
852 
853 	/* Grab the addresses of the arguments from the stack frame.  */
854 	for (i = 0; i < avn; i++)
855 	{
856 		switch (arg_types[i]->type)
857 		{
858 			case FFI_TYPE_SINT8:
859 			case FFI_TYPE_UINT8:
860 				avalue[i] = (char*)pgr + MODE_CHOICE(3,7);
861 				ng++;
862 				pgr++;
863 				break;
864 
865 			case FFI_TYPE_SINT16:
866 			case FFI_TYPE_UINT16:
867 				avalue[i] = (char*)pgr + MODE_CHOICE(2,6);
868 				ng++;
869 				pgr++;
870 				break;
871 
872 #if defined(__ppc__)
873 			case FFI_TYPE_POINTER:
874 #endif
875 			case FFI_TYPE_SINT32:
876 			case FFI_TYPE_UINT32:
877 				avalue[i] = (char*)pgr + MODE_CHOICE(0,4);
878 				ng++;
879 				pgr++;
880 
881 				break;
882 
883 			case FFI_TYPE_STRUCT:
884 				if (cif->abi == FFI_DARWIN)
885 				{
886 #if defined(__ppc64__)
887 					unsigned int	gprSize = 0;
888 					unsigned int	fprSize	= 0;
889 					unsigned int	savedFPRSize = fprSize;
890 
891 					avalue[i] = alloca(arg_types[i]->size);
892 					ffi64_struct_to_ram_form(arg_types[i], (const char*)pgr,
893 						&gprSize, (const char*)pfr, &fprSize, &nf, avalue[i], NULL);
894 
895 					ng	+= gprSize / sizeof(long);
896 					pgr	+= gprSize / sizeof(long);
897 					pfr	+= (fprSize - savedFPRSize) / sizeof(double);
898 
899 #elif defined(__ppc__)
900 					/*	Structures that match the basic modes (QI 1 byte, HI 2 bytes,
901 						SI 4 bytes) are aligned as if they were those modes.  */
902 					unsigned int	size_al	= size_al = arg_types[i]->size;
903 
904 					/*	If the first member of the struct is a double, then align
905 						the struct to double-word.  */
906 					if (arg_types[i]->elements[0]->type == FFI_TYPE_DOUBLE)
907 						size_al = ALIGN(arg_types[i]->size, 8);
908 
909 					if (size_al < 3)
910 						avalue[i] = (void*)pgr + MODE_CHOICE(4,8) - size_al;
911 					else
912 						avalue[i] = (void*)pgr;
913 
914 					ng	+= (size_al + 3) / sizeof(long);
915 					pgr += (size_al + 3) / sizeof(long);
916 #else
917 #error undefined architecture
918 #endif
919 				}
920 
921 				break;
922 
923 #if defined(__ppc64__)
924 			case FFI_TYPE_POINTER:
925 #endif
926 			case FFI_TYPE_SINT64:
927 			case FFI_TYPE_UINT64:
928 				/* Long long ints are passed in 1 or 2 GPRs.  */
929 				avalue[i] = pgr;
930 				ng += MODE_CHOICE(2,1);
931 				pgr += MODE_CHOICE(2,1);
932 
933 				break;
934 
935 			case FFI_TYPE_FLOAT:
936 				/*	A float value consumes a GPR.
937 					There are 13 64-bit floating point registers.  */
938 				if (nf < NUM_FPR_ARG_REGISTERS)
939 				{
940 					temp = pfr->d;
941 					pfr->f = (float)temp;
942 					avalue[i] = pfr;
943 					pfr++;
944 				}
945 				else
946 					avalue[i] = pgr;
947 
948 				nf++;
949 				ng++;
950 				pgr++;
951 				break;
952 
953 			case FFI_TYPE_DOUBLE:
954 				/*	A double value consumes one or two GPRs.
955 					There are 13 64bit floating point registers.  */
956 				if (nf < NUM_FPR_ARG_REGISTERS)
957 				{
958 					avalue[i] = pfr;
959 					pfr++;
960 				}
961 				else
962 					avalue[i] = pgr;
963 
964 				nf++;
965 				ng += MODE_CHOICE(2,1);
966 				pgr += MODE_CHOICE(2,1);
967 
968 				break;
969 
970 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
971 
972 			case FFI_TYPE_LONGDOUBLE:
973 #if defined(__ppc64__)
974 				if (nf < NUM_FPR_ARG_REGISTERS)
975 				{
976 					avalue[i] = pfr;
977 					pfr += 2;
978 				}
979 #elif defined(__ppc__)
980 				/*	A long double value consumes 2/4 GPRs and 2 FPRs.
981 					There are 13 64bit floating point registers.  */
982 				if (nf < NUM_FPR_ARG_REGISTERS - 1)
983 				{
984 					avalue[i] = pfr;
985 					pfr += 2;
986 				}
987 				/*	Here we have the situation where one part of the long double
988 					is stored in fpr13 and the other part is already on the stack.
989 					We use a union to pass the long double to avalue[i].  */
990 				else if (nf == NUM_FPR_ARG_REGISTERS - 1)
991 				{
992 					memcpy (&temp_ld.lb[0], pfr, sizeof(temp_ld.lb[0]));
993 					memcpy (&temp_ld.lb[1], pgr + 2, sizeof(temp_ld.lb[1]));
994 					avalue[i] = &temp_ld.ld;
995 				}
996 #else
997 #error undefined architecture
998 #endif
999 				else
1000 					avalue[i] = pgr;
1001 
1002 				nf += 2;
1003 				ng += MODE_CHOICE(4,2);
1004 				pgr += MODE_CHOICE(4,2);
1005 
1006 				break;
1007 
1008 #endif	/*	FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE	*/
1009 
1010 			default:
1011 				FFI_ASSERT(0);
1012 				break;
1013 		}
1014 	}
1015 
1016 	(closure->fun)(cif, rvalue, avalue, closure->user_data);
1017 
1018 	/* Tell ffi_closure_ASM to perform return type promotions.  */
1019 	return cif->rtype->type;
1020 }
1021 
1022 #if defined(__ppc64__)
1023 
1024 /*	ffi64_struct_to_ram_form
1025 
1026 	Rebuild a struct's natural layout from buffers of concatenated registers.
1027 	Return the number of registers used.
1028 	inGPRs[0-7] == r3, inFPRs[0-7] == f1 ...
1029 */
1030 void
ffi64_struct_to_ram_form(const ffi_type * inType,const char * inGPRs,unsigned int * ioGPRMarker,const char * inFPRs,unsigned int * ioFPRMarker,unsigned int * ioFPRsUsed,char * outStruct,unsigned int * ioStructMarker)1031 ffi64_struct_to_ram_form(
1032 	const ffi_type*	inType,
1033 	const char*		inGPRs,
1034 	unsigned int*	ioGPRMarker,
1035 	const char*		inFPRs,
1036 	unsigned int*	ioFPRMarker,
1037 	unsigned int*	ioFPRsUsed,
1038 	char*			outStruct,	// caller-allocated
1039 	unsigned int*	ioStructMarker)
1040 {
1041 	unsigned int	srcGMarker		= 0;
1042 	unsigned int	srcFMarker		= 0;
1043 	unsigned int	savedFMarker	= 0;
1044 	unsigned int	fprsUsed		= 0;
1045 	unsigned int	savedFPRsUsed	= 0;
1046 	unsigned int	destMarker		= 0;
1047 
1048 	static unsigned int	recurseCount	= 0;
1049 
1050 	if (ioGPRMarker)
1051 		srcGMarker	= *ioGPRMarker;
1052 
1053 	if (ioFPRMarker)
1054 	{
1055 		srcFMarker		= *ioFPRMarker;
1056 		savedFMarker	= srcFMarker;
1057 	}
1058 
1059 	if (ioFPRsUsed)
1060 	{
1061 		fprsUsed		= *ioFPRsUsed;
1062 		savedFPRsUsed	= fprsUsed;
1063 	}
1064 
1065 	if (ioStructMarker)
1066 		destMarker	= *ioStructMarker;
1067 
1068 	size_t			i;
1069 
1070 	switch (inType->size)
1071 	{
1072 		case 1: case 2: case 4:
1073 			srcGMarker += 8 - inType->size;
1074 			break;
1075 
1076 		default:
1077 			break;
1078 	}
1079 
1080 	for (i = 0; inType->elements[i] != NULL; i++)
1081 	{
1082 		switch (inType->elements[i]->type)
1083 		{
1084 			case FFI_TYPE_FLOAT:
1085 				srcFMarker = ALIGN(srcFMarker, 4);
1086 				srcGMarker = ALIGN(srcGMarker, 4);
1087 				destMarker = ALIGN(destMarker, 4);
1088 
1089 				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1090 				{
1091 					*(float*)&outStruct[destMarker]	=
1092 						(float)*(double*)&inFPRs[srcFMarker];
1093 					srcFMarker += 8;
1094 					fprsUsed++;
1095 				}
1096 				else
1097 					*(float*)&outStruct[destMarker]	=
1098 						(float)*(double*)&inGPRs[srcGMarker];
1099 
1100 				srcGMarker += 4;
1101 				destMarker += 4;
1102 
1103 				// Skip to next GPR if next element won't fit and we're
1104 				// not already at a register boundary.
1105 				if (inType->elements[i + 1] != NULL && (destMarker % 8))
1106 				{
1107 					if (!FFI_TYPE_1_BYTE(inType->elements[i + 1]->type) &&
1108 						(!FFI_TYPE_2_BYTE(inType->elements[i + 1]->type) ||
1109 						(ALIGN(srcGMarker, 8) - srcGMarker) < 2) &&
1110 						(!FFI_TYPE_4_BYTE(inType->elements[i + 1]->type) ||
1111 						(ALIGN(srcGMarker, 8) - srcGMarker) < 4))
1112 						srcGMarker	= ALIGN(srcGMarker, 8);
1113 				}
1114 
1115 				break;
1116 
1117 			case FFI_TYPE_DOUBLE:
1118 				srcFMarker = ALIGN(srcFMarker, 8);
1119 				destMarker = ALIGN(destMarker, 8);
1120 
1121 				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1122 				{
1123 					*(double*)&outStruct[destMarker]	=
1124 						*(double*)&inFPRs[srcFMarker];
1125 					srcFMarker += 8;
1126 					fprsUsed++;
1127 				}
1128 				else
1129 					*(double*)&outStruct[destMarker]	=
1130 						*(double*)&inGPRs[srcGMarker];
1131 
1132 				destMarker += 8;
1133 
1134 				// Skip next GPR
1135 				srcGMarker += 8;
1136 				srcGMarker = ALIGN(srcGMarker, 8);
1137 
1138 				break;
1139 
1140 			case FFI_TYPE_LONGDOUBLE:
1141 				destMarker = ALIGN(destMarker, 16);
1142 
1143 				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1144 				{
1145 					srcFMarker = ALIGN(srcFMarker, 8);
1146 					srcGMarker = ALIGN(srcGMarker, 8);
1147 					*(long double*)&outStruct[destMarker]	=
1148 						*(long double*)&inFPRs[srcFMarker];
1149 					srcFMarker += 16;
1150 					fprsUsed += 2;
1151 				}
1152 				else
1153 				{
1154 					srcFMarker = ALIGN(srcFMarker, 16);
1155 					srcGMarker = ALIGN(srcGMarker, 16);
1156 					*(long double*)&outStruct[destMarker]	=
1157 						*(long double*)&inGPRs[srcGMarker];
1158 				}
1159 
1160 				destMarker += 16;
1161 
1162 				// Skip next 2 GPRs
1163 				srcGMarker += 16;
1164 				srcGMarker = ALIGN(srcGMarker, 8);
1165 
1166 				break;
1167 
1168 			case FFI_TYPE_UINT8:
1169 			case FFI_TYPE_SINT8:
1170 			{
1171 				if (inType->alignment == 1)	// chars only
1172 				{
1173 					if (inType->size == 1)
1174 						outStruct[destMarker++] = inGPRs[srcGMarker++];
1175 					else if (inType->size == 2)
1176 					{
1177 						outStruct[destMarker++] = inGPRs[srcGMarker++];
1178 						outStruct[destMarker++] = inGPRs[srcGMarker++];
1179 						i++;
1180 					}
1181 					else
1182 					{
1183 						memcpy(&outStruct[destMarker],
1184 							&inGPRs[srcGMarker], inType->size);
1185 						srcGMarker += inType->size;
1186 						destMarker += inType->size;
1187 						i += inType->size - 1;
1188 					}
1189 				}
1190 				else	// chars and other stuff
1191 				{
1192 					outStruct[destMarker++] = inGPRs[srcGMarker++];
1193 
1194 					// Skip to next GPR if next element won't fit and we're
1195 					// not already at a register boundary.
1196 					if (inType->elements[i + 1] != NULL && (srcGMarker % 8))
1197 					{
1198 						if (!FFI_TYPE_1_BYTE(inType->elements[i + 1]->type) &&
1199 							(!FFI_TYPE_2_BYTE(inType->elements[i + 1]->type) ||
1200 							(ALIGN(srcGMarker, 8) - srcGMarker) < 2) &&
1201 							(!FFI_TYPE_4_BYTE(inType->elements[i + 1]->type) ||
1202 							(ALIGN(srcGMarker, 8) - srcGMarker) < 4))
1203 							srcGMarker	= ALIGN(srcGMarker, inType->alignment);	// was 8
1204 					}
1205 				}
1206 
1207 				break;
1208 			}
1209 
1210 			case FFI_TYPE_UINT16:
1211 			case FFI_TYPE_SINT16:
1212 				srcGMarker = ALIGN(srcGMarker, 2);
1213 				destMarker = ALIGN(destMarker, 2);
1214 
1215 				*(short*)&outStruct[destMarker] =
1216 					*(short*)&inGPRs[srcGMarker];
1217 				srcGMarker += 2;
1218 				destMarker += 2;
1219 
1220 				break;
1221 
1222 			case FFI_TYPE_INT:
1223 			case FFI_TYPE_UINT32:
1224 			case FFI_TYPE_SINT32:
1225 				srcGMarker = ALIGN(srcGMarker, 4);
1226 				destMarker = ALIGN(destMarker, 4);
1227 
1228 				*(int*)&outStruct[destMarker] =
1229 					*(int*)&inGPRs[srcGMarker];
1230 				srcGMarker += 4;
1231 				destMarker += 4;
1232 
1233 				break;
1234 
1235 			case FFI_TYPE_POINTER:
1236 			case FFI_TYPE_UINT64:
1237 			case FFI_TYPE_SINT64:
1238 				srcGMarker = ALIGN(srcGMarker, 8);
1239 				destMarker = ALIGN(destMarker, 8);
1240 
1241 				*(long long*)&outStruct[destMarker] =
1242 					*(long long*)&inGPRs[srcGMarker];
1243 				srcGMarker += 8;
1244 				destMarker += 8;
1245 
1246 				break;
1247 
1248 			case FFI_TYPE_STRUCT:
1249 				recurseCount++;
1250 				ffi64_struct_to_ram_form(inType->elements[i], inGPRs,
1251 					&srcGMarker, inFPRs, &srcFMarker, &fprsUsed,
1252 					outStruct, &destMarker);
1253 				recurseCount--;
1254 				break;
1255 
1256 			default:
1257 				FFI_ASSERT(0);	// unknown element type
1258 				break;
1259 		}
1260 	}
1261 
1262 	srcGMarker = ALIGN(srcGMarker, inType->alignment);
1263 
1264 	// Take care of the special case for 16-byte structs, but not for
1265 	// nested structs.
1266 	if (recurseCount == 0 && srcGMarker == 16)
1267 	{
1268 		*(long double*)&outStruct[0] = *(long double*)&inGPRs[0];
1269 		srcFMarker	= savedFMarker;
1270 		fprsUsed	= savedFPRsUsed;
1271 	}
1272 
1273 	if (ioGPRMarker)
1274 		*ioGPRMarker = ALIGN(srcGMarker, 8);
1275 
1276 	if (ioFPRMarker)
1277 		*ioFPRMarker = srcFMarker;
1278 
1279 	if (ioFPRsUsed)
1280 		*ioFPRsUsed	= fprsUsed;
1281 
1282 	if (ioStructMarker)
1283 		*ioStructMarker	= ALIGN(destMarker, 8);
1284 }
1285 
1286 /*	ffi64_struct_to_reg_form
1287 
1288 	Copy a struct's elements into buffers that can be sliced into registers.
1289 	Return the sizes of the output buffers in bytes. Pass NULL buffer pointers
1290 	to calculate size only.
1291 	outGPRs[0-7] == r3, outFPRs[0-7] == f1 ...
1292 */
1293 void
ffi64_struct_to_reg_form(const ffi_type * inType,const char * inStruct,unsigned int * ioStructMarker,unsigned int * ioFPRsUsed,char * outGPRs,unsigned int * ioGPRSize,char * outFPRs,unsigned int * ioFPRSize)1294 ffi64_struct_to_reg_form(
1295 	const ffi_type*	inType,
1296 	const char*		inStruct,
1297 	unsigned int*	ioStructMarker,
1298 	unsigned int*	ioFPRsUsed,
1299 	char*			outGPRs,	// caller-allocated
1300 	unsigned int*	ioGPRSize,
1301 	char*			outFPRs,	// caller-allocated
1302 	unsigned int*	ioFPRSize)
1303 {
1304 	size_t			i;
1305 	unsigned int	srcMarker		= 0;
1306 	unsigned int	destGMarker		= 0;
1307 	unsigned int	destFMarker		= 0;
1308 	unsigned int	savedFMarker	= 0;
1309 	unsigned int	fprsUsed		= 0;
1310 	unsigned int	savedFPRsUsed	= 0;
1311 
1312 	static unsigned int	recurseCount	= 0;
1313 
1314 	if (ioStructMarker)
1315 		srcMarker	= *ioStructMarker;
1316 
1317 	if (ioFPRsUsed)
1318 	{
1319 		fprsUsed		= *ioFPRsUsed;
1320 		savedFPRsUsed	= fprsUsed;
1321 	}
1322 
1323 	if (ioGPRSize)
1324 		destGMarker	= *ioGPRSize;
1325 
1326 	if (ioFPRSize)
1327 	{
1328 		destFMarker		= *ioFPRSize;
1329 		savedFMarker	= destFMarker;
1330 	}
1331 
1332 	switch (inType->size)
1333 	{
1334 		case 1: case 2: case 4:
1335 			destGMarker += 8 - inType->size;
1336 			break;
1337 
1338 		default:
1339 			break;
1340 	}
1341 
1342 	for (i = 0; inType->elements[i] != NULL; i++)
1343 	{
1344 		switch (inType->elements[i]->type)
1345 		{
1346 			// Shadow floating-point types in GPRs for vararg and pre-ANSI
1347 			// functions.
1348 			case FFI_TYPE_FLOAT:
1349 				// Nudge markers to next 4/8-byte boundary
1350 				srcMarker = ALIGN(srcMarker, 4);
1351 				destGMarker = ALIGN(destGMarker, 4);
1352 				destFMarker = ALIGN(destFMarker, 8);
1353 
1354 				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1355 				{
1356 					if (outFPRs != NULL && inStruct != NULL)
1357 						*(double*)&outFPRs[destFMarker] =
1358 							(double)*(float*)&inStruct[srcMarker];
1359 
1360 					destFMarker += 8;
1361 					fprsUsed++;
1362 				}
1363 
1364 				if (outGPRs != NULL && inStruct != NULL)
1365 					*(double*)&outGPRs[destGMarker] =
1366 						(double)*(float*)&inStruct[srcMarker];
1367 
1368 				srcMarker += 4;
1369 				destGMarker += 4;
1370 
1371 				// Skip to next GPR if next element won't fit and we're
1372 				// not already at a register boundary.
1373 				if (inType->elements[i + 1] != NULL && (srcMarker % 8))
1374 				{
1375 					if (!FFI_TYPE_1_BYTE(inType->elements[i + 1]->type) &&
1376 						(!FFI_TYPE_2_BYTE(inType->elements[i + 1]->type) ||
1377 						(ALIGN(destGMarker, 8) - destGMarker) < 2) &&
1378 						(!FFI_TYPE_4_BYTE(inType->elements[i + 1]->type) ||
1379 						(ALIGN(destGMarker, 8) - destGMarker) < 4))
1380 						destGMarker	= ALIGN(destGMarker, 8);
1381 				}
1382 
1383 				break;
1384 
1385 			case FFI_TYPE_DOUBLE:
1386 				srcMarker = ALIGN(srcMarker, 8);
1387 				destFMarker = ALIGN(destFMarker, 8);
1388 
1389 				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1390 				{
1391 					if (outFPRs != NULL && inStruct != NULL)
1392 						*(double*)&outFPRs[destFMarker] =
1393 							*(double*)&inStruct[srcMarker];
1394 
1395 					destFMarker += 8;
1396 					fprsUsed++;
1397 				}
1398 
1399 				if (outGPRs != NULL && inStruct != NULL)
1400 					*(double*)&outGPRs[destGMarker] =
1401 						*(double*)&inStruct[srcMarker];
1402 
1403 				srcMarker += 8;
1404 
1405 				// Skip next GPR
1406 				destGMarker += 8;
1407 				destGMarker = ALIGN(destGMarker, 8);
1408 
1409 				break;
1410 
1411 			case FFI_TYPE_LONGDOUBLE:
1412 				srcMarker = ALIGN(srcMarker, 16);
1413 
1414 				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1415 				{
1416 					destFMarker = ALIGN(destFMarker, 8);
1417 					destGMarker = ALIGN(destGMarker, 8);
1418 
1419 					if (outFPRs != NULL && inStruct != NULL)
1420 						*(long double*)&outFPRs[destFMarker] =
1421 							*(long double*)&inStruct[srcMarker];
1422 
1423 					if (outGPRs != NULL && inStruct != NULL)
1424 						*(long double*)&outGPRs[destGMarker] =
1425 							*(long double*)&inStruct[srcMarker];
1426 
1427 					destFMarker += 16;
1428 					fprsUsed += 2;
1429 				}
1430 				else
1431 				{
1432 				 	destGMarker = ALIGN(destGMarker, 16);
1433 
1434 					 if (outGPRs != NULL && inStruct != NULL)
1435 						*(long double*)&outGPRs[destGMarker] =
1436 							*(long double*)&inStruct[srcMarker];
1437 				}
1438 
1439 				srcMarker += 16;
1440 				destGMarker += 16;	// Skip next 2 GPRs
1441 				destGMarker = ALIGN(destGMarker, 8);	// was 16
1442 
1443 				break;
1444 
1445 			case FFI_TYPE_UINT8:
1446 			case FFI_TYPE_SINT8:
1447 				if (inType->alignment == 1)	// bytes only
1448 				{
1449 					if (inType->size == 1)
1450 					{
1451 						if (outGPRs != NULL && inStruct != NULL)
1452 							outGPRs[destGMarker] = inStruct[srcMarker];
1453 
1454 						srcMarker++;
1455 						destGMarker++;
1456 					}
1457 					else if (inType->size == 2)
1458 					{
1459 						if (outGPRs != NULL && inStruct != NULL)
1460 						{
1461 							outGPRs[destGMarker] = inStruct[srcMarker];
1462 							outGPRs[destGMarker + 1] = inStruct[srcMarker + 1];
1463 						}
1464 
1465 						srcMarker += 2;
1466 						destGMarker += 2;
1467 
1468 						i++;
1469 					}
1470 					else
1471 					{
1472 						if (outGPRs != NULL && inStruct != NULL)
1473 						{
1474 							// Avoid memcpy for small chunks.
1475 							if (inType->size <= sizeof(long))
1476 								*(long*)&outGPRs[destGMarker] =
1477 									*(long*)&inStruct[srcMarker];
1478 							else
1479 								memcpy(&outGPRs[destGMarker],
1480 									&inStruct[srcMarker], inType->size);
1481 						}
1482 
1483 						srcMarker += inType->size;
1484 						destGMarker += inType->size;
1485 						i += inType->size - 1;
1486 					}
1487 				}
1488 				else	// bytes and other stuff
1489 				{
1490 					if (outGPRs != NULL && inStruct != NULL)
1491 						outGPRs[destGMarker] = inStruct[srcMarker];
1492 
1493 					srcMarker++;
1494 					destGMarker++;
1495 
1496 					// Skip to next GPR if next element won't fit and we're
1497 					// not already at a register boundary.
1498 					if (inType->elements[i + 1] != NULL && (destGMarker % 8))
1499 					{
1500 						if (!FFI_TYPE_1_BYTE(inType->elements[i + 1]->type) &&
1501 							(!FFI_TYPE_2_BYTE(inType->elements[i + 1]->type) ||
1502 							(ALIGN(destGMarker, 8) - destGMarker) < 2) &&
1503 							(!FFI_TYPE_4_BYTE(inType->elements[i + 1]->type) ||
1504 							(ALIGN(destGMarker, 8) - destGMarker) < 4))
1505 							destGMarker	= ALIGN(destGMarker, inType->alignment);	// was 8
1506 					}
1507 				}
1508 
1509 				break;
1510 
1511 			case FFI_TYPE_UINT16:
1512 			case FFI_TYPE_SINT16:
1513 				srcMarker = ALIGN(srcMarker, 2);
1514 				destGMarker = ALIGN(destGMarker, 2);
1515 
1516 				if (outGPRs != NULL && inStruct != NULL)
1517 					*(short*)&outGPRs[destGMarker] =
1518 						*(short*)&inStruct[srcMarker];
1519 
1520 				srcMarker += 2;
1521 				destGMarker += 2;
1522 
1523 				if (inType->elements[i + 1] == NULL)
1524 					destGMarker	= ALIGN(destGMarker, inType->alignment);
1525 
1526 				break;
1527 
1528 			case FFI_TYPE_INT:
1529 			case FFI_TYPE_UINT32:
1530 			case FFI_TYPE_SINT32:
1531 				srcMarker = ALIGN(srcMarker, 4);
1532 				destGMarker = ALIGN(destGMarker, 4);
1533 
1534 				if (outGPRs != NULL && inStruct != NULL)
1535 					*(int*)&outGPRs[destGMarker] =
1536 						*(int*)&inStruct[srcMarker];
1537 
1538 				srcMarker += 4;
1539 				destGMarker += 4;
1540 
1541 				break;
1542 
1543 			case FFI_TYPE_POINTER:
1544 			case FFI_TYPE_UINT64:
1545 			case FFI_TYPE_SINT64:
1546 				srcMarker = ALIGN(srcMarker, 8);
1547 				destGMarker = ALIGN(destGMarker, 8);
1548 
1549 				if (outGPRs != NULL && inStruct != NULL)
1550 					*(long long*)&outGPRs[destGMarker] =
1551 						*(long long*)&inStruct[srcMarker];
1552 
1553 				srcMarker += 8;
1554 				destGMarker += 8;
1555 
1556 				if (inType->elements[i + 1] == NULL)
1557 					destGMarker	= ALIGN(destGMarker, inType->alignment);
1558 
1559 				break;
1560 
1561 			case FFI_TYPE_STRUCT:
1562 				recurseCount++;
1563 				ffi64_struct_to_reg_form(inType->elements[i],
1564 					inStruct, &srcMarker, &fprsUsed, outGPRs,
1565 					&destGMarker, outFPRs, &destFMarker);
1566 				recurseCount--;
1567 				break;
1568 
1569 			default:
1570 				FFI_ASSERT(0);
1571 				break;
1572 		}
1573 	}
1574 
1575 	destGMarker	= ALIGN(destGMarker, inType->alignment);
1576 
1577 	// Take care of the special case for 16-byte structs, but not for
1578 	// nested structs.
1579 	if (recurseCount == 0 && destGMarker == 16)
1580 	{
1581 		if (outGPRs != NULL && inStruct != NULL)
1582 			*(long double*)&outGPRs[0] = *(long double*)&inStruct[0];
1583 
1584 		destFMarker	= savedFMarker;
1585 		fprsUsed	= savedFPRsUsed;
1586 	}
1587 
1588 	if (ioStructMarker)
1589 		*ioStructMarker	= ALIGN(srcMarker, 8);
1590 
1591 	if (ioFPRsUsed)
1592 		*ioFPRsUsed	= fprsUsed;
1593 
1594 	if (ioGPRSize)
1595 		*ioGPRSize = ALIGN(destGMarker, 8);
1596 
1597 	if (ioFPRSize)
1598 		*ioFPRSize = ALIGN(destFMarker, 8);
1599 }
1600 
1601 /*	ffi64_stret_needs_ptr
1602 
1603 	Determine whether a returned struct needs a pointer in r3 or can fit
1604 	in registers.
1605 */
1606 
1607 bool
ffi64_stret_needs_ptr(const ffi_type * inType,unsigned short * ioGPRCount,unsigned short * ioFPRCount)1608 ffi64_stret_needs_ptr(
1609 	const ffi_type*	inType,
1610 	unsigned short*	ioGPRCount,
1611 	unsigned short*	ioFPRCount)
1612 {
1613 	// Obvious case first- struct is larger than combined FPR size.
1614 	if (inType->size > 14 * 8)
1615 		return true;
1616 
1617 	// Now the struct can physically fit in registers, determine if it
1618 	// also fits logically.
1619 	bool			needsPtr	= false;
1620 	unsigned short	gprsUsed	= 0;
1621 	unsigned short	fprsUsed	= 0;
1622 	size_t			i;
1623 
1624 	if (ioGPRCount)
1625 		gprsUsed = *ioGPRCount;
1626 
1627 	if (ioFPRCount)
1628 		fprsUsed = *ioFPRCount;
1629 
1630 	for (i = 0; inType->elements[i] != NULL && !needsPtr; i++)
1631 	{
1632 		switch (inType->elements[i]->type)
1633 		{
1634 			case FFI_TYPE_FLOAT:
1635 			case FFI_TYPE_DOUBLE:
1636 				gprsUsed++;
1637 				fprsUsed++;
1638 
1639 				if (fprsUsed > 13)
1640 					needsPtr = true;
1641 
1642 				break;
1643 
1644 			case FFI_TYPE_LONGDOUBLE:
1645 				gprsUsed += 2;
1646 				fprsUsed += 2;
1647 
1648 				if (fprsUsed > 14)
1649 					needsPtr = true;
1650 
1651 				break;
1652 
1653 			case FFI_TYPE_UINT8:
1654 			case FFI_TYPE_SINT8:
1655 			{
1656 				gprsUsed++;
1657 
1658 				if (gprsUsed > 8)
1659 				{
1660 					needsPtr = true;
1661 					break;
1662 				}
1663 
1664 				if (inType->elements[i + 1] == NULL)	// last byte in the struct
1665 					break;
1666 
1667 				// Count possible contiguous bytes ahead, up to 8.
1668 				unsigned short j;
1669 
1670 				for (j = 1; j < 8; j++)
1671 				{
1672 					if (inType->elements[i + j] == NULL ||
1673 						!FFI_TYPE_1_BYTE(inType->elements[i + j]->type))
1674 						break;
1675 				}
1676 
1677 				i += j - 1;	// allow for i++ before the test condition
1678 
1679 				break;
1680 			}
1681 
1682 			case FFI_TYPE_UINT16:
1683 			case FFI_TYPE_SINT16:
1684 			case FFI_TYPE_INT:
1685 			case FFI_TYPE_UINT32:
1686 			case FFI_TYPE_SINT32:
1687 			case FFI_TYPE_POINTER:
1688 			case FFI_TYPE_UINT64:
1689 			case FFI_TYPE_SINT64:
1690 				gprsUsed++;
1691 
1692 				if (gprsUsed > 8)
1693 					needsPtr = true;
1694 
1695 				break;
1696 
1697 			case FFI_TYPE_STRUCT:
1698 				needsPtr = ffi64_stret_needs_ptr(
1699 					inType->elements[i], &gprsUsed, &fprsUsed);
1700 
1701 				break;
1702 
1703 			default:
1704 				FFI_ASSERT(0);
1705 				break;
1706 		}
1707 	}
1708 
1709 	if (ioGPRCount)
1710 		*ioGPRCount = gprsUsed;
1711 
1712 	if (ioFPRCount)
1713 		*ioFPRCount = fprsUsed;
1714 
1715 	return needsPtr;
1716 }
1717 
1718 /*	ffi64_data_size
1719 
1720 	Calculate the size in bytes of an ffi type.
1721 */
1722 
1723 unsigned int
ffi64_data_size(const ffi_type * inType)1724 ffi64_data_size(
1725 	const ffi_type*	inType)
1726 {
1727 	unsigned int	size = 0;
1728 
1729 	switch (inType->type)
1730 	{
1731 		case FFI_TYPE_UINT8:
1732 		case FFI_TYPE_SINT8:
1733 			size = 1;
1734 			break;
1735 
1736 		case FFI_TYPE_UINT16:
1737 		case FFI_TYPE_SINT16:
1738 			size = 2;
1739 			break;
1740 
1741 		case FFI_TYPE_INT:
1742 		case FFI_TYPE_UINT32:
1743 		case FFI_TYPE_SINT32:
1744 		case FFI_TYPE_FLOAT:
1745 			size = 4;
1746 			break;
1747 
1748 		case FFI_TYPE_POINTER:
1749 		case FFI_TYPE_UINT64:
1750 		case FFI_TYPE_SINT64:
1751 		case FFI_TYPE_DOUBLE:
1752 			size = 8;
1753 			break;
1754 
1755 		case FFI_TYPE_LONGDOUBLE:
1756 			size = 16;
1757 			break;
1758 
1759 		case FFI_TYPE_STRUCT:
1760 			ffi64_struct_to_reg_form(
1761 				inType, NULL, NULL, NULL, NULL, &size, NULL, NULL);
1762 			break;
1763 
1764 		case FFI_TYPE_VOID:
1765 			break;
1766 
1767 		default:
1768 			FFI_ASSERT(0);
1769 			break;
1770 	}
1771 
1772 	return size;
1773 }
1774 
1775 #endif	/*	defined(__ppc64__)	*/
1776 #endif	/* __ppc__ || __ppc64__ */
1777