1 /**
2   Copyright 1993 Bill Triggs <Bill.Triggs@inrialpes.fr>
3   Copyright 1995-2021 Bruno Haible <bruno@clisp.org>
4 
5   This program is free software: you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2 of the License, or
8   (at your option) any later version.
9 
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14 
15   You should have received a copy of the GNU General Public License
16   along with this program.  If not, see <https://www.gnu.org/licenses/>.
17 **/
18 /*----------------------------------------------------------------------
19   !!! THIS ROUTINE MUST BE COMPILED gcc -O -fno-omit-frame-pointer !!!
20 
21   Foreign function interface for a Linux arm64 (a.k.a. aarch64) with gcc.
22 
23   This calls a C function with an argument list built up using macros
24   defined in avcall.h.
25 
26   ARM64 Argument Passing Conventions are documented in
27   http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf.
28 
29   Up to 8 words are passed in integer registers (x0, ..., x7).
30   Up to 8 float/double arguments are passed in floating point / SIMD
31   registers (v0/q0/d0/s0, ..., v7/q7/d7/s7).
32   Arguments passed on the stack have 8-bytes alignment (but on macOS only
33   4-bytes alignment).
34   Structure args larger than 16 bytes are passed as pointers to caller-made
35   local copies. (§ 5.4.2 rule B.3)
36   Structure args <= 16 bytes are passed as up to two words in registers
37   (§ 5.4.2 rule C.10) or otherwise on the stack (§ 5.4.2 rule C.13).
38 
39   Integers are returned in x0, x1.
40   Float/double values are returned in d0/s0, d1/s1.
41   Structures <= 16 bytes are returned in registers. To return a structure
42   larger than 16 bytes, the called function copies the value to space
43   pointed to by x8.
44   ----------------------------------------------------------------------*/
45 #include "avcall-internal.h"
46 
47 #define RETURN(TYPE,VAL)	(*(TYPE*)l->raddr = (TYPE)(VAL))
48 
49 register __avword* sret __asm__("x8");  /* structure return pointer */
50 
51 register __avrword iarg1 __asm__("x0");
52 register __avrword iarg2 __asm__("x1");
53 register __avrword iarg3 __asm__("x2");
54 register __avrword iarg4 __asm__("x3");
55 register __avrword iarg5 __asm__("x4");
56 register __avrword iarg6 __asm__("x5");
57 register __avrword iarg7 __asm__("x6");
58 register __avrword iarg8 __asm__("x7");
59 
60 register float farg1 __asm__("s0");
61 register float farg2 __asm__("s1");
62 register float farg3 __asm__("s2");
63 register float farg4 __asm__("s3");
64 register float farg5 __asm__("s4");
65 register float farg6 __asm__("s5");
66 register float farg7 __asm__("s6");
67 register float farg8 __asm__("s7");
68 
69 register double darg1 __asm__("d0");
70 register double darg2 __asm__("d1");
71 register double darg3 __asm__("d2");
72 register double darg4 __asm__("d3");
73 register double darg5 __asm__("d4");
74 register double darg6 __asm__("d5");
75 register double darg7 __asm__("d6");
76 register double darg8 __asm__("d7");
77 
78 int
avcall_call(av_alist * list)79 avcall_call(av_alist* list)
80 {
81   register __avword*	sp	__asm__("sp");	/* C names for registers */
82   register __avrword	iretreg	 __asm__("x0");
83   register __avrword	iret2reg __asm__("x1");
84   register double	dret	__asm__("d0");
85 
86   __av_alist* l = &AV_LIST_INNER(list);
87 
88   __avword* argframe = __builtin_alloca(__AV_ALIST_WORDS * sizeof(__avword)); /* make room for argument list */
89   int arglen = l->aptr - l->args;
90   unsigned int ianum = l->ianum;
91   unsigned int fanum = l->fanum;
92   __avrword iret, iret2;
93 
94   {
95     int i;
96     for (i = 0; i < arglen; i++)	/* push function args onto stack */
97       argframe[i] = l->args[i];
98   }
99 
100   /* Put up to 8 integer args into registers. */
101   if (ianum >= 1) {
102     iarg1 = l->iargs[0];
103     if (ianum >= 2) {
104       iarg2 = l->iargs[1];
105       if (ianum >= 3) {
106         iarg3 = l->iargs[2];
107         if (ianum >= 4) {
108           iarg4 = l->iargs[3];
109           if (ianum >= 5) {
110             iarg5 = l->iargs[4];
111             if (ianum >= 6) {
112               iarg6 = l->iargs[5];
113               if (ianum >= 7) {
114                 iarg7 = l->iargs[6];
115                 if (ianum >= 8) {
116                   iarg8 = l->iargs[7];
117                 }
118               }
119             }
120           }
121         }
122       }
123     }
124   }
125 
126   /* Put upto 8 floating-point args into registers. */
127   if (fanum >= 1) {
128     if (l->darg_mask & (1 << 0)) darg1 = l->dargs[0];
129     else if (l->farg_mask & (1 << 0)) farg1 = l->fargs[0];
130     if (fanum >= 2) {
131       if (l->darg_mask & (1 << 1)) darg2 = l->dargs[1];
132       else if (l->farg_mask & (1 << 1)) farg2 = l->fargs[1];
133       if (fanum >= 3) {
134         if (l->darg_mask & (1 << 2)) darg3 = l->dargs[2];
135         else if (l->farg_mask & (1 << 2)) farg3 = l->fargs[2];
136         if (fanum >= 4) {
137           if (l->darg_mask & (1 << 3)) darg4 = l->dargs[3];
138           else if (l->farg_mask & (1 << 3)) farg4 = l->fargs[3];
139           if (fanum >= 5) {
140             if (l->darg_mask & (1 << 4)) darg5 = l->dargs[4];
141             else if (l->farg_mask & (1 << 4)) farg5 = l->fargs[4];
142             if (fanum >= 6) {
143               if (l->darg_mask & (1 << 5)) darg6 = l->dargs[5];
144               else if (l->farg_mask & (1 << 5)) farg6 = l->fargs[5];
145               if (fanum >= 7) {
146                 if (l->darg_mask & (1 << 6)) darg7 = l->dargs[6];
147                 else if (l->farg_mask & (1 << 6)) farg7 = l->fargs[6];
148                 if (fanum >= 8) {
149                   if (l->darg_mask & (1 << 7)) darg8 = l->dargs[7];
150                   else if (l->farg_mask & (1 << 7)) farg8 = l->fargs[7];
151                 }
152               }
153             }
154           }
155         }
156       }
157     }
158   }
159 
160   /* Call function. */
161   if (l->rtype == __AVfloat) {
162     *(float*)l->raddr = (*(float(*)())l->func)();
163   } else
164   if (l->rtype == __AVdouble) {
165     *(double*)l->raddr = (*(double(*)())l->func)();
166   } else {
167     iret = (*l->func)();
168     iret2 = iret2reg;
169 
170     /* save return value */
171     if (l->rtype == __AVvoid) {
172     } else
173     if (l->rtype == __AVchar) {
174       RETURN(char, iret);
175     } else
176     if (l->rtype == __AVschar) {
177       RETURN(signed char, iret);
178     } else
179     if (l->rtype == __AVuchar) {
180       RETURN(unsigned char, iret);
181     } else
182     if (l->rtype == __AVshort) {
183       RETURN(short, iret);
184     } else
185     if (l->rtype == __AVushort) {
186       RETURN(unsigned short, iret);
187     } else
188     if (l->rtype == __AVint) {
189       RETURN(int, iret);
190     } else
191     if (l->rtype == __AVuint) {
192       RETURN(unsigned int, iret);
193     } else
194     if (l->rtype == __AVlong || l->rtype == __AVlonglong) {
195       RETURN(long, iret);
196     } else
197     if (l->rtype == __AVulong || l->rtype == __AVulonglong) {
198       RETURN(unsigned long, iret);
199     } else
200   /* see above
201     if (l->rtype == __AVfloat) {
202     } else
203     if (l->rtype == __AVdouble) {
204     } else
205   */
206     if (l->rtype == __AVvoidp) {
207       RETURN(void*, iret);
208     } else
209     if (l->rtype == __AVstruct) {
210       if (l->flags & __AV_REGISTER_STRUCT_RETURN) {
211         /* Return structs of size <= 16 in registers. */
212         if (l->rsize > 0 && l->rsize <= 16) {
213           void* raddr = l->raddr;
214           #if 0 /* Unoptimized */
215           if (l->rsize == 1) {
216             ((unsigned char *)raddr)[0] = (unsigned char)(iret);
217           } else
218           if (l->rsize == 2) {
219             ((unsigned char *)raddr)[0] = (unsigned char)(iret);
220             ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8);
221           } else
222           if (l->rsize == 3) {
223             ((unsigned char *)raddr)[0] = (unsigned char)(iret);
224             ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8);
225             ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16);
226           } else
227           if (l->rsize == 4) {
228             ((unsigned char *)raddr)[0] = (unsigned char)(iret);
229             ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8);
230             ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16);
231             ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24);
232           } else
233           if (l->rsize == 5) {
234             ((unsigned char *)raddr)[0] = (unsigned char)(iret);
235             ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8);
236             ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16);
237             ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24);
238             ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32);
239           } else
240           if (l->rsize == 6) {
241             ((unsigned char *)raddr)[0] = (unsigned char)(iret);
242             ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8);
243             ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16);
244             ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24);
245             ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32);
246             ((unsigned char *)raddr)[5] = (unsigned char)(iret>>40);
247           } else
248           if (l->rsize == 7) {
249             ((unsigned char *)raddr)[0] = (unsigned char)(iret);
250             ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8);
251             ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16);
252             ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24);
253             ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32);
254             ((unsigned char *)raddr)[5] = (unsigned char)(iret>>40);
255             ((unsigned char *)raddr)[6] = (unsigned char)(iret>>48);
256           } else
257           if (l->rsize >= 8 && l->rsize <= 16) {
258             ((unsigned char *)raddr)[0] = (unsigned char)(iret);
259             ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8);
260             ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16);
261             ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24);
262             ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32);
263             ((unsigned char *)raddr)[5] = (unsigned char)(iret>>40);
264             ((unsigned char *)raddr)[6] = (unsigned char)(iret>>48);
265             ((unsigned char *)raddr)[7] = (unsigned char)(iret>>56);
266             if (l->rsize == 8) {
267             } else
268             if (l->rsize == 9) {
269               ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2);
270             } else
271             if (l->rsize == 10) {
272               ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2);
273               ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8);
274             } else
275             if (l->rsize == 11) {
276               ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2);
277               ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8);
278               ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16);
279             } else
280             if (l->rsize == 12) {
281               ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2);
282               ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8);
283               ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16);
284               ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24);
285             } else
286             if (l->rsize == 13) {
287               ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2);
288               ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8);
289               ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16);
290               ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24);
291               ((unsigned char *)raddr)[8+4] = (unsigned char)(iret2>>32);
292             } else
293             if (l->rsize == 14) {
294               ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2);
295               ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8);
296               ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16);
297               ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24);
298               ((unsigned char *)raddr)[8+4] = (unsigned char)(iret2>>32);
299               ((unsigned char *)raddr)[8+5] = (unsigned char)(iret2>>40);
300             } else
301             if (l->rsize == 15) {
302               ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2);
303               ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8);
304               ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16);
305               ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24);
306               ((unsigned char *)raddr)[8+4] = (unsigned char)(iret2>>32);
307               ((unsigned char *)raddr)[8+5] = (unsigned char)(iret2>>40);
308               ((unsigned char *)raddr)[8+6] = (unsigned char)(iret2>>48);
309             } else
310             if (l->rsize == 16) {
311               ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2);
312               ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8);
313               ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16);
314               ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24);
315               ((unsigned char *)raddr)[8+4] = (unsigned char)(iret2>>32);
316               ((unsigned char *)raddr)[8+5] = (unsigned char)(iret2>>40);
317               ((unsigned char *)raddr)[8+6] = (unsigned char)(iret2>>48);
318               ((unsigned char *)raddr)[8+7] = (unsigned char)(iret2>>56);
319             }
320           }
321           #else /* Optimized: fewer conditional jumps, fewer memory accesses */
322           uintptr_t count = l->rsize; /* > 0, ≤ 2*sizeof(__avrword) */
323           __avrword* wordaddr = (__avrword*)((uintptr_t)raddr & ~(uintptr_t)(sizeof(__avrword)-1));
324           uintptr_t start_offset = (uintptr_t)raddr & (uintptr_t)(sizeof(__avrword)-1); /* ≥ 0, < sizeof(__avrword) */
325           uintptr_t end_offset = start_offset + count; /* > 0, < 3*sizeof(__avrword) */
326           if (count <= sizeof(__avrword)) {
327             /* Use iret. */
328             if (end_offset <= sizeof(__avrword)) {
329               /* 0 < end_offset ≤ sizeof(__avrword) */
330               __avrword mask0 = ((__avrword)2 << (end_offset*8-1)) - ((__avrword)1 << (start_offset*8));
331               wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0;
332             } else {
333               /* sizeof(__avrword) < end_offset < 2*sizeof(__avrword), start_offset > 0 */
334               __avrword mask0 = - ((__avrword)1 << (start_offset*8));
335               __avrword mask1 = ((__avrword)2 << (end_offset*8-sizeof(__avrword)*8-1)) - 1;
336               wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0;
337               wordaddr[1] ^= (wordaddr[1] ^ (iret >> (sizeof(__avrword)*8-start_offset*8))) & mask1;
338             }
339           } else {
340             /* Use iret, iret2. */
341             __avrword mask0 = - ((__avrword)1 << (start_offset*8));
342             wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0;
343             if (end_offset <= 2*sizeof(__avrword)) {
344               /* sizeof(__avrword) < end_offset ≤ 2*sizeof(__avrword) */
345               __avrword mask1 = ((__avrword)2 << (end_offset*8-sizeof(__avrword)*8-1)) - 1;
346               wordaddr[1] ^= (wordaddr[1] ^ ((iret >> (sizeof(__avrword)*4-start_offset*4) >> (sizeof(__avrword)*4-start_offset*4)) | (iret2 << (start_offset*8)))) & mask1;
347             } else {
348               /* 2*sizeof(__avrword) < end_offset < 3*sizeof(__avrword), start_offset > 0 */
349               __avrword mask2 = ((__avrword)2 << (end_offset*8-2*sizeof(__avrword)*8-1)) - 1;
350               wordaddr[1] = (iret >> (sizeof(__avrword)*8-start_offset*8)) | (iret2 << (start_offset*8));
351               wordaddr[2] ^= (wordaddr[2] ^ (iret2 >> (sizeof(__avrword)*8-start_offset*8))) & mask2;
352             }
353           }
354           #endif
355         }
356       }
357     }
358   }
359   return 0;
360 }
361