1 /**
2 Copyright 1993 Bill Triggs <Bill.Triggs@inrialpes.fr>
3 Copyright 1995-2021 Bruno Haible <bruno@clisp.org>
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>.
17 **/
18 /*----------------------------------------------------------------------
19 !!! THIS ROUTINE MUST BE COMPILED gcc -O -fno-omit-frame-pointer !!!
20
21 Foreign function interface for a Linux arm64 (a.k.a. aarch64) with gcc.
22
23 This calls a C function with an argument list built up using macros
24 defined in avcall.h.
25
26 ARM64 Argument Passing Conventions are documented in
27 http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf.
28
29 Up to 8 words are passed in integer registers (x0, ..., x7).
30 Up to 8 float/double arguments are passed in floating point / SIMD
31 registers (v0/q0/d0/s0, ..., v7/q7/d7/s7).
32 Arguments passed on the stack have 8-byte alignment (but on macOS only
33 4-byte alignment).
34 Structure args larger than 16 bytes are passed as pointers to caller-made
35 local copies. (§ 5.4.2 rule B.3)
36 Structure args <= 16 bytes are passed as up to two words in registers
37 (§ 5.4.2 rule C.10) or otherwise on the stack (§ 5.4.2 rule C.13).
38
39 Integers are returned in x0, x1.
40 Float/double values are returned in d0/s0, d1/s1.
41 Structures <= 16 bytes are returned in registers. To return a structure
42 larger than 16 bytes, the called function copies the value to space
43 pointed to by x8.
44 ----------------------------------------------------------------------*/
45 #include "avcall-internal.h"
46
/* RETURN(TYPE,VAL): converts VAL to TYPE and stores it into the object that
   l->raddr points to, accessed as a TYPE.  The macro refers to a variable
   named `l`, which must be in scope wherever it is expanded. */
47 #define RETURN(TYPE,VAL) (*(TYPE*)l->raddr = (TYPE)(VAL))
48
/* File-scope register variables, declared with GCC's
   `register ... __asm__("reg")` syntax.  avcall_call() assigns to the
   iarg, farg and darg names below to place argument values into the
   corresponding registers before it calls the target function. */
49 register __avword* sret __asm__("x8"); /* structure return pointer */
50
/* Integer argument registers x0..x7. */
51 register __avrword iarg1 __asm__("x0");
52 register __avrword iarg2 __asm__("x1");
53 register __avrword iarg3 __asm__("x2");
54 register __avrword iarg4 __asm__("x3");
55 register __avrword iarg5 __asm__("x4");
56 register __avrword iarg6 __asm__("x5");
57 register __avrword iarg7 __asm__("x6");
58 register __avrword iarg8 __asm__("x7");
59
/* Floating-point argument registers, single-precision names s0..s7. */
60 register float farg1 __asm__("s0");
61 register float farg2 __asm__("s1");
62 register float farg3 __asm__("s2");
63 register float farg4 __asm__("s3");
64 register float farg5 __asm__("s4");
65 register float farg6 __asm__("s5");
66 register float farg7 __asm__("s6");
67 register float farg8 __asm__("s7");
68
/* Floating-point argument registers, double-precision names d0..d7.
   As the file header notes (v0/q0/d0/s0, ...), s<n> and d<n> are names for
   the same floating point / SIMD register, so farg<n> and darg<n> refer to
   the same register. */
69 register double darg1 __asm__("d0");
70 register double darg2 __asm__("d1");
71 register double darg3 __asm__("d2");
72 register double darg4 __asm__("d3");
73 register double darg5 __asm__("d4");
74 register double darg6 __asm__("d5");
75 register double darg7 __asm__("d6");
76 register double darg8 __asm__("d7");
77
/* avcall_call: performs the function call described by LIST.

   Steps, in this order:
     1. Copy the (l->aptr - l->args) words stored in l->args into a block
        obtained from __builtin_alloca.
     2. Load the first l->ianum (at most 8) words of l->iargs into x0..x7.
     3. For each of the first l->fanum (at most 8) floating-point slots i:
        load d<i> from l->dargs[i] if bit i of l->darg_mask is set, otherwise
        load s<i> from l->fargs[i] if bit i of l->farg_mask is set.
     4. Call l->func and store the result at l->raddr, converted according to
        l->rtype.

   Always returns 0.

   Steps 2 and 3 are plain assignments to register variables.  Presumably
   nothing that could overwrite x0..x7 / v0..v7 may be placed between them
   and the call, and the copy in step 1 presumably relies on the compiler
   flags named in the file header to end up where the callee expects its
   stack arguments - keep the statement order intact.

   NOTE(review): the file-scope variable sret (x8) is not assigned anywhere
   in this function, and a __AVstruct result is stored here only when
   __AV_REGISTER_STRUCT_RETURN is set and 0 < rsize <= 16.  The file header
   says larger structures are returned through the address in x8 - confirm
   where x8 gets loaded with l->raddr for that case. */
78 int
avcall_call(av_alist * list)79 avcall_call(av_alist* list)
80 {
81 register __avword* sp __asm__("sp"); /* C names for registers */
82 register __avrword iretreg __asm__("x0");
83 register __avrword iret2reg __asm__("x1");
84 register double dret __asm__("d0");
/* Of the four names above, only iret2reg is referenced below. */
85
86 __av_alist* l = &AV_LIST_INNER(list);
87
88 __avword* argframe = __builtin_alloca(__AV_ALIST_WORDS * sizeof(__avword)); /* make room for argument list */
/* Number of words accumulated in l->args. */
89 int arglen = l->aptr - l->args;
/* Number of integer and floating-point register slots in use. */
90 unsigned int ianum = l->ianum;
91 unsigned int fanum = l->fanum;
/* Copies of the integer result registers (x0 and x1), taken after the call. */
92 __avrword iret, iret2;
93
94 {
95 int i;
96 for (i = 0; i < arglen; i++) /* push function args onto stack */
97 argframe[i] = l->args[i];
98 }
99
100 /* Put up to 8 integer args into registers. */
101 if (ianum >= 1) {
102 iarg1 = l->iargs[0];
103 if (ianum >= 2) {
104 iarg2 = l->iargs[1];
105 if (ianum >= 3) {
106 iarg3 = l->iargs[2];
107 if (ianum >= 4) {
108 iarg4 = l->iargs[3];
109 if (ianum >= 5) {
110 iarg5 = l->iargs[4];
111 if (ianum >= 6) {
112 iarg6 = l->iargs[5];
113 if (ianum >= 7) {
114 iarg7 = l->iargs[6];
115 if (ianum >= 8) {
116 iarg8 = l->iargs[7];
117 }
118 }
119 }
120 }
121 }
122 }
123 }
124 }
125
126 /* Put up to 8 floating-point args into registers. */
/* Slot i is loaded as a double when bit i of darg_mask is set, else as a
   float when bit i of farg_mask is set; if neither bit is set the register
   is left untouched. */
127 if (fanum >= 1) {
128 if (l->darg_mask & (1 << 0)) darg1 = l->dargs[0];
129 else if (l->farg_mask & (1 << 0)) farg1 = l->fargs[0];
130 if (fanum >= 2) {
131 if (l->darg_mask & (1 << 1)) darg2 = l->dargs[1];
132 else if (l->farg_mask & (1 << 1)) farg2 = l->fargs[1];
133 if (fanum >= 3) {
134 if (l->darg_mask & (1 << 2)) darg3 = l->dargs[2];
135 else if (l->farg_mask & (1 << 2)) farg3 = l->fargs[2];
136 if (fanum >= 4) {
137 if (l->darg_mask & (1 << 3)) darg4 = l->dargs[3];
138 else if (l->farg_mask & (1 << 3)) farg4 = l->fargs[3];
139 if (fanum >= 5) {
140 if (l->darg_mask & (1 << 4)) darg5 = l->dargs[4];
141 else if (l->farg_mask & (1 << 4)) farg5 = l->fargs[4];
142 if (fanum >= 6) {
143 if (l->darg_mask & (1 << 5)) darg6 = l->dargs[5];
144 else if (l->farg_mask & (1 << 5)) farg6 = l->fargs[5];
145 if (fanum >= 7) {
146 if (l->darg_mask & (1 << 6)) darg7 = l->dargs[6];
147 else if (l->farg_mask & (1 << 6)) farg7 = l->fargs[6];
148 if (fanum >= 8) {
149 if (l->darg_mask & (1 << 7)) darg8 = l->dargs[7];
150 else if (l->farg_mask & (1 << 7)) farg8 = l->fargs[7];
151 }
152 }
153 }
154 }
155 }
156 }
157 }
158 }
159
160 /* Call function. */
/* The call passes no C-level arguments: the argument values were placed in
   the registers and in argframe above.  For float and double results the
   function pointer is cast to the matching return type so that the compiler
   fetches the result itself. */
161 if (l->rtype == __AVfloat) {
162 *(float*)l->raddr = (*(float(*)())l->func)();
163 } else
164 if (l->rtype == __AVdouble) {
165 *(double*)l->raddr = (*(double(*)())l->func)();
166 } else {
167 iret = (*l->func)();
/* Capture x1 (second integer result register) right after the call. */
168 iret2 = iret2reg;
169
170 /* save return value */
171 if (l->rtype == __AVvoid) {
172 } else
173 if (l->rtype == __AVchar) {
174 RETURN(char, iret);
175 } else
176 if (l->rtype == __AVschar) {
177 RETURN(signed char, iret);
178 } else
179 if (l->rtype == __AVuchar) {
180 RETURN(unsigned char, iret);
181 } else
182 if (l->rtype == __AVshort) {
183 RETURN(short, iret);
184 } else
185 if (l->rtype == __AVushort) {
186 RETURN(unsigned short, iret);
187 } else
188 if (l->rtype == __AVint) {
189 RETURN(int, iret);
190 } else
191 if (l->rtype == __AVuint) {
192 RETURN(unsigned int, iret);
193 } else
/* long long results are stored through a `long` lvalue (and unsigned long
   long through `unsigned long`); this presumably relies on both types having
   the same size on this target. */
194 if (l->rtype == __AVlong || l->rtype == __AVlonglong) {
195 RETURN(long, iret);
196 } else
197 if (l->rtype == __AVulong || l->rtype == __AVulonglong) {
198 RETURN(unsigned long, iret);
199 } else
200 /* see above
201 if (l->rtype == __AVfloat) {
202 } else
203 if (l->rtype == __AVdouble) {
204 } else
205 */
206 if (l->rtype == __AVvoidp) {
207 RETURN(void*, iret);
208 } else
209 if (l->rtype == __AVstruct) {
/* Nothing is stored here for structures unless the register-return flag is
   set and the size is in 1..16. */
210 if (l->flags & __AV_REGISTER_STRUCT_RETURN) {
211 /* Return structs of size <= 16 in registers. */
212 if (l->rsize > 0 && l->rsize <= 16) {
213 void* raddr = l->raddr;
/* Reference version (disabled): copies rsize bytes to raddr one at a time,
   lowest-order byte of iret first, continuing with iret2 from byte 8 on. */
214 #if 0 /* Unoptimized */
215 if (l->rsize == 1) {
216 ((unsigned char *)raddr)[0] = (unsigned char)(iret);
217 } else
218 if (l->rsize == 2) {
219 ((unsigned char *)raddr)[0] = (unsigned char)(iret);
220 ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8);
221 } else
222 if (l->rsize == 3) {
223 ((unsigned char *)raddr)[0] = (unsigned char)(iret);
224 ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8);
225 ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16);
226 } else
227 if (l->rsize == 4) {
228 ((unsigned char *)raddr)[0] = (unsigned char)(iret);
229 ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8);
230 ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16);
231 ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24);
232 } else
233 if (l->rsize == 5) {
234 ((unsigned char *)raddr)[0] = (unsigned char)(iret);
235 ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8);
236 ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16);
237 ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24);
238 ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32);
239 } else
240 if (l->rsize == 6) {
241 ((unsigned char *)raddr)[0] = (unsigned char)(iret);
242 ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8);
243 ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16);
244 ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24);
245 ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32);
246 ((unsigned char *)raddr)[5] = (unsigned char)(iret>>40);
247 } else
248 if (l->rsize == 7) {
249 ((unsigned char *)raddr)[0] = (unsigned char)(iret);
250 ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8);
251 ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16);
252 ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24);
253 ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32);
254 ((unsigned char *)raddr)[5] = (unsigned char)(iret>>40);
255 ((unsigned char *)raddr)[6] = (unsigned char)(iret>>48);
256 } else
257 if (l->rsize >= 8 && l->rsize <= 16) {
258 ((unsigned char *)raddr)[0] = (unsigned char)(iret);
259 ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8);
260 ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16);
261 ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24);
262 ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32);
263 ((unsigned char *)raddr)[5] = (unsigned char)(iret>>40);
264 ((unsigned char *)raddr)[6] = (unsigned char)(iret>>48);
265 ((unsigned char *)raddr)[7] = (unsigned char)(iret>>56);
266 if (l->rsize == 8) {
267 } else
268 if (l->rsize == 9) {
269 ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2);
270 } else
271 if (l->rsize == 10) {
272 ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2);
273 ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8);
274 } else
275 if (l->rsize == 11) {
276 ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2);
277 ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8);
278 ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16);
279 } else
280 if (l->rsize == 12) {
281 ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2);
282 ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8);
283 ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16);
284 ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24);
285 } else
286 if (l->rsize == 13) {
287 ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2);
288 ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8);
289 ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16);
290 ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24);
291 ((unsigned char *)raddr)[8+4] = (unsigned char)(iret2>>32);
292 } else
293 if (l->rsize == 14) {
294 ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2);
295 ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8);
296 ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16);
297 ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24);
298 ((unsigned char *)raddr)[8+4] = (unsigned char)(iret2>>32);
299 ((unsigned char *)raddr)[8+5] = (unsigned char)(iret2>>40);
300 } else
301 if (l->rsize == 15) {
302 ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2);
303 ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8);
304 ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16);
305 ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24);
306 ((unsigned char *)raddr)[8+4] = (unsigned char)(iret2>>32);
307 ((unsigned char *)raddr)[8+5] = (unsigned char)(iret2>>40);
308 ((unsigned char *)raddr)[8+6] = (unsigned char)(iret2>>48);
309 } else
310 if (l->rsize == 16) {
311 ((unsigned char *)raddr)[8+0] = (unsigned char)(iret2);
312 ((unsigned char *)raddr)[8+1] = (unsigned char)(iret2>>8);
313 ((unsigned char *)raddr)[8+2] = (unsigned char)(iret2>>16);
314 ((unsigned char *)raddr)[8+3] = (unsigned char)(iret2>>24);
315 ((unsigned char *)raddr)[8+4] = (unsigned char)(iret2>>32);
316 ((unsigned char *)raddr)[8+5] = (unsigned char)(iret2>>40);
317 ((unsigned char *)raddr)[8+6] = (unsigned char)(iret2>>48);
318 ((unsigned char *)raddr)[8+7] = (unsigned char)(iret2>>56);
319 }
320 }
321 #else /* Optimized: fewer conditional jumps, fewer memory accesses */
/* Word-wise version: raddr is rounded down to a multiple of
   sizeof(__avrword), and each affected aligned word is updated with
       word ^= (word ^ newbits) & mask;
   which replaces exactly the bits selected by mask and keeps the others.
   Note that whole aligned words around the destination are read and written
   back, including bytes that lie outside [raddr, raddr + rsize).
   The masks are written as (2 << (n*8-1)) rather than (1 << (n*8)) so that
   the shift count stays below the word width when n equals
   sizeof(__avrword). */
322 uintptr_t count = l->rsize; /* > 0, ≤ 2*sizeof(__avrword) */
323 __avrword* wordaddr = (__avrword*)((uintptr_t)raddr & ~(uintptr_t)(sizeof(__avrword)-1));
324 uintptr_t start_offset = (uintptr_t)raddr & (uintptr_t)(sizeof(__avrword)-1); /* ≥ 0, < sizeof(__avrword) */
325 uintptr_t end_offset = start_offset + count; /* > 0, < 3*sizeof(__avrword) */
326 if (count <= sizeof(__avrword)) {
327 /* Use iret. */
328 if (end_offset <= sizeof(__avrword)) {
329 /* 0 < end_offset ≤ sizeof(__avrword) */
/* The value fits into one aligned word: mask0 selects bytes
   start_offset .. end_offset-1 of that word. */
330 __avrword mask0 = ((__avrword)2 << (end_offset*8-1)) - ((__avrword)1 << (start_offset*8));
331 wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0;
332 } else {
333 /* sizeof(__avrword) < end_offset < 2*sizeof(__avrword), start_offset > 0 */
/* The value straddles two aligned words: mask0 selects the bytes from
   start_offset upwards in the first word, mask1 the low bytes of the
   second word. */
334 __avrword mask0 = - ((__avrword)1 << (start_offset*8));
335 __avrword mask1 = ((__avrword)2 << (end_offset*8-sizeof(__avrword)*8-1)) - 1;
336 wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0;
337 wordaddr[1] ^= (wordaddr[1] ^ (iret >> (sizeof(__avrword)*8-start_offset*8))) & mask1;
338 }
339 } else {
340 /* Use iret, iret2. */
341 __avrword mask0 = - ((__avrword)1 << (start_offset*8));
342 wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0;
343 if (end_offset <= 2*sizeof(__avrword)) {
344 /* sizeof(__avrword) < end_offset ≤ 2*sizeof(__avrword) */
345 __avrword mask1 = ((__avrword)2 << (end_offset*8-sizeof(__avrword)*8-1)) - 1;
/* start_offset may be 0 in this branch, so the right shift of iret by
   (word width - start_offset*8) bits is split into two shifts of half that
   amount each; a single shift by the full word width would be undefined. */
346 wordaddr[1] ^= (wordaddr[1] ^ ((iret >> (sizeof(__avrword)*4-start_offset*4) >> (sizeof(__avrword)*4-start_offset*4)) | (iret2 << (start_offset*8)))) & mask1;
347 } else {
348 /* 2*sizeof(__avrword) < end_offset < 3*sizeof(__avrword), start_offset > 0 */
/* The value spans three aligned words: the middle word is overwritten
   completely, the third one only in its low bytes (mask2). */
349 __avrword mask2 = ((__avrword)2 << (end_offset*8-2*sizeof(__avrword)*8-1)) - 1;
350 wordaddr[1] = (iret >> (sizeof(__avrword)*8-start_offset*8)) | (iret2 << (start_offset*8));
351 wordaddr[2] ^= (wordaddr[2] ^ (iret2 >> (sizeof(__avrword)*8-start_offset*8))) & mask2;
352 }
353 }
354 #endif
355 }
356 }
357 }
358 }
359 return 0;
360 }
361