1 /**
2 Copyright 1993 Bill Triggs <Bill.Triggs@inrialpes.fr>
3 Copyright 1995-2021 Bruno Haible <bruno@clisp.org>
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>.
17 **/
18 /*----------------------------------------------------------------------
19 !!! THIS ROUTINE MUST BE COMPILED gcc -O !!!
20
21 Foreign function interface for an Intel IA-64 in little-endian mode with gcc.
22
23 This calls a C function with an argument list built up using macros
24 defined in avcall.h.
25
26 IA-64 64-bit Argument Passing Conventions:
27
28 The argument sequence is mapped linearly on the registers r32,...,r39,
29 and continued on the stack, in [r12+16], [r12+24], ...
30 Items in this sequence are word-aligned. In gcc < 3.0, structures larger
31 than a single word are even two-word-aligned.
32 Integer/pointer arguments are passed in the allocated slots (registers
33 or stack slots). The first 8 float/double arguments are passed in
34 registers f8,...,f15 instead, but their slots are kept allocated.
35 Structure args are passed like multiple integer arguments; except that
36 structures consisting only of floats or only of doubles are passed like
37 multiple float arguments or multiple double arguments, respectively.
38
39 Integers and pointers are returned in r8, floats and doubles in f8.
40 Structures consisting only of at most 8 floats or only of at most 8 doubles
41 are returned in f8,...,f15. Other than that, structures of size <= 32 bytes
42 are returned in r8,...,r11, as if these were 4 contiguous words in memory.
43 Larger structures are returned in memory; the caller passes the address
44 of the target memory area in r8, and it is returned unmodified in r8.
45 ----------------------------------------------------------------------*/
46 #include "avcall-internal.h"
47
48 #define RETURN(TYPE,VAL) (*(TYPE*)l->raddr = (TYPE)(VAL))
49
50 register __avword* sret __asm__("r8"); /* structure return pointer */
51 /*register __avrword iret __asm__("r8");*/
52 register __avrword iret2 __asm__("r9");
53 register __avrword iret3 __asm__("r10");
54 register __avrword iret4 __asm__("r11");
55 /*register float fret __asm__("f8");*/
56 /*register double dret __asm__("f8");*/
57 register double farg1 __asm__("f8");
58 register double farg2 __asm__("f9");
59 register double farg3 __asm__("f10");
60 register double farg4 __asm__("f11");
61 register double farg5 __asm__("f12");
62 register double farg6 __asm__("f13");
63 register double farg7 __asm__("f14");
64 register double farg8 __asm__("f15");
65
66 int
avcall_call(av_alist * list)67 avcall_call(av_alist* list)
68 {
69 register __avword* sp __asm__("r12"); /* C names for registers */
70
71 __av_alist* l = &AV_LIST_INNER(list);
72
73 __avword* argframe = (sp -= __AV_ALIST_WORDS) + 2; /* make room for argument list */
74 int arglen = l->aptr - l->args;
75 int farglen = l->faptr - l->fargs;
76 __avrword iret;
77
78 {
79 int i;
80 for (i = 8; i < arglen; i++) /* push function args onto stack */
81 argframe[i-8] = l->args[i];
82 }
83
84 /* struct return address */
85 if (l->rtype == __AVstruct)
86 sret = l->raddr;
87
88 /* put max. 8 double args in registers */
89 if (farglen > 0) {
90 farg1 = l->fargs[0];
91 if (farglen > 1) {
92 farg2 = l->fargs[1];
93 if (farglen > 2) {
94 farg3 = l->fargs[2];
95 if (farglen > 3) {
96 farg4 = l->fargs[3];
97 if (farglen > 4) {
98 farg5 = l->fargs[4];
99 if (farglen > 5) {
100 farg6 = l->fargs[5];
101 if (farglen > 6) {
102 farg7 = l->fargs[6];
103 if (farglen > 7)
104 farg8 = l->fargs[7];
105 }
106 }
107 }
108 }
109 }
110 }
111 }
112
113 /* call function, pass 8 integer and 8 double args in registers */
114 if (l->rtype == __AVfloat) {
115 *(float*)l->raddr = (*(float(*)())l->func)(l->args[0], l->args[1],
116 l->args[2], l->args[3],
117 l->args[4], l->args[5],
118 l->args[6], l->args[7]);
119 } else
120 if (l->rtype == __AVdouble) {
121 *(double*)l->raddr = (*(double(*)())l->func)(l->args[0], l->args[1],
122 l->args[2], l->args[3],
123 l->args[4], l->args[5],
124 l->args[6], l->args[7]);
125 } else {
126 iret = (*l->func)(l->args[0], l->args[1], l->args[2], l->args[3],
127 l->args[4], l->args[5], l->args[6], l->args[7]);
128
129 /* save return value */
130 if (l->rtype == __AVvoid) {
131 } else
132 if (l->rtype == __AVchar) {
133 RETURN(char, iret);
134 } else
135 if (l->rtype == __AVschar) {
136 RETURN(signed char, iret);
137 } else
138 if (l->rtype == __AVuchar) {
139 RETURN(unsigned char, iret);
140 } else
141 if (l->rtype == __AVshort) {
142 RETURN(short, iret);
143 } else
144 if (l->rtype == __AVushort) {
145 RETURN(unsigned short, iret);
146 } else
147 if (l->rtype == __AVint) {
148 RETURN(int, iret);
149 } else
150 if (l->rtype == __AVuint) {
151 RETURN(unsigned int, iret);
152 } else
153 if (l->rtype == __AVlong || l->rtype == __AVlonglong) {
154 RETURN(long, iret);
155 } else
156 if (l->rtype == __AVulong || l->rtype == __AVulonglong) {
157 RETURN(unsigned long, iret);
158 } else
159 /* see above
160 if (l->rtype == __AVfloat) {
161 } else
162 if (l->rtype == __AVdouble) {
163 } else
164 */
165 if (l->rtype == __AVvoidp) {
166 RETURN(void*, iret);
167 } else
168 if (l->rtype == __AVstruct) {
169 if (l->flags & __AV_REGISTER_STRUCT_RETURN) {
170 /* Return structs of size <= 32 in registers. */
171 if (l->rsize > 0 && l->rsize <= 32) {
172 void* raddr = l->raddr;
173 #if 0 /* Unoptimized */
174 if (l->rsize >= 1)
175 ((unsigned char *)raddr)[0] = (unsigned char)(iret);
176 if (l->rsize >= 2)
177 ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8);
178 if (l->rsize >= 3)
179 ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16);
180 if (l->rsize >= 4)
181 ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24);
182 if (l->rsize >= 5)
183 ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32);
184 if (l->rsize >= 6)
185 ((unsigned char *)raddr)[5] = (unsigned char)(iret>>40);
186 if (l->rsize >= 7)
187 ((unsigned char *)raddr)[6] = (unsigned char)(iret>>48);
188 if (l->rsize >= 8)
189 ((unsigned char *)raddr)[7] = (unsigned char)(iret>>56);
190 if (l->rsize >= 9) {
191 ((unsigned char *)raddr)[8] = (unsigned char)(iret2);
192 if (l->rsize >= 10)
193 ((unsigned char *)raddr)[9] = (unsigned char)(iret2>>8);
194 if (l->rsize >= 11)
195 ((unsigned char *)raddr)[10] = (unsigned char)(iret2>>16);
196 if (l->rsize >= 12)
197 ((unsigned char *)raddr)[11] = (unsigned char)(iret2>>24);
198 if (l->rsize >= 13)
199 ((unsigned char *)raddr)[12] = (unsigned char)(iret2>>32);
200 if (l->rsize >= 14)
201 ((unsigned char *)raddr)[13] = (unsigned char)(iret2>>40);
202 if (l->rsize >= 15)
203 ((unsigned char *)raddr)[14] = (unsigned char)(iret2>>48);
204 if (l->rsize >= 16)
205 ((unsigned char *)raddr)[15] = (unsigned char)(iret2>>56);
206 if (l->rsize >= 17) {
207 ((unsigned char *)raddr)[16] = (unsigned char)(iret3);
208 if (l->rsize >= 18)
209 ((unsigned char *)raddr)[17] = (unsigned char)(iret3>>8);
210 if (l->rsize >= 19)
211 ((unsigned char *)raddr)[18] = (unsigned char)(iret3>>16);
212 if (l->rsize >= 20)
213 ((unsigned char *)raddr)[19] = (unsigned char)(iret3>>24);
214 if (l->rsize >= 21)
215 ((unsigned char *)raddr)[20] = (unsigned char)(iret3>>32);
216 if (l->rsize >= 22)
217 ((unsigned char *)raddr)[21] = (unsigned char)(iret3>>40);
218 if (l->rsize >= 23)
219 ((unsigned char *)raddr)[22] = (unsigned char)(iret3>>48);
220 if (l->rsize >= 24)
221 ((unsigned char *)raddr)[23] = (unsigned char)(iret3>>56);
222 if (l->rsize >= 25) {
223 ((unsigned char *)raddr)[24] = (unsigned char)(iret4);
224 if (l->rsize >= 26)
225 ((unsigned char *)raddr)[25] = (unsigned char)(iret4>>8);
226 if (l->rsize >= 27)
227 ((unsigned char *)raddr)[26] = (unsigned char)(iret4>>16);
228 if (l->rsize >= 28)
229 ((unsigned char *)raddr)[27] = (unsigned char)(iret4>>24);
230 if (l->rsize >= 29)
231 ((unsigned char *)raddr)[28] = (unsigned char)(iret4>>32);
232 if (l->rsize >= 30)
233 ((unsigned char *)raddr)[29] = (unsigned char)(iret4>>40);
234 if (l->rsize >= 31)
235 ((unsigned char *)raddr)[30] = (unsigned char)(iret4>>48);
236 if (l->rsize >= 32)
237 ((unsigned char *)raddr)[31] = (unsigned char)(iret4>>56);
238 }
239 }
240 }
241 #else /* Optimized: fewer conditional jumps, fewer memory accesses */
242 uintptr_t count = l->rsize; /* > 0, ≤ 4*sizeof(__avrword) */
243 __avrword* wordaddr = (__avrword*)((uintptr_t)raddr & ~(uintptr_t)(sizeof(__avrword)-1));
244 uintptr_t start_offset = (uintptr_t)raddr & (uintptr_t)(sizeof(__avrword)-1); /* ≥ 0, < sizeof(__avrword) */
245 uintptr_t end_offset = start_offset + count; /* > 0, < 5*sizeof(__avrword) */
246 if (count <= sizeof(__avrword)) {
247 /* Use iret. */
248 if (end_offset <= sizeof(__avrword)) {
249 /* 0 < end_offset ≤ sizeof(__avrword) */
250 __avrword mask0 = ((__avrword)2 << (end_offset*8-1)) - ((__avrword)1 << (start_offset*8));
251 wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0;
252 } else {
253 /* sizeof(__avrword) < end_offset < 2*sizeof(__avrword), start_offset > 0 */
254 __avrword mask0 = - ((__avrword)1 << (start_offset*8));
255 __avrword mask1 = ((__avrword)2 << (end_offset*8-sizeof(__avrword)*8-1)) - 1;
256 wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0;
257 wordaddr[1] ^= (wordaddr[1] ^ (iret >> (sizeof(__avrword)*8-start_offset*8))) & mask1;
258 }
259 } else if (count <= 2*sizeof(__avrword)) {
260 /* Use iret, iret2. */
261 __avrword mask0 = - ((__avrword)1 << (start_offset*8));
262 wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0;
263 if (end_offset <= 2*sizeof(__avrword)) {
264 /* sizeof(__avrword) < end_offset ≤ 2*sizeof(__avrword) */
265 __avrword mask1 = ((__avrword)2 << (end_offset*8-sizeof(__avrword)*8-1)) - 1;
266 wordaddr[1] ^= (wordaddr[1] ^ ((iret >> (sizeof(__avrword)*4-start_offset*4) >> (sizeof(__avrword)*4-start_offset*4)) | (iret2 << (start_offset*8)))) & mask1;
267 } else {
268 /* 2*sizeof(__avrword) < end_offset < 3*sizeof(__avrword), start_offset > 0 */
269 __avrword mask2 = ((__avrword)2 << (end_offset*8-2*sizeof(__avrword)*8-1)) - 1;
270 wordaddr[1] = (iret >> (sizeof(__avrword)*8-start_offset*8)) | (iret2 << (start_offset*8));
271 wordaddr[2] ^= (wordaddr[2] ^ (iret2 >> (sizeof(__avrword)*8-start_offset*8))) & mask2;
272 }
273 } else if (count <= 3*sizeof(__avrword)) {
274 /* Use iret, iret2, iret3. */
275 __avrword mask0 = - ((__avrword)1 << (start_offset*8));
276 wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0;
277 if (end_offset <= 3*sizeof(__avrword)) {
278 /* 2*sizeof(__avrword) < end_offset ≤ 3*sizeof(__avrword) */
279 __avrword mask2 = ((__avrword)2 << (end_offset*8-sizeof(__avrword)*8-1)) - 1;
280 wordaddr[1] = (iret >> (sizeof(__avrword)*4-start_offset*4) >> (sizeof(__avrword)*4-start_offset*4)) | (iret2 << (start_offset*8));
281 wordaddr[2] ^= (wordaddr[2] ^ ((iret2 >> (sizeof(__avrword)*4-start_offset*4) >> (sizeof(__avrword)*4-start_offset*4)) | (iret3 << (start_offset*8)))) & mask2;
282 } else {
283 /* 3*sizeof(__avrword) < end_offset < 4*sizeof(__avrword), start_offset > 0 */
284 __avrword mask3 = ((__avrword)2 << (end_offset*8-2*sizeof(__avrword)*8-1)) - 1;
285 wordaddr[1] = (iret >> (sizeof(__avrword)*8-start_offset*8)) | (iret2 << (start_offset*8));
286 wordaddr[2] = (iret2 >> (sizeof(__avrword)*8-start_offset*8)) | (iret3 << (start_offset*8));
287 wordaddr[3] ^= (wordaddr[3] ^ (iret3 >> (sizeof(__avrword)*8-start_offset*8))) & mask3;
288 }
289 } else {
290 /* Use iret, iret2, iret3, iret4. */
291 __avrword mask0 = - ((__avrword)1 << (start_offset*8));
292 wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0;
293 if (end_offset <= 4*sizeof(__avrword)) {
294 /* 3*sizeof(__avrword) < end_offset ≤ 4*sizeof(__avrword) */
295 __avrword mask3 = ((__avrword)2 << (end_offset*8-sizeof(__avrword)*8-1)) - 1;
296 wordaddr[1] = (iret >> (sizeof(__avrword)*4-start_offset*4) >> (sizeof(__avrword)*4-start_offset*4)) | (iret2 << (start_offset*8));
297 wordaddr[2] = (iret2 >> (sizeof(__avrword)*4-start_offset*4) >> (sizeof(__avrword)*4-start_offset*4)) | (iret3 << (start_offset*8));
298 wordaddr[3] ^= (wordaddr[3] ^ ((iret >> (sizeof(__avrword)*4-start_offset*4) >> (sizeof(__avrword)*4-start_offset*4)) | (iret2 << (start_offset*8)))) & mask3;
299 } else {
300 /* 4*sizeof(__avrword) < end_offset < 5*sizeof(__avrword), start_offset > 0 */
301 __avrword mask4 = ((__avrword)2 << (end_offset*8-2*sizeof(__avrword)*8-1)) - 1;
302 wordaddr[1] = (iret >> (sizeof(__avrword)*8-start_offset*8)) | (iret2 << (start_offset*8));
303 wordaddr[2] = (iret2 >> (sizeof(__avrword)*8-start_offset*8)) | (iret3 << (start_offset*8));
304 wordaddr[3] = (iret3 >> (sizeof(__avrword)*8-start_offset*8)) | (iret4 << (start_offset*8));
305 wordaddr[4] ^= (wordaddr[4] ^ (iret4 >> (sizeof(__avrword)*8-start_offset*8))) & mask4;
306 }
307 }
308 #endif
309 }
310 }
311 }
312 }
313 return 0;
314 }
315