1 /**
2   Copyright 1993 Bill Triggs <Bill.Triggs@inrialpes.fr>
3   Copyright 1995-2021 Bruno Haible <bruno@clisp.org>
4 
5   This program is free software: you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2 of the License, or
8   (at your option) any later version.
9 
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14 
15   You should have received a copy of the GNU General Public License
16   along with this program.  If not, see <https://www.gnu.org/licenses/>.
17 **/
18 /*----------------------------------------------------------------------
19   !!! THIS ROUTINE MUST BE COMPILED gcc -O !!!
20 
  Foreign function interface for an Intel IA-64 in little-endian mode with gcc.
22 
23   This calls a C function with an argument list built up using macros
24   defined in avcall.h.
25 
26   IA-64 64-bit Argument Passing Conventions:
27 
28   The argument sequence is mapped linearly on the registers r32,...,r39,
29   and continued on the stack, in [r12+16], [r12+24], ...
30   Items in this sequence are word-aligned. In gcc < 3.0, structures larger
31   than a single word are even two-word-aligned.
32   Integer/pointer arguments are passed in the allocated slots (registers
33   or stack slots). The first 8 float/double arguments are passed in
34   registers f8,...,f15 instead, but their slots are kept allocated.
35   Structure args are passed like multiple integer arguments; except that
36   structures consisting only of floats or only of doubles are passed like
37   multiple float arguments or multiple double arguments, respectively.
38 
39   Integers and pointers are returned in r8, floats and doubles in f8.
40   Structures consisting only of at most 8 floats or only of at most 8 doubles
41   are returned in f8,...,f15. Other than that, structures of size <= 32 bytes
42   are returned in r8,...,r11, as if these were 4 contiguous words in memory.
43   Larger structures are returned in memory; the caller passes the address
44   of the target memory area in r8, and it is returned unmodified in r8.
45   ----------------------------------------------------------------------*/
46 #include "avcall-internal.h"
47 
/* Store VAL, converted to TYPE, into the caller-supplied result location.
   Expands to an expression that uses a variable named `l` (pointing to the
   inner argument list), which must be in scope at the point of use. */
#define RETURN(TYPE,VAL)	(*(TYPE*)l->raddr = (TYPE)(VAL))

/* C names for the hardware registers involved in the calling convention
   described at the top of this file.  Writing to or reading from these
   variables accesses the named register directly. */

/* r8: carries the address of the result area when a structure is returned
   in memory. */
register __avword*	sret	__asm__("r8");  /* structure return pointer */
/* r8 as an integer return register is not bound here; avcall_call uses a
   local variable `iret` that receives the value of the call expression. */
/*register __avrword	iret	__asm__("r8");*/
/* r9..r11: second to fourth word of a structure returned in registers. */
register __avrword	iret2	__asm__("r9");
register __avrword	iret3	__asm__("r10");
register __avrword	iret4	__asm__("r11");
/* f8 as a float/double return register is likewise not bound here; the
   float and double cases use the value of the call expression. */
/*register float	fret	__asm__("f8");*/
/*register double	dret	__asm__("f8");*/
/* f8..f15: the first eight floating-point arguments. */
register double		farg1	__asm__("f8");
register double		farg2	__asm__("f9");
register double		farg3	__asm__("f10");
register double		farg4	__asm__("f11");
register double		farg5	__asm__("f12");
register double		farg6	__asm__("f13");
register double		farg7	__asm__("f14");
register double		farg8	__asm__("f15");
65 
66 int
avcall_call(av_alist * list)67 avcall_call(av_alist* list)
68 {
69   register __avword*	sp	__asm__("r12"); /* C names for registers */
70 
71   __av_alist* l = &AV_LIST_INNER(list);
72 
73   __avword* argframe = (sp -= __AV_ALIST_WORDS) + 2; /* make room for argument list */
74   int arglen = l->aptr - l->args;
75   int farglen = l->faptr - l->fargs;
76   __avrword iret;
77 
78   {
79     int i;
80     for (i = 8; i < arglen; i++)	/* push function args onto stack */
81       argframe[i-8] = l->args[i];
82   }
83 
84   /* struct return address */
85   if (l->rtype == __AVstruct)
86     sret = l->raddr;
87 
88   /* put max. 8 double args in registers */
89   if (farglen > 0) {
90     farg1 = l->fargs[0];
91     if (farglen > 1) {
92       farg2 = l->fargs[1];
93       if (farglen > 2) {
94         farg3 = l->fargs[2];
95         if (farglen > 3) {
96           farg4 = l->fargs[3];
97           if (farglen > 4) {
98             farg5 = l->fargs[4];
99             if (farglen > 5) {
100               farg6 = l->fargs[5];
101               if (farglen > 6) {
102                 farg7 = l->fargs[6];
103                 if (farglen > 7)
104                   farg8 = l->fargs[7];
105               }
106             }
107           }
108         }
109       }
110     }
111   }
112 
113   /* call function, pass 8 integer and 8 double args in registers */
114   if (l->rtype == __AVfloat) {
115     *(float*)l->raddr = (*(float(*)())l->func)(l->args[0], l->args[1],
116 					       l->args[2], l->args[3],
117 					       l->args[4], l->args[5],
118 					       l->args[6], l->args[7]);
119   } else
120   if (l->rtype == __AVdouble) {
121     *(double*)l->raddr = (*(double(*)())l->func)(l->args[0], l->args[1],
122 						 l->args[2], l->args[3],
123 						 l->args[4], l->args[5],
124 						 l->args[6], l->args[7]);
125   } else {
126     iret = (*l->func)(l->args[0], l->args[1], l->args[2], l->args[3],
127 		      l->args[4], l->args[5], l->args[6], l->args[7]);
128 
129     /* save return value */
130     if (l->rtype == __AVvoid) {
131     } else
132     if (l->rtype == __AVchar) {
133       RETURN(char, iret);
134     } else
135     if (l->rtype == __AVschar) {
136       RETURN(signed char, iret);
137     } else
138     if (l->rtype == __AVuchar) {
139       RETURN(unsigned char, iret);
140     } else
141     if (l->rtype == __AVshort) {
142       RETURN(short, iret);
143     } else
144     if (l->rtype == __AVushort) {
145       RETURN(unsigned short, iret);
146     } else
147     if (l->rtype == __AVint) {
148       RETURN(int, iret);
149     } else
150     if (l->rtype == __AVuint) {
151       RETURN(unsigned int, iret);
152     } else
153     if (l->rtype == __AVlong || l->rtype == __AVlonglong) {
154       RETURN(long, iret);
155     } else
156     if (l->rtype == __AVulong || l->rtype == __AVulonglong) {
157       RETURN(unsigned long, iret);
158     } else
159   /* see above
160     if (l->rtype == __AVfloat) {
161     } else
162     if (l->rtype == __AVdouble) {
163     } else
164   */
165     if (l->rtype == __AVvoidp) {
166       RETURN(void*, iret);
167     } else
168     if (l->rtype == __AVstruct) {
169       if (l->flags & __AV_REGISTER_STRUCT_RETURN) {
170         /* Return structs of size <= 32 in registers. */
171         if (l->rsize > 0 && l->rsize <= 32) {
172           void* raddr = l->raddr;
173           #if 0 /* Unoptimized */
174           if (l->rsize >= 1)
175             ((unsigned char *)raddr)[0] = (unsigned char)(iret);
176           if (l->rsize >= 2)
177             ((unsigned char *)raddr)[1] = (unsigned char)(iret>>8);
178           if (l->rsize >= 3)
179             ((unsigned char *)raddr)[2] = (unsigned char)(iret>>16);
180           if (l->rsize >= 4)
181             ((unsigned char *)raddr)[3] = (unsigned char)(iret>>24);
182           if (l->rsize >= 5)
183             ((unsigned char *)raddr)[4] = (unsigned char)(iret>>32);
184           if (l->rsize >= 6)
185             ((unsigned char *)raddr)[5] = (unsigned char)(iret>>40);
186           if (l->rsize >= 7)
187             ((unsigned char *)raddr)[6] = (unsigned char)(iret>>48);
188           if (l->rsize >= 8)
189             ((unsigned char *)raddr)[7] = (unsigned char)(iret>>56);
190           if (l->rsize >= 9) {
191             ((unsigned char *)raddr)[8] = (unsigned char)(iret2);
192             if (l->rsize >= 10)
193               ((unsigned char *)raddr)[9] = (unsigned char)(iret2>>8);
194             if (l->rsize >= 11)
195               ((unsigned char *)raddr)[10] = (unsigned char)(iret2>>16);
196             if (l->rsize >= 12)
197               ((unsigned char *)raddr)[11] = (unsigned char)(iret2>>24);
198             if (l->rsize >= 13)
199               ((unsigned char *)raddr)[12] = (unsigned char)(iret2>>32);
200             if (l->rsize >= 14)
201               ((unsigned char *)raddr)[13] = (unsigned char)(iret2>>40);
202             if (l->rsize >= 15)
203               ((unsigned char *)raddr)[14] = (unsigned char)(iret2>>48);
204             if (l->rsize >= 16)
205               ((unsigned char *)raddr)[15] = (unsigned char)(iret2>>56);
206             if (l->rsize >= 17) {
207               ((unsigned char *)raddr)[16] = (unsigned char)(iret3);
208               if (l->rsize >= 18)
209                 ((unsigned char *)raddr)[17] = (unsigned char)(iret3>>8);
210               if (l->rsize >= 19)
211                 ((unsigned char *)raddr)[18] = (unsigned char)(iret3>>16);
212               if (l->rsize >= 20)
213                 ((unsigned char *)raddr)[19] = (unsigned char)(iret3>>24);
214               if (l->rsize >= 21)
215                 ((unsigned char *)raddr)[20] = (unsigned char)(iret3>>32);
216               if (l->rsize >= 22)
217                 ((unsigned char *)raddr)[21] = (unsigned char)(iret3>>40);
218               if (l->rsize >= 23)
219                 ((unsigned char *)raddr)[22] = (unsigned char)(iret3>>48);
220               if (l->rsize >= 24)
221                 ((unsigned char *)raddr)[23] = (unsigned char)(iret3>>56);
222               if (l->rsize >= 25) {
223                 ((unsigned char *)raddr)[24] = (unsigned char)(iret4);
224                 if (l->rsize >= 26)
225                   ((unsigned char *)raddr)[25] = (unsigned char)(iret4>>8);
226                 if (l->rsize >= 27)
227                   ((unsigned char *)raddr)[26] = (unsigned char)(iret4>>16);
228                 if (l->rsize >= 28)
229                   ((unsigned char *)raddr)[27] = (unsigned char)(iret4>>24);
230                 if (l->rsize >= 29)
231                   ((unsigned char *)raddr)[28] = (unsigned char)(iret4>>32);
232                 if (l->rsize >= 30)
233                   ((unsigned char *)raddr)[29] = (unsigned char)(iret4>>40);
234                 if (l->rsize >= 31)
235                   ((unsigned char *)raddr)[30] = (unsigned char)(iret4>>48);
236                 if (l->rsize >= 32)
237                   ((unsigned char *)raddr)[31] = (unsigned char)(iret4>>56);
238               }
239             }
240           }
241           #else /* Optimized: fewer conditional jumps, fewer memory accesses */
242           uintptr_t count = l->rsize; /* > 0, ≤ 4*sizeof(__avrword) */
243           __avrword* wordaddr = (__avrword*)((uintptr_t)raddr & ~(uintptr_t)(sizeof(__avrword)-1));
244           uintptr_t start_offset = (uintptr_t)raddr & (uintptr_t)(sizeof(__avrword)-1); /* ≥ 0, < sizeof(__avrword) */
245           uintptr_t end_offset = start_offset + count; /* > 0, < 5*sizeof(__avrword) */
246           if (count <= sizeof(__avrword)) {
247             /* Use iret. */
248             if (end_offset <= sizeof(__avrword)) {
249               /* 0 < end_offset ≤ sizeof(__avrword) */
250               __avrword mask0 = ((__avrword)2 << (end_offset*8-1)) - ((__avrword)1 << (start_offset*8));
251               wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0;
252             } else {
253               /* sizeof(__avrword) < end_offset < 2*sizeof(__avrword), start_offset > 0 */
254               __avrword mask0 = - ((__avrword)1 << (start_offset*8));
255               __avrword mask1 = ((__avrword)2 << (end_offset*8-sizeof(__avrword)*8-1)) - 1;
256               wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0;
257               wordaddr[1] ^= (wordaddr[1] ^ (iret >> (sizeof(__avrword)*8-start_offset*8))) & mask1;
258             }
259           } else if (count <= 2*sizeof(__avrword)) {
260             /* Use iret, iret2. */
261             __avrword mask0 = - ((__avrword)1 << (start_offset*8));
262             wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0;
263             if (end_offset <= 2*sizeof(__avrword)) {
264               /* sizeof(__avrword) < end_offset ≤ 2*sizeof(__avrword) */
265               __avrword mask1 = ((__avrword)2 << (end_offset*8-sizeof(__avrword)*8-1)) - 1;
266               wordaddr[1] ^= (wordaddr[1] ^ ((iret >> (sizeof(__avrword)*4-start_offset*4) >> (sizeof(__avrword)*4-start_offset*4)) | (iret2 << (start_offset*8)))) & mask1;
267             } else {
268               /* 2*sizeof(__avrword) < end_offset < 3*sizeof(__avrword), start_offset > 0 */
269               __avrword mask2 = ((__avrword)2 << (end_offset*8-2*sizeof(__avrword)*8-1)) - 1;
270               wordaddr[1] = (iret >> (sizeof(__avrword)*8-start_offset*8)) | (iret2 << (start_offset*8));
271               wordaddr[2] ^= (wordaddr[2] ^ (iret2 >> (sizeof(__avrword)*8-start_offset*8))) & mask2;
272             }
273           } else if (count <= 3*sizeof(__avrword)) {
274             /* Use iret, iret2, iret3. */
275             __avrword mask0 = - ((__avrword)1 << (start_offset*8));
276             wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0;
277             if (end_offset <= 3*sizeof(__avrword)) {
278               /* 2*sizeof(__avrword) < end_offset ≤ 3*sizeof(__avrword) */
279               __avrword mask2 = ((__avrword)2 << (end_offset*8-sizeof(__avrword)*8-1)) - 1;
280               wordaddr[1] = (iret >> (sizeof(__avrword)*4-start_offset*4) >> (sizeof(__avrword)*4-start_offset*4)) | (iret2 << (start_offset*8));
281               wordaddr[2] ^= (wordaddr[2] ^ ((iret2 >> (sizeof(__avrword)*4-start_offset*4) >> (sizeof(__avrword)*4-start_offset*4)) | (iret3 << (start_offset*8)))) & mask2;
282             } else {
283               /* 3*sizeof(__avrword) < end_offset < 4*sizeof(__avrword), start_offset > 0 */
284               __avrword mask3 = ((__avrword)2 << (end_offset*8-2*sizeof(__avrword)*8-1)) - 1;
285               wordaddr[1] = (iret >> (sizeof(__avrword)*8-start_offset*8)) | (iret2 << (start_offset*8));
286               wordaddr[2] = (iret2 >> (sizeof(__avrword)*8-start_offset*8)) | (iret3 << (start_offset*8));
287               wordaddr[3] ^= (wordaddr[3] ^ (iret3 >> (sizeof(__avrword)*8-start_offset*8))) & mask3;
288             }
289           } else {
290             /* Use iret, iret2, iret3, iret4. */
291             __avrword mask0 = - ((__avrword)1 << (start_offset*8));
292             wordaddr[0] ^= (wordaddr[0] ^ (iret << (start_offset*8))) & mask0;
293             if (end_offset <= 4*sizeof(__avrword)) {
294               /* 3*sizeof(__avrword) < end_offset ≤ 4*sizeof(__avrword) */
295               __avrword mask3 = ((__avrword)2 << (end_offset*8-sizeof(__avrword)*8-1)) - 1;
296               wordaddr[1] = (iret >> (sizeof(__avrword)*4-start_offset*4) >> (sizeof(__avrword)*4-start_offset*4)) | (iret2 << (start_offset*8));
297               wordaddr[2] = (iret2 >> (sizeof(__avrword)*4-start_offset*4) >> (sizeof(__avrword)*4-start_offset*4)) | (iret3 << (start_offset*8));
298               wordaddr[3] ^= (wordaddr[3] ^ ((iret >> (sizeof(__avrword)*4-start_offset*4) >> (sizeof(__avrword)*4-start_offset*4)) | (iret2 << (start_offset*8)))) & mask3;
299             } else {
300               /* 4*sizeof(__avrword) < end_offset < 5*sizeof(__avrword), start_offset > 0 */
301               __avrword mask4 = ((__avrword)2 << (end_offset*8-2*sizeof(__avrword)*8-1)) - 1;
302               wordaddr[1] = (iret >> (sizeof(__avrword)*8-start_offset*8)) | (iret2 << (start_offset*8));
303               wordaddr[2] = (iret2 >> (sizeof(__avrword)*8-start_offset*8)) | (iret3 << (start_offset*8));
304               wordaddr[3] = (iret3 >> (sizeof(__avrword)*8-start_offset*8)) | (iret4 << (start_offset*8));
305               wordaddr[4] ^= (wordaddr[4] ^ (iret4 >> (sizeof(__avrword)*8-start_offset*8))) & mask4;
306             }
307           }
308           #endif
309         }
310       }
311     }
312   }
313   return 0;
314 }
315