1 /* PPU intrinsics as defined by the C/C++ Language extension for Cell BEA.
2    Copyright (C) 2007-2019 Free Software Foundation, Inc.
3 
4    This file is free software; you can redistribute it and/or modify it under
5    the terms of the GNU General Public License as published by the Free
6    Software Foundation; either version 3 of the License, or (at your option)
7    any later version.
8 
9    This file is distributed in the hope that it will be useful, but WITHOUT
10    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12    for more details.
13 
14    Under Section 7 of GPL version 3, you are granted additional
15    permissions described in the GCC Runtime Library Exception, version
16    3.1, as published by the Free Software Foundation.
17 
18    You should have received a copy of the GNU General Public License and
19    a copy of the GCC Runtime Library Exception along with this program;
20    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
21    <http://www.gnu.org/licenses/>.  */
22 
23 /*  TODO:
24     misc ops (traps)
25     supervisor/hypervisor mode ops.  */
26 
27 #ifndef  _PPU_INTRINSICS_H
28 #define _PPU_INTRINSICS_H
29 
30 #if !defined(__PPU__) && !defined(__ppc__) && !defined(__ppc64__) \
31     && !defined(__GNUC__)
32   #error ppu_intrinsics.h included on wrong platform/compiler
33 #endif
34 
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
38 
39 /*
40  * unsigned int __cntlzw(unsigned int)
41  * unsigned int __cntlzd(unsigned long long)
42  * int __mulhw(int, int)
43  * unsigned int __mulhwu(unsigned int, unsigned int)
44  * long long __mulhd(long long, long long)
45  * unsigned long long __mulhdu(unsigned long long, unsigned long long)
46  *
47  * void __sync(void)
48  * void __isync(void)
49  * void __lwsync(void)
50  * void __eieio(void)
51  *
52  * void __nop(void)
53  * void __cctpl(void)
54  * void __cctpm(void)
55  * void __cctph(void)
56  * void __db8cyc(void)
57  * void __db10cyc(void)
58  * void __db12cyc(void)
59  * void __db16cyc(void)
60  *
61  * void __mtspr(unsigned int spr, unsigned long long value)
62  * unsigned long long __mfspr(unsigned int spr)
63  * unsigned long long __mftb(void)
64  *
65  * void __icbi(void *base)
 * void __dcbi(void *base)  (listed for reference; not defined in this header)
67  *
68  * void __dcbf(void *base)
69  * void __dcbz(void *base)
70  * void __dcbst(void *base)
71  * void __dcbtst(void *base)
72  * void __dcbt(void *base)
73  * void __dcbt_TH1000(void *EATRUNC, bool D, bool UG, int ID)
74  * void __dcbt_TH1010(bool GO, int S, int UNITCNT, bool T, bool U, int ID)
75  *
76  * unsigned __lwarx(void *base)
77  * unsigned long long __ldarx(void *base)
78  * bool __stwcx(void *base, unsigned value)
79  * bool __stdcx(void *base, unsigned long long value)
80  *
81  * unsigned short __lhbrx(void *base)
82  * unsigned int __lwbrx(void *base)
83  * unsigned long long __ldbrx(void *base)
84  * void __sthbrx(void *base, unsigned short value)
85  * void __stwbrx(void *base, unsigned int value)
86  * void __stdbrx(void *base, unsigned long long value)
87  *
88  * double __fabs(double x)
89  * float __fabsf(float x)
90  * double __fnabs(double x)
91  * float __fnabsf(float x)
92  * double __fmadd(double x, double y, double z)
93  * double __fmsub(double x, double y, double z)
94  * double __fnmadd(double x, double y, double z)
95  * double __fnmsub(double x, double y, double z)
96  * float __fmadds(float x, float y, float z)
97  * float __fmsubs(float x, float y, float z)
98  * float __fnmadds(float x, float y, float z)
99  * float __fnmsubs(float x, float y, float z)
100  * double __fsel(double x, double y, double z)
101  * float __fsels(float x, float y, float z)
102  * double __frsqrte(double x)
103  * float __fres(float x)
104  * double __fsqrt(double x)
105  * float __fsqrts(float x)
106  * long long __fctid(double x)
 * int __fctiw(double x)
108  * double __fcfid(long long x)
109  * double __mffs(void)
110  * void __mtfsf(int mask, double value)
111  * void __mtfsfi(int bits, int field)
112  * void __mtfsb0(int)
113  * void __mtfsb1(int)
114  * double __setflm(double)
115  *
116  * dcbt intrinsics
117  * void __protected_unlimited_stream_set (unsigned int direction, const void *add, unsigned int ID)
118  * void __protected_stream_set (unsigned int direction, const void *add, unsigned int ID)
119  * void __protected_stream_stop_all (void)
120  * void __protected_stream_stop (unsigned int ID)
121  * void __protected_stream_count (unsigned int unit_cnt, unsigned int ID)
122  * void __protected_stream_go (void)
123  */
124 
/* 16-byte vector of int.  In this header it serves only as the access
   type of the "Z" memory operands used by the cache-management macros.  */
typedef int __V4SI __attribute__ ((__vector_size__ (16)));
126 
/* Count leading zero bits in a 32-bit (__cntlzw) or 64-bit (__cntlzd)
   value.  Both forward straight to the compiler builtins; GCC documents
   the builtins' result for a zero argument as undefined.  */
#define __cntlzw(word) (__builtin_clz (word))
#define __cntlzd(dword) (__builtin_clzll (dword))
129 
/* int __mulhw(int a, int b)
   Issue mulhw on A and B (both converted to int) and yield the int left
   in the destination register; per the Power ISA that is the high-order
   word of the signed 64-bit product.
   The temporary lives in the reserved namespace so that an argument
   mentioning a caller variable named `result' is not captured by it.  */
#define __mulhw(a,b) __extension__	\
  ({int __mulhw_result;			\
  __asm__ ("mulhw %0,%1,%2"		\
	   : "=r" (__mulhw_result)	\
	   : "r" ((int) (a)),		\
	     "r" ((int) (b)));		\
  __mulhw_result; })
137 
/* unsigned int __mulhwu(unsigned int a, unsigned int b)
   Issue mulhwu on A and B (both converted to unsigned int) and yield the
   destination register; per the Power ISA that is the high-order word of
   the unsigned 64-bit product.
   The temporary uses a reserved-namespace name so a caller variable
   named `result' appearing in an argument is not shadowed.  */
#define __mulhwu(a,b) __extension__	\
  ({unsigned int __mulhwu_result;	\
  __asm__ ("mulhwu %0,%1,%2"		\
	   : "=r" (__mulhwu_result)	\
	   : "r" ((unsigned int) (a)),	\
	     "r" ((unsigned int) (b))); \
  __mulhwu_result; })
145 
146 #ifdef __powerpc64__
/* long long __mulhd(long long a, long long b)
   Issue mulhd on A and B (both converted to long long) and yield the
   destination register; per the Power ISA that is the high-order
   doubleword of the signed 128-bit product.
   The temporary uses a reserved-namespace name so a caller variable
   named `result' appearing in an argument is not shadowed.  */
#define __mulhd(a,b) __extension__   \
  ({ long long __mulhd_result;	     \
  __asm__ ("mulhd %0,%1,%2"	     \
	   : "=r" (__mulhd_result)   \
	   : "r" ((long long) (a)),  \
	     "r" ((long long) (b))); \
  __mulhd_result; })
154 
/* unsigned long long __mulhdu(unsigned long long a, unsigned long long b)
   Issue mulhdu on A and B (both converted to unsigned long long) and
   yield the destination register; per the Power ISA that is the
   high-order doubleword of the unsigned 128-bit product.
   The temporary uses a reserved-namespace name so a caller variable
   named `result' appearing in an argument is not shadowed.  */
#define __mulhdu(a,b) __extension__	      \
  ({unsigned long long __mulhdu_result;	      \
  __asm__ ("mulhdu %0,%1,%2"		      \
	   : "=r" (__mulhdu_result)	      \
	   : "r" ((unsigned long long) (a)),  \
	     "r" ((unsigned long long) (b))); \
  __mulhdu_result; })
162 #endif /* __powerpc64__ */
163 
/* Barrier instructions.  Each macro emits exactly the named instruction
   as a volatile asm; the "memory" clobber additionally stops the compiler
   from moving memory accesses across it.  */
#define __sync()   __asm__ __volatile__ ("sync" : : : "memory")
#define __isync()  __asm__ __volatile__ ("isync" : : : "memory")
#define __lwsync() __asm__ __volatile__ ("lwsync" : : : "memory")
#define __eieio()  __asm__ __volatile__ ("eieio" : : : "memory")
168 
/* No-op encodings, each emitted as a volatile asm with a "memory"
   clobber.  __nop is "ori 0,0,0"; the others are "or Rx,Rx,Rx" forms.
   NOTE(review): going by the names these are presumably the Cell
   thread-priority (cctp*) and dispatch-delay (db*cyc) hints -- confirm
   against the processor documentation.  */
#define __nop()     __asm__ __volatile__ ("ori 0,0,0" : : : "memory")
#define __cctpl()   __asm__ __volatile__ ("or 1,1,1" : : : "memory")
#define __cctpm()   __asm__ __volatile__ ("or 2,2,2" : : : "memory")
#define __cctph()   __asm__ __volatile__ ("or 3,3,3" : : : "memory")
#define __db8cyc()  __asm__ __volatile__ ("or 28,28,28" : : : "memory")
#define __db10cyc() __asm__ __volatile__ ("or 29,29,29" : : : "memory")
#define __db12cyc() __asm__ __volatile__ ("or 30,30,30" : : : "memory")
#define __db16cyc() __asm__ __volatile__ ("or 31,31,31" : : : "memory")
177 
178 #ifdef __powerpc64__
/* void __mtspr(unsigned int spr, unsigned long long value)
   Write VALUE to special-purpose register SPR.  SPR must be an integer
   constant expression (it is an "n" operand).  VALUE is converted to the
   documented 64-bit type first, so a narrower argument is fully extended
   before the register is handed to mtspr.  */
#define __mtspr(spr, value) \
  __asm__ volatile ("mtspr %0,%1" : : "n" (spr), \
		    "r" ((unsigned long long) (value)))
181 
/* unsigned long long __mfspr(unsigned int spr)
   Read special-purpose register SPR (an integer constant expression, as
   required by the "n" constraint) and yield its contents.  */
#define __mfspr(spr) __extension__			\
  ({ unsigned long long __spr_contents;			\
  __asm__ __volatile__ ("mfspr %[dst],%[regno]"		\
			: [dst] "=r" (__spr_contents)	\
			: [regno] "n" (spr));		\
  __spr_contents; })
186 #endif /* __powerpc64__ */
187 
/* unsigned long long __mftb(void)
   Read the time base as a 64-bit value.  */
#ifdef __powerpc64__
/* Work around the hardware bug in the current Cell implementation:
   the read is repeated for as long as the low word compares equal to
   zero.  The compare uses CR field 7, which is declared clobbered.  */
#define __mftb() __extension__					\
  ({ unsigned long long __mftb_result;				\
  __asm__ volatile ("1: mftb %[current_tb]\n"			\
      "\tcmpwi 7, %[current_tb], 0\n"				\
      "\tbeq-  7, 1b"						\
      : [current_tb] "=r" (__mftb_result):			\
      :"cr7");							\
  __mftb_result; })
#else
/* 32-bit: read upper, lower, then upper again, and retry when the two
   upper reads differ.  The cmpw writes CR0, so CR0 must be listed as
   clobbered; otherwise the compiler may keep a live condition there
   across the asm.  */
#define __mftb() __extension__				\
  ({ unsigned long long __mftb_result;			\
  unsigned long __mftb_upper;				\
  __asm__ volatile ("1:\n"				\
		    "\tmftbu %0\n"			\
		    "\tmftb %L0\n"			\
		    "\tmftbu %1\n"			\
		    "\tcmpw %0,%1\n"			\
		    "\tbne 1b"				\
		    : "=r" (__mftb_result),		\
		      "=r" (__mftb_upper)		\
		    :					\
		    : "cr0");				\
  __mftb_result; })
#endif /* __powerpc64__ */
211 
/* Cache-management instructions applied to the address BASE.
   Each macro passes *BASE (viewed as a 16-byte __V4SI) as an output "Z"
   memory operand, printed in indexed form by the %y modifier, and adds a
   "memory" clobber so the compiler does not reorder memory accesses
   around the instruction.  */
#define __dcbf(base)						\
  __asm__ __volatile__ ("dcbf %y[line]"				\
			: [line] "=Z" (*(__V4SI*) (base)) : : "memory")

#define __dcbz(base)						\
  __asm__ __volatile__ ("dcbz %y[line]"				\
			: [line] "=Z" (*(__V4SI*) (base)) : : "memory")

#define __dcbst(base)						\
  __asm__ __volatile__ ("dcbst %y[line]"			\
			: [line] "=Z" (*(__V4SI*) (base)) : : "memory")

#define __dcbtst(base)						\
  __asm__ __volatile__ ("dcbtst %y[line]"			\
			: [line] "=Z" (*(__V4SI*) (base)) : : "memory")

#define __dcbt(base)						\
  __asm__ __volatile__ ("dcbt %y[line]"				\
			: [line] "=Z" (*(__V4SI*) (base)) : : "memory")

#define __icbi(base)						\
  __asm__ __volatile__ ("icbi %y[line]"				\
			: [line] "=Z" (*(__V4SI*) (base)) : : "memory")
229 
/* void __dcbt_TH1000(void *EATRUNC, bool D, bool UG, int ID)
   Issue "dcbt ...,8" on a synthesized effective address built as:
     EATRUNC with its low 7 bits cleared
     | (D & 1) << 6 | (UG & 1) << 5 | (ID & 0xF).
   The address is never dereferenced by C code; it only appears as the
   "Z" operand of the asm.  */
#define __dcbt_TH1000(EATRUNC, D, UG, ID)				\
  __asm__ __volatile__ ("dcbt %y[ea],8"					\
	   : [ea] "=Z" (*(__V4SI*) (__SIZE_TYPE__)			\
			((((__SIZE_TYPE__) (EATRUNC))			\
			  & ~(__SIZE_TYPE__) 0x7F)			\
			 | ((((D) & 1) << 6)				\
			    | (((UG) & 1) << 5)				\
			    | ((ID) & 0xF))))				\
	   : : "memory")
236 
/* void __dcbt_TH1010(bool GO, int S, int UNITCNT, bool T, bool U, int ID)
   Issue "dcbt ...,10" on a synthesized effective address whose bits carry
   the control fields:
     (GO & 1) << 31 | (S & 3) << 29 | (UNITCNT & 0x3FF) << 7
     | (T & 1) << 6 | (U & 1) << 5 | (ID & 0xF).
   GO is widened to __SIZE_TYPE__ before the shift by 31.  */
#define __dcbt_TH1010(GO, S, UNITCNT, T, U, ID)				\
  __asm__ __volatile__ ("dcbt %y[ctl],10"				\
	   : [ctl] "=Z" (*(__V4SI*) (__SIZE_TYPE__)			\
			 (((((__SIZE_TYPE__) (GO)) & 1) << 31)		\
			  | (((S) & 0x3) << 29)				\
			  | (((UNITCNT) & 0x3FF) << 7)			\
			  | (((T) & 1) << 6)				\
			  | (((U) & 1) << 5)				\
			  | ((ID) & 0xF)))				\
	   : : "memory")
245 
/* Data-stream wrappers over __dcbt_TH1000 / __dcbt_TH1010.
   The *_set forms pass DIR >> 1 as the D field and differ only in the UG
   field (1 for the unlimited variant, 0 otherwise).  The remaining forms
   select fixed field combinations of __dcbt_TH1010:
     stop_all: S = 3;  stop: S = 2 with ID;  count: UNITCNT with ID;
     go: GO = 1.  */
#define __protected_unlimited_stream_set(DIR, EA, ID)	\
	__dcbt_TH1000 ((EA), (DIR)>>1, 1, (ID))

#define __protected_stream_set(DIR, EA, ID)		\
	__dcbt_TH1000 ((EA), (DIR)>>1, 0, (ID))

#define __protected_stream_stop_all()			\
	__dcbt_TH1010 (0, 3, 0, 0, 0, 0)

#define __protected_stream_stop(ID)			\
	__dcbt_TH1010 (0, 2, 0, 0, 0, (ID))

#define __protected_stream_count(UNITS, ID)		\
	__dcbt_TH1010 (0, 0, (UNITS), 0, 0, (ID))

#define __protected_stream_go()				\
	__dcbt_TH1010 (1, 0, 0, 0, 0, 0)
263 
/* unsigned short __lhbrx(void *base)
   Load the halfword at BASE with lhbrx (byte-reversed load).
   *BASE is passed as a 2-byte "Z" memory input so the compiler knows
   exactly which bytes are read.  All temporaries and the helper typedef
   use reserved-namespace names; the previous `result', `ptrp' and
   `halfwordsize' could capture identifiers used in BASE (for example
   __lhbrx(ptrp) initialised the local from itself).  */
#define __lhbrx(base) __extension__				\
  ({unsigned short __lhbrx_result;				\
    typedef struct {char a[2];} __lhbrx_halfword;		\
    __lhbrx_halfword *__lhbrx_ptr =				\
      (__lhbrx_halfword *)(void *)(base);			\
  __asm__ ("lhbrx %0,%y1"					\
	   : "=r" (__lhbrx_result)				\
	   : "Z" (*__lhbrx_ptr));				\
  __lhbrx_result; })
272 
/* unsigned int __lwbrx(void *base)
   Load the word at BASE with lwbrx (byte-reversed load).
   *BASE is passed as a 4-byte "Z" memory input.  Temporaries and the
   helper typedef use reserved-namespace names so that identifiers such
   as `result', `ptrp' or `wordsize' inside BASE are not captured.  */
#define __lwbrx(base) __extension__				\
  ({unsigned int __lwbrx_result;				\
    typedef struct {char a[4];} __lwbrx_word;			\
    __lwbrx_word *__lwbrx_ptr = (__lwbrx_word *)(void *)(base);	\
  __asm__ ("lwbrx %0,%y1"					\
	   : "=r" (__lwbrx_result)				\
	   : "Z" (*__lwbrx_ptr));				\
  __lwbrx_result; })
281 
282 
/* unsigned long long __ldbrx(void *base)
   Load the doubleword at BASE byte-reversed.
   64-bit targets use a single ldbrx.  32-bit targets use two lwbrx: the
   word at BASE goes to the low half (%L0) and the word at BASE + 4 to the
   high half; the result is early-clobber because it is written before the
   second address is consumed.
   Temporaries and the helper typedef use reserved-namespace names so that
   identifiers such as `result' or `ptrp' inside BASE are not captured.  */
#ifdef __powerpc64__
#define __ldbrx(base) __extension__				\
  ({unsigned long long __ldbrx_result;				\
    typedef struct {char a[8];} __ldbrx_dword;			\
    __ldbrx_dword *__ldbrx_ptr = (__ldbrx_dword *)(void *)(base); \
  __asm__ ("ldbrx %0,%y1"					\
	   : "=r" (__ldbrx_result)				\
	   : "Z" (*__ldbrx_ptr));				\
  __ldbrx_result; })
#else
#define __ldbrx(base) __extension__				\
  ({unsigned long long __ldbrx_result;				\
    typedef struct {char a[8];} __ldbrx_dword;			\
    __ldbrx_dword *__ldbrx_ptr = (__ldbrx_dword *)(void *)(base); \
  __asm__ ("lwbrx %L0,%y1\n"					\
	   "\tlwbrx %0,%y2"					\
	   : "=&r" (__ldbrx_result)				\
	   : "Z" (*__ldbrx_ptr),				\
	     "Z" (*((char *) __ldbrx_ptr + 4)));		\
  __ldbrx_result; })
#endif /* __powerpc64__ */
303 
304 
/* void __sthbrx(void *base, unsigned short value)
   Store VALUE at BASE with sthbrx (byte-reversed halfword store).
   VALUE is converted to the documented unsigned short so the register
   operand always has the expected width.  The helper typedef and pointer
   use reserved-namespace names; previously an identifier `ptrp' or
   `halfwordsize' inside BASE or VALUE was captured by the macro.  */
#define __sthbrx(base, value) do {				\
    typedef struct {char a[2];} __sthbrx_halfword;		\
    __sthbrx_halfword *__sthbrx_ptr =				\
      (__sthbrx_halfword *)(void *)(base);			\
    __asm__ ("sthbrx %1,%y0"					\
	   : "=Z" (*__sthbrx_ptr)				\
	   : "r" ((unsigned short) (value)));			\
   } while (0)
312 
/* void __stwbrx(void *base, unsigned int value)
   Store VALUE at BASE with stwbrx (byte-reversed word store).
   VALUE is converted to the documented unsigned int: without the
   conversion a 64-bit argument on a 32-bit target would occupy a register
   pair and %1 would name the wrong half.  The helper typedef and pointer
   use reserved-namespace names so identifiers in BASE or VALUE are not
   captured.  */
#define __stwbrx(base, value) do {				\
    typedef struct {char a[4];} __stwbrx_word;			\
    __stwbrx_word *__stwbrx_ptr =				\
      (__stwbrx_word *)(void *)(base);				\
    __asm__ ("stwbrx %1,%y0"					\
	   : "=Z" (*__stwbrx_ptr)				\
	   : "r" ((unsigned int) (value)));			\
   } while (0)
320 
/* void __stdbrx(void *base, unsigned long long value)
   Store VALUE at BASE byte-reversed.
   64-bit targets use a single stdbrx.  32-bit targets store the low half
   (%L2) reversed at BASE and the high half reversed at BASE + 4.
   VALUE is converted to the documented unsigned long long: the 32-bit
   form relies on %L2, which is only meaningful for a 64-bit operand, and
   the 64-bit form needs a fully extended register.  Helper names are in
   the reserved namespace so identifiers in BASE or VALUE are not
   captured.  */
#ifdef __powerpc64__
#define __stdbrx(base, value) do {				\
    typedef struct {char a[8];} __stdbrx_dword;			\
    __stdbrx_dword *__stdbrx_ptr =				\
      (__stdbrx_dword *)(void *)(base);				\
    __asm__ ("stdbrx %1,%y0"					\
	   : "=Z" (*__stdbrx_ptr)				\
	   : "r" ((unsigned long long) (value)));		\
   } while (0)
#else
#define __stdbrx(base, value) do {				\
    typedef struct {char a[8];} __stdbrx_dword;			\
    __stdbrx_dword *__stdbrx_ptr =				\
      (__stdbrx_dword *)(void *)(base);				\
    __asm__ ("stwbrx %L2,%y0\n"					\
	     "\tstwbrx %2,%y1"					\
	   : "=Z" (*__stdbrx_ptr),				\
	     "=Z" (*((char *) __stdbrx_ptr + 4))		\
	   : "r" ((unsigned long long) (value)));		\
   } while (0)
#endif /* __powerpc64__ */
339 
340 
/* unsigned __lwarx(void *base)
   Load the word at BASE with lwarx (load and reserve) and yield it.
   The asm is volatile so it is never deleted or merged.  Temporaries and
   the helper typedef use reserved-namespace names so identifiers such as
   `result' or `ptrp' inside BASE are not captured.  */
#define __lwarx(base) __extension__				\
  ({unsigned int __lwarx_result;				\
    typedef struct {char a[4];} __lwarx_word;			\
    __lwarx_word *__lwarx_ptr = (__lwarx_word *)(void *)(base);	\
  __asm__ volatile ("lwarx %0,%y1"				\
	   : "=r" (__lwarx_result)				\
	   : "Z" (*__lwarx_ptr));				\
  __lwarx_result; })
349 
350 #ifdef __powerpc64__
/* unsigned long long __ldarx(void *base)
   Load the doubleword at BASE with ldarx (load and reserve) and yield it.
   Temporaries and the helper typedef use reserved-namespace names so
   identifiers such as `result' or `ptrp' inside BASE are not captured.  */
#define __ldarx(base) __extension__				\
  ({unsigned long long __ldarx_result;				\
    typedef struct {char a[8];} __ldarx_dword;			\
    __ldarx_dword *__ldarx_ptr = (__ldarx_dword *)(void *)(base); \
  __asm__ volatile ("ldarx %0,%y1"				\
	   : "=r" (__ldarx_result)				\
	   : "Z" (*__ldarx_ptr));				\
  __ldarx_result; })
359 #endif /* __powerpc64__ */
360 
/* bool __stwcx(void *base, unsigned value)
   Attempt a store-conditional of VALUE to the word at BASE (stwcx.), then
   copy CR0 out with mfocrf and yield its EQ bit (mask 0x20000000 shifted
   down to bit 0): 1 when the store was performed, 0 otherwise.
   VALUE is converted to the documented unsigned int so the register
   operand has the expected width.  Temporaries and the helper typedef use
   reserved-namespace names; previously `result' or `ptrp' appearing in an
   argument was captured by the macro's own locals.  */
#define __stwcx(base, value) __extension__			\
  ({unsigned int __stwcx_cr;					\
    typedef struct {char a[4];} __stwcx_word;			\
    __stwcx_word *__stwcx_ptr = (__stwcx_word *)(void *)(base);	\
  __asm__ volatile ("stwcx. %2,%y1\n"				\
	   "\tmfocrf %0,0x80"					\
	   : "=r" (__stwcx_cr),					\
	     "=Z" (*__stwcx_ptr)				\
	   : "r" ((unsigned int) (value)) : "cr0");		\
  ((__stwcx_cr & 0x20000000) >> 29); })
371 
372 
373 #ifdef __powerpc64__
/* bool __stdcx(void *base, unsigned long long value)
   Attempt a store-conditional of VALUE to the doubleword at BASE
   (stdcx.), then copy CR0 out with mfocrf and yield its EQ bit: 1 when
   the store was performed, 0 otherwise.
   VALUE is converted to the documented unsigned long long so a narrower
   argument is fully extended before being stored.  Temporaries and the
   helper typedef use reserved-namespace names so identifiers such as
   `result' or `ptrp' in an argument are not captured.  */
#define __stdcx(base, value) __extension__			\
  ({unsigned long long __stdcx_cr;				\
    typedef struct {char a[8];} __stdcx_dword;			\
    __stdcx_dword *__stdcx_ptr = (__stdcx_dword *)(void *)(base); \
  __asm__ volatile ("stdcx. %2,%y1\n"				\
	   "\tmfocrf %0,0x80"					\
	   : "=r" (__stdcx_cr),					\
	     "=Z" (*__stdcx_ptr)				\
	   : "r" ((unsigned long long) (value)) : "cr0");	\
  ((__stdcx_cr & 0x20000000) >> 29); })
384 #endif /* __powerpc64__ */
385 
/* double __mffs(void)
   Execute mffs and yield the floating-point register it wrote, as a
   double.  The asm is volatile, so each use performs a fresh read.  */
#define __mffs() __extension__					\
  ({double __fpscr_image;					\
  __asm__ __volatile__ ("mffs %[img]" : [img] "=d" (__fpscr_image)); \
  __fpscr_image; })
390 
/* void __mtfsf(int mask, double value)
   Execute mtfsf with field mask MASK (an integer constant expression,
   "n" operand) and VALUE converted to double in a floating-point
   register.  */
#define __mtfsf(mask,value)					\
  __asm__ __volatile__ ("mtfsf %[fields],%[image]"		\
			: /* no outputs */			\
			: [fields] "n" (mask),			\
			  [image] "d" ((double) (value)))
393 
/* void __mtfsfi(int bits, int field)
   Execute mtfsfi with BITS and FIELD, in that operand order.  Both must
   be integer constant expressions ("n" operands).  */
#define __mtfsfi(bits,field)					\
  __asm__ __volatile__ ("mtfsfi %[first],%[second]"		\
			: /* no outputs */			\
			: [first] "n" (bits), [second] "n" (field))
396 
/* void __mtfsb0(int bit) / void __mtfsb1(int bit)
   Execute mtfsb0 / mtfsb1 on bit number BIT, which must be an integer
   constant expression ("n" operand).  */
#define __mtfsb0(bit) \
  __asm__ __volatile__ ("mtfsb0 %[bitno]" : : [bitno] "n" (bit))
#define __mtfsb1(bit) \
  __asm__ __volatile__ ("mtfsb1 %[bitno]" : : [bitno] "n" (bit))
399 
/* double __setflm(double v)
   Read the current status image with mffs, then write V (converted to
   double) with "mtfsf 255", and yield the image read first.  The output
   is early-clobber because it is written before V is consumed.
   The temporary uses a reserved-namespace name so that a caller variable
   named `result' used in V is not captured.  */
#define __setflm(v) __extension__		      \
  ({double __setflm_old;			      \
  __asm__ volatile ("mffs %0\n\tmtfsf 255,%1"	      \
		    : "=&d" (__setflm_old)	      \
		    : "d" ((double) (v)));	      \
  __setflm_old; })
406 
407 /* __builtin_fabs may perform unnecessary rounding.  */
408 
/* Rename __fabs and __fabsf to work around internal prototypes defined
   in bits/mathcalls.h with some glibc versions.  */
#define __fabs __ppu_fabs
#define __fabsf __ppu_fabsf

/* Apply the fabs instruction to X and return the result.  */
static __inline__ __attribute__((always_inline)) double
__fabs(double x)
{
  double magnitude;
  __asm__("fabs %[dst],%[src]" : [dst] "=d"(magnitude) : [src] "d"(x));
  return magnitude;
}
422 
/* Single-precision variant: apply the fabs instruction to X.  */
static __inline__ __attribute__((always_inline)) float
__fabsf(float x)
{
  float magnitude;
  __asm__("fabs %[dst],%[src]" : [dst] "=f"(magnitude) : [src] "f"(x));
  return magnitude;
}
431 
/* Apply the fnabs instruction (negative absolute value) to X.  */
static __inline__ __attribute__((always_inline)) double
__fnabs(double x)
{
  double negated;
  __asm__("fnabs %[dst],%[src]" : [dst] "=d"(negated) : [src] "d"(x));
  return negated;
}
440 
/* Single-precision variant: apply the fnabs instruction to X.  */
static __inline__ __attribute__((always_inline)) float
__fnabsf(float x)
{
  float negated;
  __asm__("fnabs %[dst],%[src]" : [dst] "=f"(negated) : [src] "f"(x));
  return negated;
}
449 
/* Fused multiply-add via the fmadd instruction with operands X, Y, Z in
   that order (per the Power ISA: x * y + z).  */
static __inline__ __attribute__((always_inline)) double
__fmadd(double x, double y, double z)
{
  double fused;
  __asm__("fmadd %[dst],%[x],%[y],%[z]"
	  : [dst] "=d"(fused)
	  : [x] "d"(x), [y] "d"(y), [z] "d"(z));
  return fused;
}
459 
/* Fused multiply-subtract via the fmsub instruction with operands X, Y, Z
   in that order (per the Power ISA: x * y - z).  */
static __inline__ __attribute__((always_inline)) double
__fmsub(double x, double y, double z)
{
  double fused;
  __asm__("fmsub %[dst],%[x],%[y],%[z]"
	  : [dst] "=d"(fused)
	  : [x] "d"(x), [y] "d"(y), [z] "d"(z));
  return fused;
}
469 
/* Negated fused multiply-add via the fnmadd instruction with operands
   X, Y, Z in that order (per the Power ISA: -(x * y + z)).  */
static __inline__ __attribute__((always_inline)) double
__fnmadd(double x, double y, double z)
{
  double fused;
  __asm__("fnmadd %[dst],%[x],%[y],%[z]"
	  : [dst] "=d"(fused)
	  : [x] "d"(x), [y] "d"(y), [z] "d"(z));
  return fused;
}
479 
/* Negated fused multiply-subtract via the fnmsub instruction with
   operands X, Y, Z in that order (per the Power ISA: -(x * y - z)).  */
static __inline__ __attribute__((always_inline)) double
__fnmsub(double x, double y, double z)
{
  double fused;
  __asm__("fnmsub %[dst],%[x],%[y],%[z]"
	  : [dst] "=d"(fused)
	  : [x] "d"(x), [y] "d"(y), [z] "d"(z));
  return fused;
}
489 
/* Single-precision fused multiply-add via fmadds, operands X, Y, Z in
   that order (per the Power ISA: x * y + z).  */
static __inline__ __attribute__((always_inline)) float
__fmadds(float x, float y, float z)
{
  float fused;
  __asm__("fmadds %[dst],%[x],%[y],%[z]"
	  : [dst] "=f"(fused)
	  : [x] "f"(x), [y] "f"(y), [z] "f"(z));
  return fused;
}
499 
/* Single-precision fused multiply-subtract via fmsubs, operands X, Y, Z
   in that order (per the Power ISA: x * y - z).  */
static __inline__ __attribute__((always_inline)) float
__fmsubs(float x, float y, float z)
{
  float fused;
  __asm__("fmsubs %[dst],%[x],%[y],%[z]"
	  : [dst] "=f"(fused)
	  : [x] "f"(x), [y] "f"(y), [z] "f"(z));
  return fused;
}
509 
/* Single-precision negated fused multiply-add via fnmadds, operands
   X, Y, Z in that order (per the Power ISA: -(x * y + z)).  */
static __inline__ __attribute__((always_inline)) float
__fnmadds(float x, float y, float z)
{
  float fused;
  __asm__("fnmadds %[dst],%[x],%[y],%[z]"
	  : [dst] "=f"(fused)
	  : [x] "f"(x), [y] "f"(y), [z] "f"(z));
  return fused;
}
519 
/* Single-precision negated fused multiply-subtract via fnmsubs, operands
   X, Y, Z in that order (per the Power ISA: -(x * y - z)).  */
static __inline__ __attribute__((always_inline)) float
__fnmsubs(float x, float y, float z)
{
  float fused;
  __asm__("fnmsubs %[dst],%[x],%[y],%[z]"
	  : [dst] "=f"(fused)
	  : [x] "f"(x), [y] "f"(y), [z] "f"(z));
  return fused;
}
529 
/* Floating select via the fsel instruction with operands X, Y, Z in that
   order (per the Power ISA: Y when X >= 0, otherwise Z).  */
static __inline__ __attribute__((always_inline)) double
__fsel(double x, double y, double z)
{
  double chosen;
  __asm__("fsel %[dst],%[x],%[y],%[z]"
	  : [dst] "=d"(chosen)
	  : [x] "d"(x), [y] "d"(y), [z] "d"(z));
  return chosen;
}
539 
/* Single-precision wrapper around the fsel instruction, operands X, Y, Z
   in that order (per the Power ISA: Y when X >= 0, otherwise Z).  */
static __inline__ __attribute__((always_inline)) float
__fsels(float x, float y, float z)
{
  float chosen;
  __asm__("fsel %[dst],%[x],%[y],%[z]"
	  : [dst] "=f"(chosen)
	  : [x] "f"(x), [y] "f"(y), [z] "f"(z));
  return chosen;
}
549 
/* Apply the frsqrte instruction (reciprocal square-root estimate) to X.
   The result is an estimate, not a correctly rounded value.  */
static __inline__ __attribute__((always_inline)) double
__frsqrte(double x)
{
  double estimate;
  __asm__("frsqrte %[dst],%[src]" : [dst] "=d" (estimate) : [src] "d" (x));
  return estimate;
}
558 
/* Apply the fres instruction (single-precision reciprocal estimate) to X.
   The result is an estimate, not a correctly rounded value.  */
static __inline__ __attribute__((always_inline)) float
__fres(float x)
{
  float estimate;
  __asm__("fres %[dst],%[src]" : [dst] "=f"(estimate) : [src] "f"(x));
  return estimate;
}
567 
/* Apply the fsqrt instruction (double-precision square root) to X.  */
static __inline__ __attribute__((always_inline)) double
__fsqrt(double x)
{
  double root;
  __asm__("fsqrt %[dst],%[src]" : [dst] "=d"(root) : [src] "d"(x));
  return root;
}
576 
/* Apply the fsqrts instruction (single-precision square root) to X.  */
static __inline__ __attribute__((always_inline)) float
__fsqrts(float x)
{
  float root;
  __asm__("fsqrts %[dst],%[src]" : [dst] "=f"(root) : [src] "f"(x));
  return root;
}
585 
/* Multiply A by B with the fmul instruction.  */
static __inline__ __attribute__ ((always_inline)) double
__fmul (double a, double b)
{
  double product;
  __asm__ ("fmul %[dst],%[lhs],%[rhs]"
	   : [dst] "=d" (product)
	   : [lhs] "d" (a), [rhs] "d" (b));
  return product;
}
594 
/* Multiply A by B with the fmuls instruction (single precision).
   The float result uses the "f" constraint, matching the inputs and every
   other single-precision intrinsic in this header; "d" is the constraint
   GCC historically reserved for double-precision values.  */
static __inline__ float __fmuls (float a, float b) __attribute__ ((always_inline));
static __inline__ float
__fmuls (float a, float b)
{
  float d;
  __asm__ ("fmuls %0,%1,%2" : "=f" (d) : "f" (a), "f" (b));
  return d;
}
603 
/* Apply the frsp instruction (round to single precision) to A.
   The float result uses the "f" constraint, matching the input and every
   other single-precision intrinsic in this header; "d" is the constraint
   GCC historically reserved for double-precision values.  */
static __inline__ float __frsp (float a) __attribute__ ((always_inline));
static __inline__ float
__frsp (float a)
{
  float d;
  __asm__ ("frsp %0,%1" : "=f" (d) : "f" (a));
  return d;
}
612 
/* Convert the 64-bit integer A to double with the fcfid instruction.
   A is handed to the asm in a floating-point register ("d").  */
static __inline__ __attribute__((always_inline)) double
__fcfid (long long a)
{
  double converted;
  __asm__ ("fcfid %[dst],%[src]" : [dst] "=d" (converted) : [src] "d" (a));
  return converted;
}
621 
/* Convert A to a 64-bit integer with the fctid instruction (per the Power
   ISA this uses the current rounding mode).  The integer result is
   produced in a floating-point register ("d").  */
static __inline__ __attribute__ ((always_inline)) long long
__fctid (double a)
{
  long long converted;
  __asm__ ("fctid %[dst],%[src]" : [dst] "=d" (converted) : [src] "d" (a));
  return converted;
}
630 
/* Convert A to a 64-bit integer with the fctidz instruction (per the
   Power ISA this rounds toward zero).  The integer result is produced in
   a floating-point register ("d").  */
static __inline__ __attribute__ ((always_inline)) long long
__fctidz (double a)
{
  long long converted;
  __asm__ ("fctidz %[dst],%[src]" : [dst] "=d" (converted) : [src] "d" (a));
  return converted;
}
639 
/* Convert A to a 32-bit integer with the fctiw instruction (per the Power
   ISA this uses the current rounding mode).  The instruction writes a
   64-bit floating-point register; the value is narrowed to int on
   return.  */
static __inline__ __attribute__ ((always_inline)) int
__fctiw (double a)
{
  unsigned long long raw;
  __asm__ ("fctiw %[dst],%[src]" : [dst] "=d" (raw) : [src] "d" (a));
  return (int) raw;
}
648 
/* Convert A to a 32-bit integer with the fctiwz instruction (per the
   Power ISA this rounds toward zero).  The instruction writes a 64-bit
   floating-point register; the value is narrowed to int on return.  */
static __inline__ __attribute__ ((always_inline)) int
__fctiwz (double a)
{
  long long raw;
  __asm__ ("fctiwz %[dst],%[src]" : [dst] "=d" (raw) : [src] "d" (a));
  return (int) raw;
}
657 
658 #ifdef __powerpc64__
/* __rldcl(a, b, mb): issue rldcl (rotate left doubleword by the amount in
   B, then clear left) and yield the unsigned long long result.  MB must
   be an integer constant ("i" operand).
   A and B are converted to unsigned long long so narrower arguments are
   fully extended before the 64-bit rotate.  The temporary uses a
   reserved-namespace name; the previous `d' captured any caller variable
   named `d' used in an argument.  */
#define __rldcl(a,b,mb) __extension__ \
  ({ \
    unsigned long long __rldcl_result; \
    __asm__ ("rldcl %0,%1,%2,%3" \
	     : "=r" (__rldcl_result) \
	     : "r" ((unsigned long long) (a)), \
	       "r" ((unsigned long long) (b)), "i" (mb)); \
    __rldcl_result; \
  })
665 
/* __rldcr(a, b, me): issue rldcr (rotate left doubleword by the amount in
   B, then clear right) and yield the unsigned long long result.  ME must
   be an integer constant ("i" operand).
   A and B are converted to unsigned long long so narrower arguments are
   fully extended before the 64-bit rotate.  The temporary uses a
   reserved-namespace name so a caller variable named `d' in an argument
   is not captured.  */
#define __rldcr(a,b,me) __extension__ \
  ({ \
    unsigned long long __rldcr_result; \
    __asm__ ("rldcr %0,%1,%2,%3" \
	     : "=r" (__rldcr_result) \
	     : "r" ((unsigned long long) (a)), \
	       "r" ((unsigned long long) (b)), "i" (me)); \
    __rldcr_result; \
  })
672 
/* __rldic(a, sh, mb): issue rldic (rotate left doubleword immediate, then
   clear) and yield the unsigned long long result.  SH and MB must be
   integer constants ("i" operands).
   A is converted to unsigned long long so a narrower argument is fully
   extended before the 64-bit rotate.  The temporary uses a
   reserved-namespace name so a caller variable named `d' in an argument
   is not captured.  */
#define __rldic(a,sh,mb) __extension__ \
  ({ \
    unsigned long long __rldic_result; \
    __asm__ ("rldic %0,%1,%2,%3" \
	     : "=r" (__rldic_result) \
	     : "r" ((unsigned long long) (a)), "i" (sh), "i" (mb)); \
    __rldic_result; \
  })
679 
/* __rldicl(a, sh, mb): issue rldicl (rotate left doubleword immediate,
   then clear left) and yield the unsigned long long result.  SH and MB
   must be integer constants ("i" operands).
   A is converted to unsigned long long so a narrower argument is fully
   extended before the 64-bit rotate.  The temporary uses a
   reserved-namespace name so a caller variable named `d' in an argument
   is not captured.  */
#define __rldicl(a,sh,mb) __extension__ \
  ({ \
    unsigned long long __rldicl_result; \
    __asm__ ("rldicl %0,%1,%2,%3" \
	     : "=r" (__rldicl_result) \
	     : "r" ((unsigned long long) (a)), "i" (sh), "i" (mb)); \
    __rldicl_result; \
  })
686 
/* __rldicr(a, sh, me): issue rldicr (rotate left doubleword immediate,
   then clear right) and yield the unsigned long long result.  SH and ME
   must be integer constants ("i" operands).
   A is converted to unsigned long long so a narrower argument is fully
   extended before the 64-bit rotate.  The temporary uses a
   reserved-namespace name so a caller variable named `d' in an argument
   is not captured.  */
#define __rldicr(a,sh,me) __extension__ \
  ({ \
    unsigned long long __rldicr_result; \
    __asm__ ("rldicr %0,%1,%2,%3" \
	     : "=r" (__rldicr_result) \
	     : "r" ((unsigned long long) (a)), "i" (sh), "i" (me)); \
    __rldicr_result; \
  })
693 
/* __rldimi(a, b, sh, mb): issue rldimi (rotate left doubleword immediate,
   then mask insert).  A supplies the initial contents of the destination
   register (tied via the "0" constraint), B is the rotated source; SH and
   MB must be integer constants ("i" operands).
   A and B are converted to unsigned long long so narrower arguments are
   fully extended and the tied operand matches the output's width.  The
   temporary uses a reserved-namespace name so a caller variable named
   `d' in an argument is not captured.  */
#define __rldimi(a,b,sh,mb) __extension__ \
  ({ \
    unsigned long long __rldimi_result; \
    __asm__ ("rldimi %0,%1,%2,%3" \
	     : "=r" (__rldimi_result) \
	     : "r" ((unsigned long long) (b)), "i" (sh), "i" (mb), \
	       "0" ((unsigned long long) (a))); \
    __rldimi_result; \
  })
700 #endif /* __powerpc64__ */
701 
/* __rlwimi(a, b, sh, mb, me): issue rlwimi (rotate left word immediate,
   then mask insert).  A supplies the initial contents of the destination
   register (tied via the "0" constraint), B is the rotated source; SH, MB
   and ME must be integer constants ("i" operands).
   A and B are converted to unsigned int so the tied operand matches the
   output's width and a wider argument cannot occupy a register pair.  The
   temporary uses a reserved-namespace name so a caller variable named
   `d' in an argument is not captured.  */
#define __rlwimi(a,b,sh,mb,me) __extension__ \
  ({ \
    unsigned int __rlwimi_result; \
    __asm__ ("rlwimi %0,%1,%2,%3,%4" \
	     : "=r" (__rlwimi_result) \
	     : "r" ((unsigned int) (b)), "i" (sh), "i" (mb), "i" (me), \
	       "0" ((unsigned int) (a))); \
    __rlwimi_result; \
  })
708 
/* __rlwinm(a, sh, mb, me): issue rlwinm (rotate left word immediate, then
   AND with mask) and yield the unsigned int result.  SH, MB and ME must
   be integer constants ("i" operands).
   A is converted to unsigned int so a wider argument cannot occupy a
   register pair.  The temporary uses a reserved-namespace name so a
   caller variable named `d' in an argument is not captured.  */
#define __rlwinm(a,sh,mb,me) __extension__ \
  ({ \
    unsigned int __rlwinm_result; \
    __asm__ ("rlwinm %0,%1,%2,%3,%4" \
	     : "=r" (__rlwinm_result) \
	     : "r" ((unsigned int) (a)), "i" (sh), "i" (mb), "i" (me)); \
    __rlwinm_result; \
  })
715 
/* __rlwnm(a, b, mb, me): issue rlwnm (rotate left word by the amount in
   B, then AND with mask) and yield the unsigned int result.  MB and ME
   must be integer constants ("i" operands).
   A and B are converted to unsigned int so wider arguments cannot occupy
   a register pair.  The temporary uses a reserved-namespace name so a
   caller variable named `d' in an argument is not captured.  */
#define __rlwnm(a,b,mb,me) __extension__ \
  ({ \
    unsigned int __rlwnm_result; \
    __asm__ ("rlwnm %0,%1,%2,%3,%4" \
	     : "=r" (__rlwnm_result) \
	     : "r" ((unsigned int) (a)), "r" ((unsigned int) (b)), \
	       "i" (mb), "i" (me)); \
    __rlwnm_result; \
  })
722 
723 #ifdef __cplusplus
724 }
725 #endif
726 
727 #endif /* _PPU_INTRINSICS_H */
728