1 /*
2  * Copyright (c) 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20  * THE SOFTWARE.
21  */
22 
/*
 * Floating-point class masks. Every mask is a distinct single bit, so any
 * subset can be combined with bitwise OR. Going by the names, the classes are
 * presumably: signaling NaN, quiet NaN, then negative infinity / normal /
 * subnormal / zero, followed by positive zero / subnormal / normal / infinity.
 */
#define SNAN (1 << 0) /* 0x001 */
#define QNAN (1 << 1) /* 0x002 */
#define NINF (1 << 2) /* 0x004 */
#define NNOR (1 << 3) /* 0x008 */
#define NSUB (1 << 4) /* 0x010 */
#define NZER (1 << 5) /* 0x020 */
#define PZER (1 << 6) /* 0x040 */
#define PSUB (1 << 7) /* 0x080 */
#define PNOR (1 << 8) /* 0x100 */
#define PINF (1 << 9) /* 0x200 */
33 
/* Feature switch: this configuration always evaluates to 0. */
#define HAVE_BITALIGN() (0)

/*
 * Thin arithmetic wrappers. Operands are parenthesized, so operator
 * expressions can be passed as arguments safely.
 */
#define MATH_DIVIDE(NUM, DEN) ((NUM) / (DEN))
#define MATH_RECIP(VAL) (1.0f / (VAL))
#define MATH_SQRT(VAL) sqrt(VAL)
39 
/*
 * Bit-level constants for the 32-bit single-precision format: 1 sign bit,
 * 8 exponent bits and 23 stored mantissa bits, as encoded by the masks below.
 */
#define SIGNBIT_SP32      0x80000000 /* sign bit only */
#define EXSIGNBIT_SP32    0x7fffffff /* everything except the sign bit */
#define EXPBITS_SP32      0x7f800000 /* exponent field */
#define MANTBITS_SP32     0x007fffff /* stored mantissa field */
#define MANTSIGNBITS_SP32 0x807fffff /* sign bit plus mantissa field */
#define ONEEXPBITS_SP32   0x3f800000 /* bit pattern of 1.0f */
#define TWOEXPBITS_SP32   0x40000000 /* bit pattern of 2.0f */
#define HALFEXPBITS_SP32  0x3f000000 /* bit pattern of 0.5f */
#define IMPBIT_SP32       0x00800000 /* first bit above the stored mantissa */
#define QNANBITPATT_SP32  0x7fc00000 /* exponent all ones, top mantissa bit set */
#define INDEFBITPATT_SP32 0xffc00000 /* QNANBITPATT_SP32 with the sign bit set */
#define PINFBITPATT_SP32  0x7f800000 /* +infinity */
#define NINFBITPATT_SP32  0xff800000 /* -infinity */
#define EXPBIAS_SP32      127
#define EXPSHIFTBITS_SP32 23
#define BIASEDEMIN_SP32   1
/* Parenthesized so the sign stays attached to the literal at every use site. */
#define EMIN_SP32         (-126)
#define BIASEDEMAX_SP32   254
#define EMAX_SP32         127
#define LAMBDA_SP32       1.0e30
#define MANTLENGTH_SP32   24
#define BASEDIGITS_SP32   7

/*
 * Type-generic helpers. itype, utype, vtype, as_itype and as_vtype are not
 * defined in the visible part of this file; they must be supplied at the
 * point where these macros are expanded.
 */

/* Non-zero when the sign bit is set in the bit pattern of x. */
#define ISNEG_SP32(x)     (as_itype(x) & (itype)SIGNBIT_SP32)
#define vINFINITY_SP32    (as_vtype((utype)PINFBITPATT_SP32))
#define vNINFINITY_SP32   (as_vtype((utype)NINFBITPATT_SP32))
#define vNAN_SP32         (as_vtype((utype)QNANBITPATT_SP32))
/*
 * The cast expressions are wrapped in parentheses so that operators which bind
 * tighter than a cast (member access, sizeof, ...) apply to the whole value.
 */
#define vZERO_SP32        ((vtype)0.0f)
#define vONE_SP32         ((vtype)1.0f)
68 
#ifdef cl_khr_fp64

/*
 * Bit-level constants for the 64-bit double-precision format: 1 sign bit,
 * 11 exponent bits and 52 stored mantissa bits, as encoded by the masks below.
 */
#define SIGNBIT_DP64      0x8000000000000000L /* sign bit only */
#define EXSIGNBIT_DP64    0x7fffffffffffffffL /* everything except the sign bit */
#define EXPBITS_DP64      0x7ff0000000000000L /* exponent field */
#define MANTBITS_DP64     0x000fffffffffffffL /* stored mantissa field */
#define MANTSIGNBITS_DP64 0x800fffffffffffffL /* sign bit plus mantissa field */
#define ONEEXPBITS_DP64   0x3ff0000000000000L /* bit pattern of 1.0 */
#define TWOEXPBITS_DP64   0x4000000000000000L /* bit pattern of 2.0 */
#define HALFEXPBITS_DP64  0x3fe0000000000000L /* bit pattern of 0.5 */
#define IMPBIT_DP64       0x0010000000000000L /* first bit above the stored mantissa */
#define QNANBITPATT_DP64  0x7ff8000000000000L /* exponent all ones, top mantissa bit set */
#define INDEFBITPATT_DP64 0xfff8000000000000L /* QNANBITPATT_DP64 with the sign bit set */
#define PINFBITPATT_DP64  0x7ff0000000000000L /* +infinity */
#define NINFBITPATT_DP64  0xfff0000000000000L /* -infinity */
#define EXPBIAS_DP64      1023
#define EXPSHIFTBITS_DP64 52
#define BIASEDEMIN_DP64   1
/* Parenthesized so the sign stays attached to the literal at every use site. */
#define EMIN_DP64         (-1022)
#define BIASEDEMAX_DP64   2046 /* 0x7fe */
#define EMAX_DP64         1023 /* 0x3ff */
#define LAMBDA_DP64       1.0e300
#define MANTLENGTH_DP64   53
#define BASEDIGITS_DP64   15

/*
 * Type-generic helpers. itype, utype, vtype, as_itype and as_vtype are not
 * defined in the visible part of this file; they must be supplied at the
 * point where these macros are expanded.
 */

/* Non-zero when the sign bit is set in the bit pattern of x. */
#define ISNEG_DP64(x)     (as_itype(x) & (itype)SIGNBIT_DP64)
#define vINFINITY_DP64    (as_vtype((utype)PINFBITPATT_DP64))
#define vNINFINITY_DP64   (as_vtype((utype)NINFBITPATT_DP64))
#define vNAN_DP64         (as_vtype((utype)QNANBITPATT_DP64))
/*
 * The cast expressions are wrapped in parentheses so that operators which bind
 * tighter than a cast (member access, sizeof, ...) apply to the whole value.
 */
#define vZERO_DP64        ((vtype)0.0)
#define vONE_DP64         ((vtype)1.0)

#endif // cl_khr_fp64
101 
/* Alignment attribute shorthand: ALIGNED(n) expands to the aligned(n) attribute. */
#define ALIGNED(n)  __attribute__((aligned(n)))
103 
104 
#ifdef cl_khr_fp64

/*
 * Two-member (lo, hi) aggregates of double, one per scalar/vector width.
 * Only declared when cl_khr_fp64 is defined.
 */
typedef struct {
  double lo;
  double hi;
} v2double;

typedef struct {
  double2 lo;
  double2 hi;
} v2double2;

typedef struct {
  double3 lo;
  double3 hi;
} v2double3;

typedef struct {
  double4 lo;
  double4 hi;
} v2double4;

typedef struct {
  double8 lo;
  double8 hi;
} v2double8;

typedef struct {
  double16 lo;
  double16 hi;
} v2double16;

#endif /* cl_khr_fp64 */
115 
116 typedef struct { float lo,hi; } v2float;
117 typedef struct { float2 lo,hi; } v2float2;
118 typedef struct { float3 lo,hi; } v2float3;
119 typedef struct { float4 lo,hi; } v2float4;
120 typedef struct { float8 lo,hi; } v2float8;
121 typedef struct { float16 lo,hi; } v2float16;
122 
123 // for PI tables sin / cos
124 typedef struct { uint s0, s1, s2, s3; } v4uint;
125 typedef struct { uint2 s0, s1, s2, s3; } v4uint2;
126 typedef struct { uint3 s0, s1, s2, s3; } v4uint3;
127 typedef struct { uint4 s0, s1, s2, s3; } v4uint4;
128 typedef struct { uint8 s0, s1, s2, s3; } v4uint8;
129 typedef struct { uint16 s0, s1, s2, s3; } v4uint16;
130 
131 // for PI tables sin / cos
132 typedef struct { int s0, s1, s2, s3; } v4int;
133 typedef struct { int2 s0, s1, s2, s3; } v4int2;
134 typedef struct { int3 s0, s1, s2, s3; } v4int3;
135 typedef struct { int4 s0, s1, s2, s3; } v4int4;
136 typedef struct { int8 s0, s1, s2, s3; } v4int8;
137 typedef struct { int16 s0, s1, s2, s3; } v4int16;
138 
139 
140 
/* Single-purpose attribute shorthands. */
#define ALIGNEDATTR(N) __attribute__((aligned(N)))
#define INLINEATTR __attribute__((always_inline))
#define PUREATTR __attribute__((pure))
#define CONSTATTR __attribute__((const))

/* Combined attribute list: always_inline, const and overloadable. */
#define OCML_ATTR __attribute__((always_inline, const, overloadable))
147 
/* Fused multiply-add alias. */
#define FMA fma

/*
 * Reciprocal and quotient helpers. vtype is not defined in the visible part of
 * this file and must be in scope where RCP is expanded. Every argument is
 * parenthesized so that an operator expression passed as an argument keeps
 * its grouping, e.g. DIV(a, b + c) divides by (b + c).
 */
#define RCP(X) ((vtype)(1.0f) / (X))
#define DIV(X,Y) ((X) / (Y))

/* Plain aliases for built-in math functions. */
#define LDEXP ldexp
#define SQRT sqrt
#define ISINF isinf
#define COPYSIGN copysign

/* MATH_* spellings: both reciprocal variants resolve to RCP. */
#define MATH_FAST_RCP RCP
#define MATH_RCP RCP
/* MATH_MAD resolves to pocl_fma, which this file declares further down. */
#define MATH_MAD pocl_fma
159 
/*
 * BUILTIN_* operation names mapped onto function identifiers. The F32 and F64
 * flavour of every operation resolves to the same identifier (presumably
 * relying on overloading to select the precision). _cl_frfrexp and
 * _cl_expfrexp are not defined in the visible part of this file.
 */

/* absolute value */
#define BUILTIN_ABS_F32 fabs
#define BUILTIN_ABS_F64 fabs

/* truncation */
#define BUILTIN_TRUNC_F32 trunc
#define BUILTIN_TRUNC_F64 trunc

/* fractional part */
#define BUILTIN_FRACTION_F32 fract
#define BUILTIN_FRACTION_F64 fract

/* sign transfer */
#define BUILTIN_COPYSIGN_F32 copysign
#define BUILTIN_COPYSIGN_F64 copysign

/* fused multiply-add */
#define BUILTIN_FMA_F32 fma
#define BUILTIN_FMA_F64 fma

/* frexp pieces: mantissa and exponent */
#define BUILTIN_FREXP_MANT_F32 _cl_frfrexp
#define BUILTIN_FREXP_MANT_F64 _cl_frfrexp
#define BUILTIN_FREXP_EXP_F32 _cl_expfrexp
#define BUILTIN_FREXP_EXP_F64 _cl_expfrexp

/* ldexp */
#define BUILTIN_FLDEXP_F32 ldexp
#define BUILTIN_FLDEXP_F64 ldexp

/* round to integral value */
#define BUILTIN_RINT_F32 rint
#define BUILTIN_RINT_F64 rint
181 
/* Builds an internal identifier by pasting the __pocl_ prefix onto FN. */
#define MATH_PRIVATE(FN) __pocl_ ## FN
/* Prefixes FN with _CL_OVERLOADABLE (defined outside the visible part of this file). */
#define MATH_MANGLE(FN) _CL_OVERLOADABLE FN
184 
#ifndef _CL_DECLARE_FUNC_V_VVV
/*
 * Fallback definition, used only when _CL_DECLARE_FUNC_V_VVV has not been
 * defined already. Expands to prototypes of FN taking three operands of one
 * type and returning that type, for the scalar type and the 2/3/4/8/16-wide
 * vector types of float, with the half and double sets wrapped in __IF_FP16
 * and __IF_FP64 respectively. __IF_FP16, __IF_FP64 and _CL_OVERLOADABLE come
 * from outside the visible part of this file.
 */
#define _CL_DECLARE_FUNC_V_VVV(FN)                            \
  __IF_FP16(                                                  \
  half     _CL_OVERLOADABLE FN(half, half, half);             \
  half2    _CL_OVERLOADABLE FN(half2, half2, half2);          \
  half3    _CL_OVERLOADABLE FN(half3, half3, half3);          \
  half4    _CL_OVERLOADABLE FN(half4, half4, half4);          \
  half8    _CL_OVERLOADABLE FN(half8, half8, half8);          \
  half16   _CL_OVERLOADABLE FN(half16, half16, half16);)      \
  float    _CL_OVERLOADABLE FN(float, float, float);          \
  float2   _CL_OVERLOADABLE FN(float2, float2, float2);       \
  float3   _CL_OVERLOADABLE FN(float3, float3, float3);       \
  float4   _CL_OVERLOADABLE FN(float4, float4, float4);       \
  float8   _CL_OVERLOADABLE FN(float8, float8, float8);       \
  float16  _CL_OVERLOADABLE FN(float16, float16, float16);    \
  __IF_FP64(                                                  \
  double   _CL_OVERLOADABLE FN(double, double, double);       \
  double2  _CL_OVERLOADABLE FN(double2, double2, double2);    \
  double3  _CL_OVERLOADABLE FN(double3, double3, double3);    \
  double4  _CL_OVERLOADABLE FN(double4, double4, double4);    \
  double8  _CL_OVERLOADABLE FN(double8, double8, double8);    \
  double16 _CL_OVERLOADABLE FN(double16, double16, double16);)
#endif
208 
/*
 * Emit the prototypes of pocl_fma (the function MATH_MAD expands to) through
 * the three-operand declaration macro.
 */
_CL_DECLARE_FUNC_V_VVV(pocl_fma)
210