1 /* 2 * Copyright (c) 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 * THE SOFTWARE. 21 */ 22 23 #define SNAN 0x001 24 #define QNAN 0x002 25 #define NINF 0x004 26 #define NNOR 0x008 27 #define NSUB 0x010 28 #define NZER 0x020 29 #define PZER 0x040 30 #define PSUB 0x080 31 #define PNOR 0x100 32 #define PINF 0x200 33 34 #define HAVE_BITALIGN() (0) 35 36 #define MATH_DIVIDE(X, Y) ((X) / (Y)) 37 #define MATH_RECIP(X) (1.0f / (X)) 38 #define MATH_SQRT(X) sqrt(X) 39 40 #define SIGNBIT_SP32 0x80000000 41 #define EXSIGNBIT_SP32 0x7fffffff 42 #define EXPBITS_SP32 0x7f800000 43 #define MANTBITS_SP32 0x007fffff 44 #define MANTSIGNBITS_SP32 0x807fffff 45 #define ONEEXPBITS_SP32 0x3f800000 46 #define TWOEXPBITS_SP32 0x40000000 47 #define HALFEXPBITS_SP32 0x3f000000 48 #define IMPBIT_SP32 0x00800000 49 #define QNANBITPATT_SP32 0x7fc00000 50 #define INDEFBITPATT_SP32 0xffc00000 51 #define PINFBITPATT_SP32 0x7f800000 52 #define NINFBITPATT_SP32 0xff800000 53 #define EXPBIAS_SP32 127 54 #define EXPSHIFTBITS_SP32 23 55 #define BIASEDEMIN_SP32 1 56 #define EMIN_SP32 -126 57 #define BIASEDEMAX_SP32 254 58 #define EMAX_SP32 127 59 #define LAMBDA_SP32 1.0e30 60 #define MANTLENGTH_SP32 24 61 #define BASEDIGITS_SP32 7 62 #define ISNEG_SP32(x) (as_itype(x) & (itype)SIGNBIT_SP32) 63 #define vINFINITY_SP32 (as_vtype((utype)PINFBITPATT_SP32)) 64 #define vNINFINITY_SP32 (as_vtype((utype)NINFBITPATT_SP32)) 65 #define vNAN_SP32 (as_vtype((utype)QNANBITPATT_SP32)) 66 #define vZERO_SP32 (vtype)0.0f 67 #define vONE_SP32 (vtype)1.0f 68 69 #ifdef cl_khr_fp64 70 71 #define SIGNBIT_DP64 0x8000000000000000L 72 #define EXSIGNBIT_DP64 0x7fffffffffffffffL 73 #define EXPBITS_DP64 0x7ff0000000000000L 74 #define MANTBITS_DP64 0x000fffffffffffffL 75 #define MANTSIGNBITS_DP64 0x800fffffffffffffL 76 #define ONEEXPBITS_DP64 0x3ff0000000000000L 77 #define TWOEXPBITS_DP64 0x4000000000000000L 78 #define HALFEXPBITS_DP64 0x3fe0000000000000L 79 #define IMPBIT_DP64 0x0010000000000000L 80 #define QNANBITPATT_DP64 0x7ff8000000000000L 81 #define INDEFBITPATT_DP64 0xfff8000000000000L 82 #define PINFBITPATT_DP64 0x7ff0000000000000L 83 #define NINFBITPATT_DP64 0xfff0000000000000L 84 #define EXPBIAS_DP64 1023 85 #define EXPSHIFTBITS_DP64 52 86 #define BIASEDEMIN_DP64 1 87 #define EMIN_DP64 -1022 88 #define BIASEDEMAX_DP64 2046 /* 0x7fe */ 89 #define EMAX_DP64 1023 /* 0x3ff */ 90 #define LAMBDA_DP64 1.0e300 91 #define MANTLENGTH_DP64 53 92 #define BASEDIGITS_DP64 15 93 #define ISNEG_DP64(x) (as_itype(x) & (itype)SIGNBIT_DP64) 94 #define vINFINITY_DP64 (as_vtype((utype)PINFBITPATT_DP64)) 95 #define vNINFINITY_DP64 (as_vtype((utype)NINFBITPATT_DP64)) 96 #define vNAN_DP64 (as_vtype((utype)QNANBITPATT_DP64)) 97 #define vZERO_DP64 (vtype)0.0 98 #define vONE_DP64 (vtype)1.0 99 100 #endif // cl_khr_fp64 101 102 #define ALIGNED(x) __attribute__((aligned(x))) 103 104 105 #ifdef cl_khr_fp64 106 107 typedef struct { double lo,hi; } v2double; 108 typedef struct { double2 lo,hi; } v2double2; 109 typedef struct { double3 lo,hi; } v2double3; 110 typedef struct { double4 lo,hi; } v2double4; 111 typedef struct { double8 lo,hi; } v2double8; 112 typedef struct { double16 lo,hi; } v2double16; 113 114 #endif 115 116 typedef struct { float lo,hi; } v2float; 117 typedef struct { float2 lo,hi; } v2float2; 118 typedef struct { float3 lo,hi; } v2float3; 119 typedef struct { float4 lo,hi; } v2float4; 120 typedef struct { float8 lo,hi; } v2float8; 121 typedef struct { float16 lo,hi; } v2float16; 122 123 // for PI tables sin / cos 124 typedef struct { uint s0, s1, s2, s3; } v4uint; 125 typedef struct { uint2 s0, s1, s2, s3; } v4uint2; 126 typedef struct { uint3 s0, s1, s2, s3; } v4uint3; 127 typedef struct { uint4 s0, s1, s2, s3; } v4uint4; 128 typedef struct { uint8 s0, s1, s2, s3; } v4uint8; 129 typedef struct { uint16 s0, s1, s2, s3; } v4uint16; 130 131 // for PI tables sin / cos 132 typedef struct { int s0, s1, s2, s3; } v4int; 133 typedef struct { int2 s0, s1, s2, s3; } v4int2; 134 typedef struct { int3 s0, s1, s2, s3; } v4int3; 135 typedef struct { int4 s0, s1, s2, s3; } v4int4; 136 typedef struct { int8 s0, s1, s2, s3; } v4int8; 137 typedef struct { int16 s0, s1, s2, s3; } v4int16; 138 139 140 141 #define OCML_ATTR __attribute__((always_inline, const, overloadable)) 142 143 #define ALIGNEDATTR(X) __attribute__((aligned(X))) 144 #define INLINEATTR __attribute__((always_inline)) 145 #define PUREATTR __attribute__((pure)) 146 #define CONSTATTR __attribute__((const)) 147 148 #define FMA fma 149 #define RCP(X) ((vtype)(1.0f) / X) 150 #define DIV(X,Y) (X / Y) 151 152 #define LDEXP ldexp 153 #define SQRT sqrt 154 #define ISINF isinf 155 #define COPYSIGN copysign 156 #define MATH_FAST_RCP RCP 157 #define MATH_RCP RCP 158 #define MATH_MAD pocl_fma 159 160 #define BUILTIN_ABS_F32 fabs 161 #define BUILTIN_TRUNC_F32 trunc 162 #define BUILTIN_FRACTION_F32 fract 163 #define BUILTIN_COPYSIGN_F32 copysign 164 #define BUILTIN_FMA_F32 fma 165 166 #define BUILTIN_FREXP_MANT_F32 _cl_frfrexp 167 #define BUILTIN_FLDEXP_F32 ldexp 168 #define BUILTIN_FREXP_EXP_F32 _cl_expfrexp 169 #define BUILTIN_RINT_F32 rint 170 171 #define BUILTIN_ABS_F64 fabs 172 #define BUILTIN_TRUNC_F64 trunc 173 #define BUILTIN_FRACTION_F64 fract 174 #define BUILTIN_COPYSIGN_F64 copysign 175 #define BUILTIN_FMA_F64 fma 176 177 #define BUILTIN_FREXP_MANT_F64 _cl_frfrexp 178 #define BUILTIN_FLDEXP_F64 ldexp 179 #define BUILTIN_FREXP_EXP_F64 _cl_expfrexp 180 #define BUILTIN_RINT_F64 rint 181 182 #define MATH_PRIVATE(NAME) __pocl_ ## NAME 183 #define MATH_MANGLE(NAME) _CL_OVERLOADABLE NAME 184 185 #ifndef _CL_DECLARE_FUNC_V_VVV 186 #define _CL_DECLARE_FUNC_V_VVV(NAME) \ 187 __IF_FP16( \ 188 half _CL_OVERLOADABLE NAME(half , half , half ); \ 189 half2 _CL_OVERLOADABLE NAME(half2 , half2 , half2 ); \ 190 half3 _CL_OVERLOADABLE NAME(half3 , half3 , half3 ); \ 191 half4 _CL_OVERLOADABLE NAME(half4 , half4 , half4 ); \ 192 half8 _CL_OVERLOADABLE NAME(half8 , half8 , half8 ); \ 193 half16 _CL_OVERLOADABLE NAME(half16 , half16 , half16);) \ 194 float _CL_OVERLOADABLE NAME(float , float , float ); \ 195 float2 _CL_OVERLOADABLE NAME(float2 , float2 , float2 ); \ 196 float3 _CL_OVERLOADABLE NAME(float3 , float3 , float3 ); \ 197 float4 _CL_OVERLOADABLE NAME(float4 , float4 , float4 ); \ 198 float8 _CL_OVERLOADABLE NAME(float8 , float8 , float8 ); \ 199 float16 _CL_OVERLOADABLE NAME(float16 , float16 , float16 ); \ 200 __IF_FP64( \ 201 double _CL_OVERLOADABLE NAME(double , double , double ); \ 202 double2 _CL_OVERLOADABLE NAME(double2 , double2 , double2 ); \ 203 double3 _CL_OVERLOADABLE NAME(double3 , double3 , double3 ); \ 204 double4 _CL_OVERLOADABLE NAME(double4 , double4 , double4 ); \ 205 double8 _CL_OVERLOADABLE NAME(double8 , double8 , double8 ); \ 206 double16 _CL_OVERLOADABLE NAME(double16, double16, double16);) 207 #endif 208 209 _CL_DECLARE_FUNC_V_VVV(pocl_fma) 210