1/*========================== begin_copyright_notice ============================ 2 3Copyright (C) 2020-2021 Intel Corporation 4 5SPDX-License-Identifier: MIT 6 7============================= end_copyright_notice ===========================*/ 8 9#include "../imf.h" 10#pragma OPENCL FP_CONTRACT OFF 11typedef struct 12{ 13 unsigned int Exp_tbl_L[32]; 14 unsigned int Exp_tbl_H[32]; 15 16 unsigned int L2E; 17 unsigned int Shifter; 18 unsigned int L2H; 19 unsigned int L2L; 20 unsigned int EMask; 21 22 unsigned int AbsMask; 23 unsigned int Threshold; 24 unsigned int SmallX; 25 unsigned int IndexMask; 26 27 unsigned int IndexMask2; 28 unsigned int Zero; 29 unsigned int knc_Shifter; 30 unsigned int knc_L2EH; 31 unsigned int knc_L2EL; 32 unsigned int knc_EMask; 33 34} __internal_sexp_la_data_avx512_t; 35static __constant __internal_sexp_la_data_avx512_t __internal_sexp_la_data_avx512 = { 36 { 37 0x3f800001u, 0x3f801631u, 0x3f802c65u, 0x3f80429du, 38 0x3f8058d9u, 0x3f806f18u, 0x3f80855cu, 0x3f809ba3u, 39 0x3f80b1eeu, 0x3f80c83du, 0x3f80de90u, 0x3f80f4e7u, 40 0x3f810b42u, 0x3f8121a0u, 0x3f813803u, 0x3f814e69u, 41 0x3f8164d3u, 0x3f817b41u, 0x3f8191b3u, 0x3f81a829u, 42 0x3f81bea2u, 0x3f81d520u, 0x3f81eba2u, 0x3f820227u, 43 0x3f8218b0u, 0x3f822f3du, 0x3f8245cfu, 0x3f825c64u, 44 0x3f8272fdu, 0x3f828999u, 0x3f82a03au, 0x3f82b6dfu, 45 } 46 , { 47 0x3f800000u, 0x3f82cd87u, 0x3f85aac3u, 0x3f88980fu, 48 0x3f8b95c2u, 0x3f8ea43au, 0x3f91c3d3u, 0x3f94f4f0u, 49 0x3f9837f0u, 0x3f9b8d3au, 0x3f9ef532u, 0x3fa27043u, 50 0x3fa5fed7u, 0x3fa9a15bu, 0x3fad583fu, 0x3fb123f6u, 51 0x3fb504f3u, 0x3fb8fbafu, 0x3fbd08a4u, 0x3fc12c4du, 52 0x3fc5672au, 0x3fc9b9beu, 0x3fce248cu, 0x3fd2a81eu, 53 0x3fd744fdu, 0x3fdbfbb8u, 0x3fe0ccdfu, 0x3fe5b907u, 54 0x3feac0c7u, 0x3fefe4bau, 0x3ff5257du, 0x3ffa83b3u, 55 } 56 57 , 0x3fB8AA3Bu, 0x46400000u, 0x3f317218u, 0xb102e308u, 0x3f000000u, 0x7fffffffu, 0x42AEAC4Fu, 0x2f800000u, 0x0000007cu, 0x00000f80u, 0x00000000u, 58 0x4b400000u, 0x3fb8aa3bu, 0x32a57060u, 0xbfffffffu 59}; 60 61typedef struct 62{ 63 unsigned int _sInvLn2; 64 unsigned int _sShifter; 65 unsigned int _sLn2hi; 66 unsigned int _sLn2lo; 67 unsigned int _iBias; 68 69 unsigned int _sPC0; 70 unsigned int _sPC1; 71 unsigned int _sPC2; 72 unsigned int _sPC3; 73 unsigned int _sPC4; 74 unsigned int _sPC5; 75 unsigned int _iAbsMask; 76 unsigned int _iDomainRange; 77 unsigned int _sOvfThreshold; 78 unsigned int _sUdfThreshold; 79} __internal_sexp_la_data_t; 80static __constant __internal_sexp_la_data_t __internal_sexp_la_data = { 81 0x3FB8AA3Bu, 82 0x4b400000u, 83 0x3F317200u, 84 0x35BFBE8Eu, 85 0x0000007fu, 86 87 0x3F800000u, 88 0x3F7FFFFEu, 89 0x3EFFFF34u, 90 0x3E2AACACu, 91 0x3D2B8392u, 92 0x3C07D9FEu, 93 0x7fffffffu, 94 0x42aeac4fu, 95 96 0x42b17217u, 97 0xc2cff1b4u, 98}; 99 100static __constant union 101{ 102 unsigned int w; 103 float f; 104} __sexp_la_Shifter = { 0x4ac000feu }; 105 106static __constant union 107{ 108 unsigned int w; 109 float f; 110} __sexp_la_L2E = { 0x3FB8AA3Bu }; 111 112static __constant union 113{ 114 unsigned int w; 115 float f; 116} __sexp_la_L2H = { 0x3f317218u }; 117 118static __constant union 119{ 120 unsigned int w; 121 float f; 122} __sexp_la_L2L = { 0xb102E308u }; 123 124static __constant union 125{ 126 unsigned int w; 127 float f; 128} __sexp_la_c5 = { 0x3c08ba8bu }; 129 130static __constant union 131{ 132 unsigned int w; 133 float f; 134} __sexp_la_c4 = { 0x3d2aec4eu }; 135 136static __constant union 137{ 138 unsigned int w; 139 float f; 140} __sexp_la_c3 = { 0x3e2aaa9cu }; 141 142static __constant union 143{ 144 unsigned int w; 145 float f; 146} __sexp_la_c2 = { 0x3effffe8u }; 147 148static __constant union 149{ 150 unsigned int w; 151 float f; 152} __sexp_la_c1 = { 0x3f800000u }; 153 154__attribute__((always_inline)) 155inline int __internal_sexp_la_cout (float *a, float *r) 156{ 157 int nRet = 0; 158 float x = *a; 159 union 160 { 161 unsigned int w; 162 float f; 163 } S, Th, Tlr, Th2, xin, xa, res; 164 float N, R, poly; 165 int index_mask; 166 167 S.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (x, __sexp_la_L2E.f, __sexp_la_Shifter.f); 168 N = S.f - __sexp_la_Shifter.f; 169 170 R = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (-N, __sexp_la_L2H.f, x); 171 R = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (-N, __sexp_la_L2L.f, R); 172 173 Th.w = S.w << 22; 174 175 index_mask = 0 - (S.w & 1); 176 177 Th.w ^= (index_mask & 0x7504F3u); 178 179 Tlr.w = index_mask & 0x329302AEu; 180 181 poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (R, __sexp_la_c5.f, __sexp_la_c4.f); 182 poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (R, poly, __sexp_la_c3.f); 183 poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (R, poly, __sexp_la_c2.f); 184 poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (R, poly, __sexp_la_c1.f); 185 poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (R, poly, Tlr.f); 186 187 xin.f = x; 188 xa.w = xin.w & 0x7fffffffu; 189 190 if (xa.w > 0x42AEAC4Fu) 191 goto EXPF_SPECIAL; 192 193 res.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (poly, Th.f, Th.f); 194 195 *r = res.f; 196 return nRet; 197 198 EXPF_SPECIAL: 199 if (xa.w > 0x432EAC4Fu) 200 { 201 if (xa.w > 0x7f800000u) 202 { 203 *r = x + x; 204 return nRet; 205 } 206 207 if (x < 0) 208 { 209 *r = 0.0f; 210 nRet = 4; 211 return nRet; 212 } 213 214 res.w = 0x7f800000; 215 *r = res.f; 216 nRet = 3; 217 return nRet; 218 } 219 220 S.w += 0xfe; 221 Th2.w = (S.w >> 2) & 0xff; 222 S.w -= (Th2.w << 1); 223 224 Th2.w <<= 23; 225 226 Th.w = S.w << 22; 227 228 Th.w ^= (index_mask & 0x7504F3u); 229 230 res.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (poly, Th.f, Th.f); 231 res.f *= Th2.f; 232 233 *r = res.f; 234 return nRet; 235} 236 237float __ocl_svml_expf (float a) 238{ 239 240 float va1; 241 float vr1; 242 unsigned int vm; 243 244 float r; 245 246 va1 = a;; 247 248 __internal_sexp_la_cout (&va1, &vr1); 249 r = vr1;; 250 251 return r; 252 253} 254