1/*========================== begin_copyright_notice ============================
2
3Copyright (C) 2020-2021 Intel Corporation
4
5SPDX-License-Identifier: MIT
6
7============================= end_copyright_notice ===========================*/
8
9#include "../imf.h"
10#pragma OPENCL FP_CONTRACT OFF
/*
 * Layout of a constant block for an expf variant. Every member is stored as a
 * raw 32-bit word (unsigned int), not as a float.
 *
 * NOTE(review): no code visible in this file reads an object of this type;
 * the per-field remarks below are inferred from the field names and from the
 * values in the matching initializer -- confirm against the code that
 * consumes them before relying on them.
 */
typedef struct
{
    unsigned int Exp_tbl_L[32];  /* 32-entry table (initializer values look like 2^(i/1024) -- TODO confirm) */
    unsigned int Exp_tbl_H[32];  /* 32-entry table (initializer values look like 2^(i/32) -- TODO confirm) */

    unsigned int L2E;            /* presumably log2(e) as binary32 bits */
    unsigned int Shifter;        /* presumably a rounding "shifter" constant */
    unsigned int L2H;            /* presumably high part of ln(2) */
    unsigned int L2L;            /* presumably low part of ln(2) */
    unsigned int EMask;

    unsigned int AbsMask;        /* presumably sign-clearing mask */
    unsigned int Threshold;      /* presumably the |x| limit of the fast path */
    unsigned int SmallX;
    unsigned int IndexMask;

    unsigned int IndexMask2;
    unsigned int Zero;
    unsigned int knc_Shifter;
    unsigned int knc_L2EH;
    unsigned int knc_L2EL;
    unsigned int knc_EMask;

} __internal_sexp_la_data_avx512_t;
/*
 * Constant instance of __internal_sexp_la_data_avx512_t.
 *
 * The initializer is positional: the two brace-enclosed lists fill Exp_tbl_L
 * and Exp_tbl_H, and the 15 scalars that follow fill the remaining members in
 * declaration order. All values are raw IEEE-754 binary32 bit patterns or
 * integer masks.
 *
 * NOTE(review): this object is not referenced by any code visible in this
 * file.
 */
static __constant __internal_sexp_la_data_avx512_t __internal_sexp_la_data_avx512 = {
    {
     /* Exp_tbl_L[0..31]: values slightly above 1.0 (0x3f800001 .. 0x3f82b6df) */
     0x3f800001u, 0x3f801631u, 0x3f802c65u, 0x3f80429du,
     0x3f8058d9u, 0x3f806f18u, 0x3f80855cu, 0x3f809ba3u,
     0x3f80b1eeu, 0x3f80c83du, 0x3f80de90u, 0x3f80f4e7u,
     0x3f810b42u, 0x3f8121a0u, 0x3f813803u, 0x3f814e69u,
     0x3f8164d3u, 0x3f817b41u, 0x3f8191b3u, 0x3f81a829u,
     0x3f81bea2u, 0x3f81d520u, 0x3f81eba2u, 0x3f820227u,
     0x3f8218b0u, 0x3f822f3du, 0x3f8245cfu, 0x3f825c64u,
     0x3f8272fdu, 0x3f828999u, 0x3f82a03au, 0x3f82b6dfu,
     }
    , {
       /* Exp_tbl_H[0..31]: values in [1.0, 2.0) (0x3f800000 .. 0x3ffa83b3) */
       0x3f800000u, 0x3f82cd87u, 0x3f85aac3u, 0x3f88980fu,
       0x3f8b95c2u, 0x3f8ea43au, 0x3f91c3d3u, 0x3f94f4f0u,
       0x3f9837f0u, 0x3f9b8d3au, 0x3f9ef532u, 0x3fa27043u,
       0x3fa5fed7u, 0x3fa9a15bu, 0x3fad583fu, 0x3fb123f6u,
       0x3fb504f3u, 0x3fb8fbafu, 0x3fbd08a4u, 0x3fc12c4du,
       0x3fc5672au, 0x3fc9b9beu, 0x3fce248cu, 0x3fd2a81eu,
       0x3fd744fdu, 0x3fdbfbb8u, 0x3fe0ccdfu, 0x3fe5b907u,
       0x3feac0c7u, 0x3fefe4bau, 0x3ff5257du, 0x3ffa83b3u,
       }

    /*
     * Scalars, in declaration order:
     *   L2E, Shifter, L2H, L2L, EMask, AbsMask, Threshold, SmallX,
     *   IndexMask, IndexMask2, Zero,
     *   knc_Shifter, knc_L2EH, knc_L2EL, knc_EMask
     */
    , 0x3fB8AA3Bu, 0x46400000u, 0x3f317218u, 0xb102e308u, 0x3f000000u, 0x7fffffffu, 0x42AEAC4Fu, 0x2f800000u, 0x0000007cu, 0x00000f80u, 0x00000000u,
        0x4b400000u, 0x3fb8aa3bu, 0x32a57060u, 0xbfffffffu
};
60
/*
 * Layout of a second constant block for expf. Every member is a raw 32-bit
 * word (unsigned int).
 *
 * NOTE(review): no code visible in this file reads an object of this type;
 * the per-field remarks are inferred from the names and the initializer
 * values -- confirm against the consuming code.
 */
typedef struct
{
    unsigned int _sInvLn2;        /* presumably 1/ln(2) as binary32 bits */
    unsigned int _sShifter;       /* presumably a rounding "shifter" constant */
    unsigned int _sLn2hi;         /* presumably high part of ln(2) */
    unsigned int _sLn2lo;         /* presumably low part of ln(2) */
    unsigned int _iBias;          /* presumably the binary32 exponent bias */

    unsigned int _sPC0;           /* _sPC0.._sPC5: presumably polynomial coefficients */
    unsigned int _sPC1;
    unsigned int _sPC2;
    unsigned int _sPC3;
    unsigned int _sPC4;
    unsigned int _sPC5;
    unsigned int _iAbsMask;       /* presumably sign-clearing mask */
    unsigned int _iDomainRange;   /* presumably the |x| limit of the fast path */
    unsigned int _sOvfThreshold;  /* presumably the overflow threshold */
    unsigned int _sUdfThreshold;  /* presumably the underflow threshold */
} __internal_sexp_la_data_t;
/*
 * Constant instance of __internal_sexp_la_data_t. The initializer is
 * positional, so each value fills the member named in the comment beside it.
 * The decimal values are approximate decodings of the binary32 bit patterns.
 *
 * NOTE(review): this object is not referenced by any code visible in this
 * file.
 */
static __constant __internal_sexp_la_data_t __internal_sexp_la_data = {
    0x3FB8AA3Bu,    /* _sInvLn2       ~ 1.442695 */
    0x4b400000u,    /* _sShifter      = 1.5 * 2^23 */
    0x3F317200u,    /* _sLn2hi        ~ 0.693145 */
    0x35BFBE8Eu,    /* _sLn2lo        ~ 1.43e-6 */
    0x0000007fu,    /* _iBias         = 127 */

    0x3F800000u,    /* _sPC0          = 1.0 */
    0x3F7FFFFEu,    /* _sPC1          ~ 1.0 */
    0x3EFFFF34u,    /* _sPC2          ~ 0.5 */
    0x3E2AACACu,    /* _sPC3          ~ 0.16667 */
    0x3D2B8392u,    /* _sPC4          ~ 0.04187 */
    0x3C07D9FEu,    /* _sPC5          ~ 0.00829 */
    0x7fffffffu,    /* _iAbsMask      */
    0x42aeac4fu,    /* _iDomainRange  ~ 87.34 */

    0x42b17217u,    /* _sOvfThreshold ~ 88.72 */
    0xc2cff1b4u,    /* _sUdfThreshold ~ -103.97 */
};
99
/*
 * Scalar constants used by __internal_sexp_la_cout. Each one is a union that
 * is initialized through its first member (the raw 32-bit word `w`) and read
 * as a float through `f`, so the exact binary32 bit pattern is pinned.
 * Decimal values in the comments are approximate decodings.
 */

/* 1.5 * 2^22 + 127: added to x*log2(e) so that the rounded sum carries the
 * scaled argument in its low mantissa bits (one unit in the last place of a
 * float of this magnitude is 0.5). */
static __constant union
{
    unsigned int w;
    float f;
} __sexp_la_Shifter = { 0x4ac000feu };

/* log2(e) ~ 1.442695 */
static __constant union
{
    unsigned int w;
    float f;
} __sexp_la_L2E = { 0x3FB8AA3Bu };

/* High part of ln(2) ~ 0.6931472 */
static __constant union
{
    unsigned int w;
    float f;
} __sexp_la_L2H = { 0x3f317218u };

/* Low-order correction (~ -1.9e-9) so that L2H + L2L approximates ln(2)
 * more closely than a single float can. */
static __constant union
{
    unsigned int w;
    float f;
} __sexp_la_L2L = { 0xb102E308u };

/* Polynomial coefficients c5..c1, consumed highest degree first in a Horner
 * chain of fma calls. Values are close to 1/120, 1/24, 1/6, 1/2 and exactly
 * 1.0 respectively. */
static __constant union
{
    unsigned int w;
    float f;
} __sexp_la_c5 = { 0x3c08ba8bu };

static __constant union
{
    unsigned int w;
    float f;
} __sexp_la_c4 = { 0x3d2aec4eu };

static __constant union
{
    unsigned int w;
    float f;
} __sexp_la_c3 = { 0x3e2aaa9cu };

static __constant union
{
    unsigned int w;
    float f;
} __sexp_la_c2 = { 0x3effffe8u };

static __constant union
{
    unsigned int w;
    float f;
} __sexp_la_c1 = { 0x3f800000u };
153
154__attribute__((always_inline))
155inline int __internal_sexp_la_cout (float *a, float *r)
156{
157    int nRet = 0;
158    float x = *a;
159    union
160    {
161        unsigned int w;
162        float f;
163    } S, Th, Tlr, Th2, xin, xa, res;
164    float N, R, poly;
165    int index_mask;
166
167    S.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (x, __sexp_la_L2E.f, __sexp_la_Shifter.f);
168    N = S.f - __sexp_la_Shifter.f;
169
170    R = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (-N, __sexp_la_L2H.f, x);
171    R = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (-N, __sexp_la_L2L.f, R);
172
173    Th.w = S.w << 22;
174
175    index_mask = 0 - (S.w & 1);
176
177    Th.w ^= (index_mask & 0x7504F3u);
178
179    Tlr.w = index_mask & 0x329302AEu;
180
181    poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (R, __sexp_la_c5.f, __sexp_la_c4.f);
182    poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (R, poly, __sexp_la_c3.f);
183    poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (R, poly, __sexp_la_c2.f);
184    poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (R, poly, __sexp_la_c1.f);
185    poly = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (R, poly, Tlr.f);
186
187    xin.f = x;
188    xa.w = xin.w & 0x7fffffffu;
189
190    if (xa.w > 0x42AEAC4Fu)
191        goto EXPF_SPECIAL;
192
193    res.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (poly, Th.f, Th.f);
194
195    *r = res.f;
196    return nRet;
197
198  EXPF_SPECIAL:
199    if (xa.w > 0x432EAC4Fu)
200    {
201        if (xa.w > 0x7f800000u)
202        {
203            *r = x + x;
204            return nRet;
205        }
206
207        if (x < 0)
208        {
209            *r = 0.0f;
210            nRet = 4;
211            return nRet;
212        }
213
214        res.w = 0x7f800000;
215        *r = res.f;
216        nRet = 3;
217        return nRet;
218    }
219
220    S.w += 0xfe;
221    Th2.w = (S.w >> 2) & 0xff;
222    S.w -= (Th2.w << 1);
223
224    Th2.w <<= 23;
225
226    Th.w = S.w << 22;
227
228    Th.w ^= (index_mask & 0x7504F3u);
229
230    res.f = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (poly, Th.f, Th.f);
231    res.f *= Th2.f;
232
233    *r = res.f;
234    return nRet;
235}
236
/*
 * Public scalar entry point: returns the value that
 * __internal_sexp_la_cout computes for `a`. The helper's integer status
 * code is not used.
 */
float __ocl_svml_expf (float a)
{
    float result;

    __internal_sexp_la_cout (&a, &result);

    return result;
}
254