
/*
 * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */



#include <immintrin.h>
#include <common.h>

#define FMA __builtin_fma

#if !(defined _CPU)
#error: please define _CPU - specific suffix to a function name
#endif

#define _JOIN2(a,b) a##b
#define JOIN2(a,b) _JOIN2(a,b)

#define log10_scalar JOIN2(__fd_log10_1_,_CPU)


extern "C" double log10_scalar(double);


__log10_d_scalar_kernel(double m,double e)39 static double __attribute__ ((always_inline)) inline __log10_d_scalar_kernel(double m, double e)
40 {
41 e = e * LOG10_2[0];
42 m = m - 1.0;
43
44 double m2 = m * m;
45 double m4 = m2 * m2;
46 double m5 = m4 * m;
47 double m8 = m4 * m4;
48
49 double t0 = FMA(c0[0], m, c1[0]);
50 double t1 = FMA(c2[0], m, c3[0]);
51 double t2 = FMA(c4[0], m, c5[0]);
52 double t3 = FMA(c6[0], m, c7[0]);
53 double t4 = FMA(c8[0], m, c9[0]);
54 double t5 = FMA(c10[0], m, c11[0]);
55 double t6 = FMA(c12[0], m, c13[0]);
56 double t7 = FMA(c14[0], m, c15[0]);
57
58 double t = c16[0];
59 t = FMA(t, m, c17[0]);
60 t = FMA(t, m, c18[0]);
61 t = FMA(t, m, c19[0]);
62 t = FMA(t, m, e);
63
64 t0 = FMA(t0, m2, t1);
65 t2 = FMA(t2, m2, t3);
66 t4 = FMA(t4, m2, t5);
67 t6 = FMA(t6, m2, t7);
68 t0 = FMA(t0, m4, t2);
69 t4 = FMA(t4, m4, t6);
70 t0 = FMA(t0, m8, t4);
71
72 t = FMA(t0, m5, t);
73
74 return t;
75 }
76
log10_scalar(double a_input)77 double __attribute__ ((noinline)) log10_scalar(double a_input)
78 {
79 __m128d va, vm, ve, vb;
80 double a, m, e, b, t;
81 long long mu, eu;
82
83
84 #ifdef __AVX512F__
85 va = _mm_set_sd(a_input);
86 vm = _mm_getmant_sd(va, va, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_nan);
87 ve = _mm_getexp_sd(va, va);
88 vb = _mm_getexp_sd(vm, vm);
89 ve = _mm_sub_sd(ve, vb);
90 m = _mm_cvtsd_f64(vm);
91 e = _mm_cvtsd_f64(ve);
92 #else
93 int exp_offset = 1023;
94 unsigned long long u = double_as_ll(a_input);
95 u -= 0x10000000000000LL;
96 if (__builtin_expect(u >= 0x7fe0000000000000LL, 0)) {
97 if (a_input != a_input) return a_input + a_input; // NaN
98 if (a_input < 0.0) return ll_as_double(CANONICAL_NAN[0]); // negative
99 if (a_input == 0.0) return ll_as_double(NINF[0]); // zero
100 if (double_as_ll(a_input) == PINF[0]) return ll_as_double(PINF[0]); // +infinity
101 a_input *= TWO_TO_53; // denormals
102 exp_offset += 53;
103 mu = double_as_ll(a_input);
104 mu -= double_as_ll(THRESHOLD[0]);
105 eu = (mu >> 52) - 53;
106 mu &= MANTISSA_MASK[0];
107 mu += double_as_ll(THRESHOLD[0]);
108 m = ll_as_double(mu);
109 e = (double)eu;
110 return __log10_d_scalar_kernel(m, e);
111 }
112 mu = double_as_ll(a_input);
113 mu -= double_as_ll(THRESHOLD[0]);
114 eu = mu >> 52;
115 mu &= MANTISSA_MASK[0];
116 mu += double_as_ll(THRESHOLD[0]);
117 m = ll_as_double(mu);
118 e = (double)eu;
119 #endif
120
121 return __log10_d_scalar_kernel(m, e);
122 }

