1
2 /*
3 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 */
18
19 #include <immintrin.h>
20 #include <common.h>
21
/*
 * _CPU must be supplied by the build (e.g. via -D_CPU=...). It is pasted onto
 * the exported function name below, so each build of this file produces a
 * distinctly named entry point.
 */
#if !(defined _CPU)
#error: please define _CPU - specific suffix to a function name
#endif

/*
 * Two-level token pasting: JOIN2 expands its arguments first (so _CPU is
 * replaced by its value), then _JOIN2 performs the actual ## concatenation.
 */
#define _JOIN2(a,b) a##b
#define JOIN2(a,b) _JOIN2(a,b)

/* Exported symbol name: "__fs_log10_1_" followed by the value of _CPU. */
#define log10_scalar JOIN2(__fs_log10_1_,_CPU)
/* Single-precision fused multiply-add via the compiler builtin. */
#define FMAF __builtin_fmaf

/* C linkage so the symbol name is not C++-mangled. */
extern "C" float log10_scalar(float);
33
34
log10_scalar(float a_input)35 float __attribute__ ((noinline)) log10_scalar(float a_input)
36 {
37 float a, m, e, b, t;
38 int mu, eu;
39
40 unsigned u = float_as_int(a_input);
41 u -= 0x800000;
42 if (__builtin_expect(u >= 0x7f000000, 0)) {
43 int exp_offset = 0;
44 if (a_input != a_input) return a_input + a_input; // NaN
45 if (a_input < 0.0f) return CANONICAL_NAN; // negative
46 if (a_input == 0.0f) return NINF; // zero
47 if (a_input == PINF) return PINF; // +infinity
48 a_input *= TWO_TO_24_F; // denormals
49 exp_offset += 24;
50 mu = float_as_int(a_input);
51 mu -= float_as_int(MAGIC_F_LEGACY[0]);
52 eu = (mu >> 23) - exp_offset;
53 mu &= MANTISSA_MASK[0];
54 mu += float_as_int(MAGIC_F_LEGACY[0]);
55 m = int_as_float(mu);
56 e = (float)eu;
57 goto core;
58 }
59 mu = float_as_int(a_input);
60 mu -= float_as_int(MAGIC_F_LEGACY[0]);
61 eu = mu >> 23;
62 mu &= MANTISSA_MASK[0];
63 mu += float_as_int(MAGIC_F_LEGACY[0]);
64 m = int_as_float(mu);
65 e = (float)eu;
66 core:
67 e = e * LOG10_2_F[0];
68
69 m = m - 1.0f;
70
71 t = c0[0];
72 t = FMAF(t, m, c1[0]);
73 t = FMAF(t, m, c2[0]);
74 t = FMAF(t, m, c3[0]);
75 t = FMAF(t, m, c4[0]);
76 t = FMAF(t, m, c5[0]);
77 t = FMAF(t, m, c6[0]);
78 t = FMAF(t, m, c7[0]);
79 t = FMAF(t, m, c8[0]);
80 t = FMAF(t, m, e);
81
82 return t;
83 }
84
85