1
2 /*
3 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 */
18
19
20 #include <immintrin.h>
21 #include <common.h>
22
23 #if !(defined _CPU)
24 #error: please define _CPU - specific suffix to a function name
25 #endif
26
27 #define _JOIN2(a,b) a##b
28 #define JOIN2(a,b) _JOIN2(a,b)
29
30 #define log_scalar JOIN2(__fs_log_1_,_CPU)
31 #define FMAF __builtin_fmaf
32
33 extern "C" float log_scalar(float);
34
35
log_scalar(float a_input)36 float __attribute__ ((noinline)) log_scalar(float a_input)
37 {
38 float a, m, m2, e, b, t;
39 int mu, eu;
40
41 unsigned u = float_as_int(a_input);
42 u -= 0x800000;
43 if (__builtin_expect(u >= 0x7f000000, 0)) {
44 int exp_offset = 0;
45 if (a_input != a_input) return a_input + a_input; // NaN
46 if (a_input < 0.0f) return CANONICAL_NAN; // negative
47 if (a_input == 0.0f) return NINF; // zero
48 if (a_input == PINF) return PINF; // +infinity
49 a_input *= TWO_TO_24_F; // denormals
50 exp_offset += 24;
51 mu = float_as_int(a_input);
52 mu -= float_as_int(MAGIC_F[0]);
53 eu = (mu >> 23) - exp_offset;
54 mu &= MANTISSA_MASK[0];
55 mu += float_as_int(MAGIC_F[0]);
56 m = int_as_float(mu);
57 e = (float)eu;
58 goto core;
59 }
60 mu = float_as_int(a_input);
61 mu -= float_as_int(MAGIC_F[0]);
62 eu = mu >> 23;
63 mu &= MANTISSA_MASK[0];
64 mu += float_as_int(MAGIC_F[0]);
65 m = int_as_float(mu);
66 e = (float)eu;
67 core:
68 m = m - 1.0f;
69 m2 = m * m;
70
71 t = c0[0];
72 t = FMAF(t, m, c1[0]);
73 t = FMAF(t, m, c2[0]);
74 t = FMAF(t, m, c3[0]);
75 t = FMAF(t, m, c4[0]);
76 t = FMAF(t, m, c5[0]);
77 t = FMAF(t, m, c6[0]);
78 t = FMAF(t, m, c7[0]);
79 t = FMAF(t, m, c8[0]);
80 t = FMAF(t, m, c9[0]);
81
82 t = FMAF(t, m2, m);
83 t = FMAF(e, LOG_2_F[0], t);
84
85 return t;
86 }
87
88