1 
2 /*
3  * Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 
20 #include <immintrin.h>
21 #include <common.h>
22 
23 #if !(defined _CPU)
24 #error: please define _CPU - specific suffix to a function name
25 #endif
26 
/*
 * Two-level token pasting. JOIN2 forwards to _JOIN2 so that macro arguments
 * (such as _CPU) are fully expanded before ## concatenates them; pasting
 * directly would join the unexpanded argument names instead.
 */
27 #define _JOIN2(a,b) a##b
28 #define JOIN2(a,b) _JOIN2(a,b)
29 
/* Name of the function defined in this file: __fs_log_1_ followed by the expansion of _CPU. */
30 #define log_scalar JOIN2(__fs_log_1_,_CPU)
/* Shorthand for the compiler's single-precision fused multiply-add builtin. */
31 #define FMAF __builtin_fmaf
32 
/* Prototype with C linkage, so the symbol keeps the unmangled name produced by the log_scalar macro. */
33 extern "C" float log_scalar(float);
34 
35 
log_scalar(float a_input)36 float __attribute__ ((noinline)) log_scalar(float a_input)
37 {
38     float a, m, m2, e, b, t;
39     int mu, eu;
40 
41     unsigned u = float_as_int(a_input);
42     u -= 0x800000;
43     if (__builtin_expect(u >= 0x7f000000, 0)) {
44         int exp_offset = 0;
45         if (a_input != a_input) return a_input + a_input; // NaN
46         if (a_input < 0.0f) return CANONICAL_NAN; // negative
47         if (a_input == 0.0f) return NINF; // zero
48         if (a_input == PINF) return PINF; // +infinity
49         a_input *= TWO_TO_24_F; // denormals
50         exp_offset += 24;
51         mu = float_as_int(a_input);
52         mu -= float_as_int(MAGIC_F[0]);
53         eu = (mu >> 23) - exp_offset;
54         mu &= MANTISSA_MASK[0];
55         mu += float_as_int(MAGIC_F[0]);
56         m = int_as_float(mu);
57         e = (float)eu;
58         goto core;
59     }
60     mu = float_as_int(a_input);
61     mu -= float_as_int(MAGIC_F[0]);
62     eu = mu >> 23;
63     mu &= MANTISSA_MASK[0];
64     mu += float_as_int(MAGIC_F[0]);
65     m = int_as_float(mu);
66     e = (float)eu;
67 core:
68     m = m - 1.0f;
69     m2 = m * m;
70 
71     t = c0[0];
72     t = FMAF(t, m, c1[0]);
73     t = FMAF(t, m, c2[0]);
74     t = FMAF(t, m, c3[0]);
75     t = FMAF(t, m, c4[0]);
76     t = FMAF(t, m, c5[0]);
77     t = FMAF(t, m, c6[0]);
78     t = FMAF(t, m, c7[0]);
79     t = FMAF(t, m, c8[0]);
80     t = FMAF(t, m, c9[0]);
81 
82     t = FMAF(t, m2, m);
83     t = FMAF(e, LOG_2_F[0], t);
84 
85     return t;
86 }
87 
88