1 
2 /*
3  * Copyright (c) 2018-2019, NVIDIA CORPORATION.  All rights reserved.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 
20 #include <immintrin.h>
21 #include <common.h>
22 
/*
 * _CPU must already be defined when this point is reached: it is pasted
 * into the name of the function defined in this file (see log10_vec512
 * below), so compilation is aborted if it is missing.
 */
#if !(defined _CPU)
#error: please define _CPU - specific suffix to a function name
#endif

/*
 * Two-level token paste. JOIN2 forwards to _JOIN2 so that macro arguments
 * (here _CPU) are fully expanded before ## glues the tokens together;
 * using ## directly would paste the literal token "_CPU".
 */
#define _JOIN2(a,b) a##b
#define JOIN2(a,b) _JOIN2(a,b)

/* Actual symbol name: __fs_log10_16_ followed by the expansion of _CPU. */
#define log10_vec512 JOIN2(__fs_log10_16_,_CPU)

/* C linkage: the symbol is emitted without C++ name mangling. */
extern "C" __m512 log10_vec512(__m512);
33 
/*
 * Base-10 logarithm of the 16 packed single-precision lanes of `a`.
 *
 * AVX-512F path:
 *   1. Split each lane into a mantissa m normalized to [0.75, 1.5) and the
 *      matching power-of-two exponent e, so that a == m * 2^e.
 *   2. Scale e by LOG10_2_F (presumably log10(2) broadcast to all lanes --
 *      defined in common.h, confirm there).
 *   3. Use the top four mantissa bits of m to pick one of 16 entries from
 *      each of coeffs0..coeffs3 (presumably per-interval polynomial
 *      coefficients -- defined in common.h, confirm there).
 *   4. Evaluate the polynomial in (m - ONE_F) with Horner's scheme and add
 *      the scaled exponent in the last fused multiply-add.
 *
 * Lanes with a negative input receive a NaN mantissa (_MM_MANT_SIGN_nan),
 * which propagates through the arithmetic to the result.
 *
 * Without AVX-512F nothing can be computed; the build emits a warning and
 * the input is returned unchanged so that the function never returns an
 * indeterminate value.
 *
 * Returns the per-lane result vector.
 */
__m512 __attribute__ ((noinline)) log10_vec512(__m512 a)
{
#ifdef __AVX512F__
    /* Mantissa in [0.75, 1.5); negative inputs become NaN. */
    __m512 m = _mm512_getmant_ps(a, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_nan);

    /*
     * getexp(a) is relative to a [1, 2) mantissa. Subtracting getexp(m)
     * (0 or -1 for m in [0.75, 1.5)) re-bases it onto the mantissa range
     * actually used above.
     */
    __m512 e = _mm512_getexp_ps(a);
    __m512 b = _mm512_getexp_ps(m);
    e = _mm512_sub_ps(e, b);
    e = _mm512_mul_ps(e, *(__m512*)LOG10_2_F);

    /*
     * Shifting the raw bits right by 19 leaves the four most significant
     * mantissa bits in bits 3:0 of each lane. The permutes below only look
     * at those four bits, so the exponent bits left above them are harmless.
     * Must be taken before m is reduced by ONE_F.
     */
    __m512i idx = _mm512_srli_epi32(_mm512_castps_si512(m), 19);
    m = _mm512_sub_ps(m, *(__m512*)ONE_F);

    /* Per-lane table lookups: one coefficient of each degree. */
    __m512 c0 = _mm512_permutexvar_ps(idx, *(__m512*)coeffs0);
    __m512 c1 = _mm512_permutexvar_ps(idx, *(__m512*)coeffs1);
    __m512 c2 = _mm512_permutexvar_ps(idx, *(__m512*)coeffs2);
    __m512 c3 = _mm512_permutexvar_ps(idx, *(__m512*)coeffs3);

    /* Horner: (((c0*m + c1)*m + c2)*m + c3)*m + e */
    __m512 t = c0;
    t = _mm512_fmadd_ps(t, m, c1);
    t = _mm512_fmadd_ps(t, m, c2);
    t = _mm512_fmadd_ps(t, m, c3);
    t = _mm512_fmadd_ps(t, m, e);

    return t;
#else
#warning NO AVX512!
    /* Misconfigured build: return a well-defined value instead of garbage. */
    return a;
#endif
}
65