1
2 /*
3 * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 */
18
19
20 #include <immintrin.h>
21 #include <common.h>
22
23 #if !(defined _CPU)
24 #error: please define _CPU - specific suffix to a function name
25 #endif
26
/* Two-level token pasting: JOIN2 macro-expands its arguments first (so an
 * argument such as _CPU is replaced by its value), then _JOIN2 glues the
 * resulting tokens together. */
#define _JOIN2(lhs, rhs) lhs##rhs
#define JOIN2(lhs, rhs) _JOIN2(lhs, rhs)
29
30 #define log10_vec512 JOIN2(__fs_log10_16_,_CPU)
31
32 extern "C" __m512 log10_vec512(__m512);
33
/*
 * log10_vec512 (expands to __fs_log10_16_<_CPU>): single-precision log10
 * evaluated independently on the 16 float lanes of a 512-bit vector.
 *
 * AVX-512F build:
 *   1. Each lane is split into a mantissa m, normalized with
 *      _MM_MANT_NORM_p75_1p5, and an exponent e = getexp(a) - getexp(m),
 *      i.e. the exponent that matches that normalization.
 *      _MM_MANT_SIGN_nan requests NaN for lanes whose sign bit is set.
 *   2. e is multiplied by LOG10_2_F.
 *   3. The bit pattern of m shifted right by 19 is used as the per-lane
 *      selector for picking one entry from each of coeffs0..coeffs3.
 *   4. With m reduced by ONE_F, the polynomial
 *      ((c0*m + c1)*m + c2)*m + c3 is evaluated with fused multiply-adds,
 *      and the result is (that polynomial) * m + e.
 *
 * LOG10_2_F, ONE_F and coeffs0..coeffs3 are defined outside this block
 * (presumably in common.h).  NOTE(review): they are assumed to hold
 * log10(2), 1.0f and per-interval polynomial coefficients -- confirm
 * against that header.
 *
 * Build without __AVX512F__: no implementation is available.  The build
 * warning is kept, and a zero vector is returned so that the result is at
 * least well defined (it is NOT a valid log10 value).
 *
 * a:       input vector.
 * returns: per-lane result as described above.
 */
__m512 __attribute__ ((noinline)) log10_vec512(__m512 a)
{
#ifdef __AVX512F__
    /* Mantissa / exponent decomposition. */
    __m512 m = _mm512_getmant_ps(a, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_nan);
    __m512 e = _mm512_getexp_ps(a);
    const __m512 m_exp = _mm512_getexp_ps(m);
    e = _mm512_sub_ps(e, m_exp);
    e = _mm512_mul_ps(e, *(__m512*)LOG10_2_F);

    /* The table selector must be taken from m before ONE_F is subtracted.
     * _mm512_castps_si512 reinterprets the float bits as integers without
     * converting values. */
    const __m512i idx = _mm512_srli_epi32(_mm512_castps_si512(m), 19);
    m = _mm512_sub_ps(m, *(__m512*)ONE_F);

    const __m512 c0 = _mm512_permutexvar_ps(idx, *(__m512*)coeffs0);
    const __m512 c1 = _mm512_permutexvar_ps(idx, *(__m512*)coeffs1);
    const __m512 c2 = _mm512_permutexvar_ps(idx, *(__m512*)coeffs2);
    const __m512 c3 = _mm512_permutexvar_ps(idx, *(__m512*)coeffs3);

    /* Horner evaluation; the last step folds in the scaled exponent. */
    __m512 t = _mm512_fmadd_ps(c0, m, c1);
    t = _mm512_fmadd_ps(t, m, c2);
    t = _mm512_fmadd_ps(t, m, c3);
    return _mm512_fmadd_ps(t, m, e);
#else
#warning NO AVX512!
    /* Previously an uninitialized vector was returned on this path, which
     * is undefined behaviour.  Return a defined (all-zero) value instead. */
    (void)a;
    __m512 zero = {0.0f};
    return zero;
#endif
}
65