1 /*
2  * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3  *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
4  *
5  * This file is part of lsp-plugins
6  * Created on: 17 дек. 2018 г.
7  *
8  * lsp-plugins is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Lesser General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * any later version.
12  *
13  * lsp-plugins is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <dsp/dsp.h>
23 #include <test/test.h>
24 
25 #include <core/types.h>
26 #include <core/debug.h>
27 
28 #include <dsp/arch/x86/features.h>
29 
30 #define DSP_ARCH_X86_AVX2_IMPL
31 
32 #include <dsp/arch/x86/avx2/float.h>
33 
34 #include <dsp/arch/x86/avx2/pmath/op_kx.h>
35 #include <dsp/arch/x86/avx2/pmath/fmop_kx.h>
36 #include <dsp/arch/x86/avx2/pmath/exp.h>
37 #include <dsp/arch/x86/avx2/pmath/log.h>
38 #include <dsp/arch/x86/avx2/pmath/pow.h>
39 
40 #include <dsp/arch/x86/avx2/fft/normalize.h>
41 
42 #include <dsp/arch/x86/avx2/search/iminmax.h>
43 
44 #include <dsp/arch/x86/avx2/graphics/transpose.h>
45 #include <dsp/arch/x86/avx2/graphics/effects.h>
46 
47 #undef DSP_ARCH_X86_AVX2_IMPL
48 
49 namespace avx2
50 {
51     using namespace x86;
52 
53     #define EXPORT2(function, export)               { dsp::function = avx2::export; TEST_EXPORT(avx2::export); }
54     #define EXPORT1(function)                       EXPORT2(function, function)
55 
56     #define EXPORT2_X64(function, export)           IF_ARCH_X86_64(EXPORT2(function, export));
57     #define SUPPORT_X64(function)                   IF_ARCH_X86_64(TEST_EXPORT(avx2::function))
58 
59     #define CEXPORT2(cond, function, export)    \
60     IF_ARCH_X86( \
61             TEST_EXPORT(avx2::export); \
62             if (cond) \
63                 dsp::function = avx2::export; \
64         );
65 
66     #define CEXPORT1(cond, export)    \
67     IF_ARCH_X86( \
68             TEST_EXPORT(avx2::export); \
69             if (cond) \
70                 dsp::export = avx2::export; \
71         );
72 
73     #define CEXPORT2_X64(cond, function, export)    \
74         IF_ARCH_X86_64( \
75                 TEST_EXPORT(avx2::export); \
76                 if (cond) \
77                     dsp::function = avx2::export; \
78             );
79 
80     #define CEXPORT1_X64(cond, export)    \
81         IF_ARCH_X86_64( \
82                 TEST_EXPORT(avx2::export); \
83                 if (cond) \
84                     dsp::export = avx2::export; \
85             );
86 
dsp_init(const cpu_features_t * f)87     void dsp_init(const cpu_features_t *f)
88     {
89         if ((f->features & (CPU_OPTION_AVX | CPU_OPTION_AVX2)) != (CPU_OPTION_AVX | CPU_OPTION_AVX2))
90             return;
91 
92         lsp_trace("Optimizing DSP for AVX2 instruction set");
93 
94         bool favx   = feature_check(f, FEAT_FAST_AVX);
95 
96         CEXPORT1(favx, limit_saturate1);
97         CEXPORT1(favx, limit_saturate2);
98         CEXPORT1(favx, copy_saturated);
99         CEXPORT1(favx, saturate);
100         CEXPORT1(favx, sanitize1);
101         CEXPORT1(favx, sanitize2);
102 
103         CEXPORT1(favx, add_k2);
104         CEXPORT1(favx, sub_k2);
105         CEXPORT1(favx, rsub_k2);
106         CEXPORT1(favx, mul_k2);
107         CEXPORT1(favx, div_k2);
108         CEXPORT1(favx, rdiv_k2);
109         CEXPORT1(favx, mod_k2);
110         CEXPORT1(favx, rmod_k2);
111 
112         CEXPORT1(favx, add_k3);
113         CEXPORT1(favx, sub_k3);
114         CEXPORT1(favx, rsub_k3);
115         CEXPORT1(favx, mul_k3);
116         CEXPORT1(favx, div_k3);
117         CEXPORT1(favx, rdiv_k3);
118         CEXPORT1(favx, mod_k3);
119         CEXPORT1(favx, rmod_k3);
120 
121         CEXPORT1(favx, fmadd_k3);
122         CEXPORT1(favx, fmsub_k3);
123         CEXPORT1(favx, fmrsub_k3);
124         CEXPORT1(favx, fmmul_k3);
125         CEXPORT1(favx, fmdiv_k3);
126         CEXPORT1(favx, fmrdiv_k3);
127         CEXPORT1(favx, fmmod_k3);
128         CEXPORT1(favx, fmrmod_k3);
129 
130         CEXPORT1(favx, fmadd_k4);
131         CEXPORT1(favx, fmsub_k4);
132         CEXPORT1(favx, fmrsub_k4);
133         CEXPORT1(favx, fmmul_k4);
134         CEXPORT1(favx, fmdiv_k4);
135         CEXPORT1(favx, fmrdiv_k4);
136         CEXPORT1(favx, fmmod_k4);
137         CEXPORT1(favx, fmrmod_k4);
138 
139         CEXPORT2_X64(favx, exp1, x64_exp1);
140         CEXPORT2_X64(favx, exp2, x64_exp2);
141 
142         CEXPORT2_X64(favx, logb1, x64_logb1);
143         CEXPORT2_X64(favx, logb2, x64_logb2);
144         CEXPORT2_X64(favx, loge1, x64_loge1);
145         CEXPORT2_X64(favx, loge2, x64_loge2);
146         CEXPORT2_X64(favx, logd1, x64_logd1);
147         CEXPORT2_X64(favx, logd2, x64_logd2);
148 
149         CEXPORT2_X64(favx, powcv1, x64_powcv1);
150         CEXPORT2_X64(favx, powcv2, x64_powcv2);
151         CEXPORT2_X64(favx, powvc1, x64_powvc1);
152         CEXPORT2_X64(favx, powvc2, x64_powvc2);
153         CEXPORT2_X64(favx, powvx1, x64_powvx1);
154         CEXPORT2_X64(favx, powvx2, x64_powvx2);
155 
156         CEXPORT2_X64(favx, eff_hsla_hue, x64_eff_hsla_hue);
157         CEXPORT2_X64(favx, eff_hsla_sat, x64_eff_hsla_sat);
158         CEXPORT2_X64(favx, eff_hsla_light, x64_eff_hsla_light);
159         CEXPORT2_X64(favx, eff_hsla_alpha, x64_eff_hsla_alpha);
160 
161         CEXPORT1(favx, normalize_fft2);
162         CEXPORT1(favx, normalize_fft3);
163 
164         if (f->features & CPU_OPTION_FMA3)
165         {
166             CEXPORT2_X64(favx, mod_k2, mod_k2_fma3);
167             CEXPORT2_X64(favx, rmod_k2, rmod_k2_fma3);
168 
169             CEXPORT2_X64(favx, mod_k3, mod_k3_fma3);
170             CEXPORT2_X64(favx, rmod_k3, rmod_k3_fma3);
171 
172             CEXPORT2_X64(favx, fmadd_k3, fmadd_k3_fma3);
173             CEXPORT2_X64(favx, fmsub_k3, fmsub_k3_fma3);
174             CEXPORT2_X64(favx, fmrsub_k3, fmrsub_k3_fma3);
175             CEXPORT2_X64(favx, fmmod_k3, fmmod_k3_fma3);
176             CEXPORT2_X64(favx, fmrmod_k3, fmrmod_k3_fma3);
177 
178             CEXPORT2_X64(favx, fmadd_k4, fmadd_k4_fma3);
179             CEXPORT2_X64(favx, fmsub_k4, fmsub_k4_fma3);
180             CEXPORT2_X64(favx, fmrsub_k4, fmrsub_k4_fma3);
181             CEXPORT2_X64(favx, fmmod_k4, fmmod_k4_fma3);
182             CEXPORT2_X64(favx, fmrmod_k4, fmrmod_k4_fma3);
183 
184             CEXPORT2_X64(favx, exp1, x64_exp1_fma3);
185             CEXPORT2_X64(favx, exp2, x64_exp2_fma3);
186 
187             CEXPORT2_X64(favx, logb1, x64_logb1_fma3);
188             CEXPORT2_X64(favx, logb2, x64_logb2_fma3);
189             CEXPORT2_X64(favx, loge1, x64_loge1_fma3);
190             CEXPORT2_X64(favx, loge2, x64_loge2_fma3);
191             CEXPORT2_X64(favx, logd1, x64_logd1_fma3);
192             CEXPORT2_X64(favx, logd2, x64_logd2_fma3);
193 
194             CEXPORT2_X64(favx, powcv1, x64_powcv1_fma3);
195             CEXPORT2_X64(favx, powcv2, x64_powcv2_fma3);
196             CEXPORT2_X64(favx, powvc1, x64_powvc1_fma3);
197             CEXPORT2_X64(favx, powvc2, x64_powvc2_fma3);
198             CEXPORT2_X64(favx, powvx1, x64_powvx1_fma3);
199             CEXPORT2_X64(favx, powvx2, x64_powvx2_fma3);
200         }
201     }
202 }
203