1 /*
2  * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3  *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
4  *
5  * This file is part of lsp-plugins
6  * Created on: 9 окт. 2018 г.
7  *
8  * lsp-plugins is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Lesser General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * any later version.
12  *
13  * lsp-plugins is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <dsp/dsp.h>
23 #include <dsp/bits.h>
24 #include <core/types.h>
25 #include <core/debug.h>
26 #include <test/test.h>
27 
28 #include <dsp/arch/x86/features.h>
29 
30 #define DSP_ARCH_X86_SSE2_IMPL
31 
32 namespace sse2 // TODO: make constants common for all architectures
33 {
34     //-------------------------------------------------------------------------
35     // Constants definition
36     #define DSP_F32VEC4(name, v)        static const float name[] __lsp_aligned16          = { v, v, v, v }
37     #define DSP_U32VEC4(name, v)        static const uint32_t name[] __lsp_aligned16       = { uint32_t(v), uint32_t(v), uint32_t(v), uint32_t(v) }
38 
39     #define DSP_F32VECX4(name, a, b, c, d)  static const float name[] __lsp_aligned16      = { a, b, c, d }
40     #define DSP_U32VECX4(name, a, b, c, d)  static const uint32_t name[] __lsp_aligned16   = { uint32_t(a), uint32_t(b), uint32_t(c), uint32_t(d) }
41 
42     #define DSP_F32REP4(v)              v, v, v, v
43     #define DSP_U32REP4(v)              uint32_t(v), uint32_t(v), uint32_t(v), uint32_t(v)
44 
45     #define DSP_F32ARRAY(name, ...)     static const float name[] __lsp_aligned16          = { __VA_ARGS__ }
46     #define DSP_U32ARRAY(name, ...)     static const uint32_t name[] __lsp_aligned16       = { __VA_ARGS__ }
47 
48     #include <dsp/common/const/const16.h>
49 
50     #undef DSP_F32ARRAY_IMPL
51     #undef DSP_F32ARRAY
52 
53     #undef DSP_U32REP4
54     #undef DSP_F32REP4
55 
56     #undef DSP_U32VECX4
57     #undef DSP_F32VECX4
58 
59     #undef DSP_U32VEC4
60     #undef DSP_F32VEC4
61 }
62 
63 #include <dsp/arch/x86/sse2/float.h>
64 
65 #include <dsp/arch/x86/sse2/search/iminmax.h>
66 
67 #include <dsp/arch/x86/sse2/graphics.h>
68 #include <dsp/arch/x86/sse2/graphics/effects.h>
69 #include <dsp/arch/x86/sse2/graphics/axis.h>
70 
71 #include <dsp/arch/x86/sse2/pmath/op_kx.h>
72 #include <dsp/arch/x86/sse2/pmath/op_vv.h>
73 #include <dsp/arch/x86/sse2/pmath/fmop_kx.h>
74 #include <dsp/arch/x86/sse2/pmath/fmop_vv.h>
75 #include <dsp/arch/x86/sse2/pmath/exp.h>
76 #include <dsp/arch/x86/sse2/pmath/log.h>
77 #include <dsp/arch/x86/sse2/pmath/pow.h>
78 
79 #undef DSP_ARCH_X86_SSE2_IMPL
80 
81 namespace sse2
82 {
83     using namespace x86;
84 
85 #define EXPORT2(function, export)           dsp::function = sse2::export; TEST_EXPORT(sse2::export);
86 #define EXPORT1(function)                   EXPORT2(function, function);
87 
dsp_init(const cpu_features_t * f)88     void dsp_init(const cpu_features_t *f)
89     {
90         if (((f->features) & (CPU_OPTION_SSE | CPU_OPTION_SSE2)) != (CPU_OPTION_SSE | CPU_OPTION_SSE2))
91             return;
92 
93         lsp_trace("Optimizing DSP for SSE2 instruction set");
94 
95         EXPORT1(copy_saturated);
96         EXPORT1(saturate);
97         EXPORT1(limit_saturate1);
98         EXPORT1(limit_saturate2);
99         EXPORT1(sanitize1);
100         EXPORT1(sanitize2);
101 
102         EXPORT1(mod_k2);
103         EXPORT1(rmod_k2);
104         EXPORT1(mod_k3);
105         EXPORT1(rmod_k3);
106 
107         EXPORT1(fmmod_k3);
108         EXPORT1(fmrmod_k3);
109         EXPORT1(fmmod_k4);
110         EXPORT1(fmrmod_k4);
111 
112         EXPORT1(mod2);
113         EXPORT1(rmod2);
114         EXPORT1(mod3);
115 
116         EXPORT1(exp1);
117         EXPORT1(exp2);
118         EXPORT1(logb1);
119         EXPORT1(logb2);
120         EXPORT1(loge1);
121         EXPORT1(loge2);
122         EXPORT1(logd1);
123         EXPORT1(logd2);
124         EXPORT1(powcv1);
125         EXPORT1(powcv2);
126         EXPORT1(powvc1);
127         EXPORT1(powvc2);
128         EXPORT1(powvx1);
129         EXPORT1(powvx2);
130 
131         EXPORT1(min_index);
132         EXPORT1(max_index);
133         EXPORT1(minmax_index);
134 
135         EXPORT1(abs_min_index);
136         EXPORT1(abs_max_index);
137         EXPORT1(abs_minmax_index);
138 
139         EXPORT1(hsla_to_rgba);
140         EXPORT1(rgba_to_hsla);
141         EXPORT1(rgba_to_bgra32);
142 
143         EXPORT1(eff_hsla_hue);
144         EXPORT1(eff_hsla_sat);
145         EXPORT1(eff_hsla_light);
146         EXPORT1(eff_hsla_alpha);
147 
148         EXPORT1(axis_apply_log1);
149         EXPORT1(axis_apply_log2);
150         EXPORT1(rgba32_to_bgra32);
151     }
152 
153     #undef EXPORT1
154     #undef EXPORT2
155 }
156