1 /* 2 * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/> 3 * (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com> 4 * 5 * This file is part of lsp-plugins 6 * Created on: 17 дек. 2018 г. 7 * 8 * lsp-plugins is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Lesser General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * any later version. 12 * 13 * lsp-plugins is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public License 19 * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>. 20 */ 21 22 #include <dsp/dsp.h> 23 #include <test/test.h> 24 25 #include <core/types.h> 26 #include <core/debug.h> 27 28 #include <dsp/arch/x86/features.h> 29 30 #define DSP_ARCH_X86_AVX2_IMPL 31 32 #include <dsp/arch/x86/avx2/float.h> 33 34 #include <dsp/arch/x86/avx2/pmath/op_kx.h> 35 #include <dsp/arch/x86/avx2/pmath/fmop_kx.h> 36 #include <dsp/arch/x86/avx2/pmath/exp.h> 37 #include <dsp/arch/x86/avx2/pmath/log.h> 38 #include <dsp/arch/x86/avx2/pmath/pow.h> 39 40 #include <dsp/arch/x86/avx2/fft/normalize.h> 41 42 #include <dsp/arch/x86/avx2/search/iminmax.h> 43 44 #include <dsp/arch/x86/avx2/graphics/transpose.h> 45 #include <dsp/arch/x86/avx2/graphics/effects.h> 46 47 #undef DSP_ARCH_X86_AVX2_IMPL 48 49 namespace avx2 50 { 51 using namespace x86; 52 53 #define EXPORT2(function, export) { dsp::function = avx2::export; TEST_EXPORT(avx2::export); } 54 #define EXPORT1(function) EXPORT2(function, function) 55 56 #define EXPORT2_X64(function, export) IF_ARCH_X86_64(EXPORT2(function, export)); 57 #define SUPPORT_X64(function) IF_ARCH_X86_64(TEST_EXPORT(avx2::function)) 58 59 #define CEXPORT2(cond, function, export) \ 60 IF_ARCH_X86( \ 61 TEST_EXPORT(avx2::export); \ 62 if (cond) \ 63 dsp::function = avx2::export; \ 64 ); 65 66 #define CEXPORT1(cond, export) \ 67 IF_ARCH_X86( \ 68 TEST_EXPORT(avx2::export); \ 69 if (cond) \ 70 dsp::export = avx2::export; \ 71 ); 72 73 #define CEXPORT2_X64(cond, function, export) \ 74 IF_ARCH_X86_64( \ 75 TEST_EXPORT(avx2::export); \ 76 if (cond) \ 77 dsp::function = avx2::export; \ 78 ); 79 80 #define CEXPORT1_X64(cond, export) \ 81 IF_ARCH_X86_64( \ 82 TEST_EXPORT(avx2::export); \ 83 if (cond) \ 84 dsp::export = avx2::export; \ 85 ); 86 dsp_init(const cpu_features_t * f)87 void dsp_init(const cpu_features_t *f) 88 { 89 if ((f->features & (CPU_OPTION_AVX | CPU_OPTION_AVX2)) != (CPU_OPTION_AVX | CPU_OPTION_AVX2)) 90 return; 91 92 lsp_trace("Optimizing DSP for AVX2 instruction set"); 93 94 bool favx = feature_check(f, FEAT_FAST_AVX); 95 96 CEXPORT1(favx, limit_saturate1); 97 CEXPORT1(favx, limit_saturate2); 98 CEXPORT1(favx, copy_saturated); 99 CEXPORT1(favx, saturate); 100 CEXPORT1(favx, sanitize1); 101 CEXPORT1(favx, sanitize2); 102 103 CEXPORT1(favx, add_k2); 104 CEXPORT1(favx, sub_k2); 105 CEXPORT1(favx, rsub_k2); 106 CEXPORT1(favx, mul_k2); 107 CEXPORT1(favx, div_k2); 108 CEXPORT1(favx, rdiv_k2); 109 CEXPORT1(favx, mod_k2); 110 CEXPORT1(favx, rmod_k2); 111 112 CEXPORT1(favx, add_k3); 113 CEXPORT1(favx, sub_k3); 114 CEXPORT1(favx, rsub_k3); 115 CEXPORT1(favx, mul_k3); 116 CEXPORT1(favx, div_k3); 117 CEXPORT1(favx, rdiv_k3); 118 CEXPORT1(favx, mod_k3); 119 CEXPORT1(favx, rmod_k3); 120 121 CEXPORT1(favx, fmadd_k3); 122 CEXPORT1(favx, fmsub_k3); 123 CEXPORT1(favx, fmrsub_k3); 124 CEXPORT1(favx, fmmul_k3); 125 CEXPORT1(favx, fmdiv_k3); 126 CEXPORT1(favx, fmrdiv_k3); 127 CEXPORT1(favx, fmmod_k3); 128 CEXPORT1(favx, fmrmod_k3); 129 130 CEXPORT1(favx, fmadd_k4); 131 CEXPORT1(favx, fmsub_k4); 132 CEXPORT1(favx, fmrsub_k4); 133 CEXPORT1(favx, fmmul_k4); 134 CEXPORT1(favx, fmdiv_k4); 135 CEXPORT1(favx, fmrdiv_k4); 136 CEXPORT1(favx, fmmod_k4); 137 CEXPORT1(favx, fmrmod_k4); 138 139 CEXPORT2_X64(favx, exp1, x64_exp1); 140 CEXPORT2_X64(favx, exp2, x64_exp2); 141 142 CEXPORT2_X64(favx, logb1, x64_logb1); 143 CEXPORT2_X64(favx, logb2, x64_logb2); 144 CEXPORT2_X64(favx, loge1, x64_loge1); 145 CEXPORT2_X64(favx, loge2, x64_loge2); 146 CEXPORT2_X64(favx, logd1, x64_logd1); 147 CEXPORT2_X64(favx, logd2, x64_logd2); 148 149 CEXPORT2_X64(favx, powcv1, x64_powcv1); 150 CEXPORT2_X64(favx, powcv2, x64_powcv2); 151 CEXPORT2_X64(favx, powvc1, x64_powvc1); 152 CEXPORT2_X64(favx, powvc2, x64_powvc2); 153 CEXPORT2_X64(favx, powvx1, x64_powvx1); 154 CEXPORT2_X64(favx, powvx2, x64_powvx2); 155 156 CEXPORT2_X64(favx, eff_hsla_hue, x64_eff_hsla_hue); 157 CEXPORT2_X64(favx, eff_hsla_sat, x64_eff_hsla_sat); 158 CEXPORT2_X64(favx, eff_hsla_light, x64_eff_hsla_light); 159 CEXPORT2_X64(favx, eff_hsla_alpha, x64_eff_hsla_alpha); 160 161 CEXPORT1(favx, normalize_fft2); 162 CEXPORT1(favx, normalize_fft3); 163 164 if (f->features & CPU_OPTION_FMA3) 165 { 166 CEXPORT2_X64(favx, mod_k2, mod_k2_fma3); 167 CEXPORT2_X64(favx, rmod_k2, rmod_k2_fma3); 168 169 CEXPORT2_X64(favx, mod_k3, mod_k3_fma3); 170 CEXPORT2_X64(favx, rmod_k3, rmod_k3_fma3); 171 172 CEXPORT2_X64(favx, fmadd_k3, fmadd_k3_fma3); 173 CEXPORT2_X64(favx, fmsub_k3, fmsub_k3_fma3); 174 CEXPORT2_X64(favx, fmrsub_k3, fmrsub_k3_fma3); 175 CEXPORT2_X64(favx, fmmod_k3, fmmod_k3_fma3); 176 CEXPORT2_X64(favx, fmrmod_k3, fmrmod_k3_fma3); 177 178 CEXPORT2_X64(favx, fmadd_k4, fmadd_k4_fma3); 179 CEXPORT2_X64(favx, fmsub_k4, fmsub_k4_fma3); 180 CEXPORT2_X64(favx, fmrsub_k4, fmrsub_k4_fma3); 181 CEXPORT2_X64(favx, fmmod_k4, fmmod_k4_fma3); 182 CEXPORT2_X64(favx, fmrmod_k4, fmrmod_k4_fma3); 183 184 CEXPORT2_X64(favx, exp1, x64_exp1_fma3); 185 CEXPORT2_X64(favx, exp2, x64_exp2_fma3); 186 187 CEXPORT2_X64(favx, logb1, x64_logb1_fma3); 188 CEXPORT2_X64(favx, logb2, x64_logb2_fma3); 189 CEXPORT2_X64(favx, loge1, x64_loge1_fma3); 190 CEXPORT2_X64(favx, loge2, x64_loge2_fma3); 191 CEXPORT2_X64(favx, logd1, x64_logd1_fma3); 192 CEXPORT2_X64(favx, logd2, x64_logd2_fma3); 193 194 CEXPORT2_X64(favx, powcv1, x64_powcv1_fma3); 195 CEXPORT2_X64(favx, powcv2, x64_powcv2_fma3); 196 CEXPORT2_X64(favx, powvc1, x64_powvc1_fma3); 197 CEXPORT2_X64(favx, powvc2, x64_powvc2_fma3); 198 CEXPORT2_X64(favx, powvx1, x64_powvx1_fma3); 199 CEXPORT2_X64(favx, powvx2, x64_powvx2_fma3); 200 } 201 } 202 } 203