1 /* 2 * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/> 3 * (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com> 4 * 5 * This file is part of lsp-plugins 6 * Created on: 13 дек. 2019 г. 7 * 8 * lsp-plugins is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Lesser General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * any later version. 12 * 13 * lsp-plugins is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public License 19 * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>. 20 */ 21 22 #ifndef DSP_ARCH_X86_AVX_FASTCONV_H_ 23 #define DSP_ARCH_X86_AVX_FASTCONV_H_ 24 25 #ifndef DSP_ARCH_X86_AVX_IMPL 26 #error "This header should not be included directly" 27 #endif /* DSP_ARCH_X86_AVX_IMPL */ 28 29 #include <dsp/arch/x86/avx/fastconv/prepare.h> 30 #include <dsp/arch/x86/avx/fastconv/butterfly.h> 31 #include <dsp/arch/x86/avx/fastconv/apply.h> 32 33 namespace avx 34 { fastconv_parse(float * dst,const float * src,size_t rank)35 void fastconv_parse(float *dst, const float *src, size_t rank) 36 { 37 const float *ak = &FFT_A[(rank - 3) << 4]; 38 const float *wk = &FFT_DW[(rank - 3) << 4]; 39 size_t np = 1 << (rank - 1); 40 size_t nb = 1; 41 42 if (np > 4) 43 { 44 fastconv_direct_prepare(dst, src, ak, wk, np); 45 ak -= 16; 46 wk -= 16; 47 np >>= 1; 48 nb <<= 1; 49 } 50 else 51 fastconv_direct_unpack(dst, src); 52 53 while (np > 4) 54 { 55 fastconv_direct_butterfly(dst, ak, wk, np, nb); 56 ak -= 16; 57 wk -= 16; 58 np >>= 1; 59 nb <<= 1; 60 } 61 62 fastconv_direct_butterfly_last(dst, nb); 63 } 64 fastconv_parse_fma3(float * dst,const float * src,size_t rank)65 void fastconv_parse_fma3(float *dst, const float *src, size_t rank) 66 { 67 const float *ak = &FFT_A[(rank - 3) << 4]; 68 const float *wk = &FFT_DW[(rank - 3) << 4]; 69 size_t np = 1 << (rank - 1); 70 size_t nb = 1; 71 72 if (np > 4) 73 { 74 fastconv_direct_prepare_fma3(dst, src, ak, wk, np); 75 ak -= 16; 76 wk -= 16; 77 np >>= 1; 78 nb <<= 1; 79 } 80 else 81 fastconv_direct_unpack(dst, src); 82 83 while (np > 4) 84 { 85 fastconv_direct_butterfly_fma3(dst, ak, wk, np, nb); 86 ak -= 16; 87 wk -= 16; 88 np >>= 1; 89 nb <<= 1; 90 } 91 92 fastconv_direct_butterfly_last_fma3(dst, nb); 93 } 94 fastconv_restore(float * dst,float * tmp,size_t rank)95 void fastconv_restore(float *dst, float *tmp, size_t rank) 96 { 97 size_t nb = 1 << (rank - 3), np = 4; 98 const float *ak = FFT_A; 99 const float *wk = FFT_DW; 100 101 fastconv_reverse_prepare(tmp, nb); 102 if ((nb >>= 1) <= 0) 103 { 104 fastconv_reverse_unpack(dst, tmp, rank); 105 return; 106 } 107 ak += 16; 108 wk += 16; 109 np <<= 1; 110 111 while (nb > 1) 112 { 113 fastconv_reverse_butterfly(tmp, ak, wk, np, nb); 114 ak += 16; 115 wk += 16; 116 np <<= 1; 117 nb >>= 1; 118 } 119 120 fastconv_reverse_butterfly_last(dst, tmp, ak, wk, np); 121 } 122 fastconv_restore_fma3(float * dst,float * tmp,size_t rank)123 void fastconv_restore_fma3(float *dst, float *tmp, size_t rank) 124 { 125 size_t nb = 1 << (rank - 3), np = 4; 126 const float *ak = FFT_A; 127 const float *wk = FFT_DW; 128 129 fastconv_reverse_prepare_fma3(tmp, nb); 130 if ((nb >>= 1) <= 0) 131 { 132 fastconv_reverse_unpack(dst, tmp, rank); 133 return; 134 } 135 ak += 16; 136 wk += 16; 137 np <<= 1; 138 139 while (nb > 1) 140 { 141 fastconv_reverse_butterfly_fma3(tmp, ak, wk, np, nb); 142 ak += 16; 143 wk += 16; 144 np <<= 1; 145 nb >>= 1; 146 } 147 148 fastconv_reverse_butterfly_last_fma3(dst, tmp, ak, wk, np); 149 } 150 fastconv_apply(float * dst,float * tmp,const float * c1,const float * c2,size_t rank)151 void fastconv_apply(float *dst, float *tmp, const float *c1, const float *c2, size_t rank) 152 { 153 size_t nb = 1 << (rank - 3), np = 4; 154 const float *ak = FFT_A; 155 const float *wk = FFT_DW; 156 157 fastconv_apply_prepare(tmp, c1, c2, nb); 158 if ((nb >>= 1) <= 0) 159 { 160 fastconv_reverse_unpack_adding(dst, tmp, rank); 161 return; 162 } 163 ak += 16; 164 wk += 16; 165 np <<= 1; 166 167 while (nb > 1) 168 { 169 fastconv_reverse_butterfly(tmp, ak, wk, np, nb); 170 ak += 16; 171 wk += 16; 172 np <<= 1; 173 nb >>= 1; 174 } 175 176 fastconv_reverse_butterfly_last_adding(dst, tmp, ak, wk, np); 177 } 178 fastconv_apply_fma3(float * dst,float * tmp,const float * c1,const float * c2,size_t rank)179 void fastconv_apply_fma3(float *dst, float *tmp, const float *c1, const float *c2, size_t rank) 180 { 181 size_t nb = 1 << (rank - 3), np = 4; 182 const float *ak = FFT_A; 183 const float *wk = FFT_DW; 184 185 fastconv_apply_prepare_fma3(tmp, c1, c2, nb); 186 if ((nb >>= 1) <= 0) 187 { 188 fastconv_reverse_unpack_adding(dst, tmp, rank); 189 return; 190 } 191 ak += 16; 192 wk += 16; 193 np <<= 1; 194 195 while (nb > 1) 196 { 197 fastconv_reverse_butterfly_fma3(tmp, ak, wk, np, nb); 198 ak += 16; 199 wk += 16; 200 np <<= 1; 201 nb >>= 1; 202 } 203 204 fastconv_reverse_butterfly_last_adding_fma3(dst, tmp, ak, wk, np); 205 } 206 fastconv_parse_apply(float * dst,float * tmp,const float * c,const float * src,size_t rank)207 void fastconv_parse_apply(float *dst, float *tmp, const float *c, const float *src, size_t rank) 208 { 209 const float *ak = &FFT_A[(rank - 3) << 4]; 210 const float *wk = &FFT_DW[(rank - 3) << 4]; 211 size_t np = 1 << (rank - 1); 212 size_t nb = 1; 213 214 if (np > 4) 215 { 216 fastconv_direct_prepare(tmp, src, ak, wk, np); 217 ak -= 16; 218 wk -= 16; 219 np >>= 1; 220 nb <<= 1; 221 } 222 else 223 fastconv_direct_unpack(tmp, src); 224 225 while (np > 4) 226 { 227 fastconv_direct_butterfly(tmp, ak, wk, np, nb); 228 ak -= 16; 229 wk -= 16; 230 np >>= 1; 231 nb <<= 1; 232 } 233 234 fastconv_apply_internal(tmp, c, nb); 235 236 if ((nb >>= 1) <= 0) 237 { 238 fastconv_reverse_unpack_adding(dst, tmp, rank); 239 return; 240 } 241 ak += 16; 242 wk += 16; 243 np <<= 1; 244 245 while (nb > 1) 246 { 247 fastconv_reverse_butterfly(tmp, ak, wk, np, nb); 248 ak += 16; 249 wk += 16; 250 np <<= 1; 251 nb >>= 1; 252 } 253 254 fastconv_reverse_butterfly_last_adding(dst, tmp, ak, wk, np); 255 } 256 fastconv_parse_apply_fma3(float * dst,float * tmp,const float * c,const float * src,size_t rank)257 void fastconv_parse_apply_fma3(float *dst, float *tmp, const float *c, const float *src, size_t rank) 258 { 259 const float *ak = &FFT_A[(rank - 3) << 4]; 260 const float *wk = &FFT_DW[(rank - 3) << 4]; 261 size_t np = 1 << (rank - 1); 262 size_t nb = 1; 263 264 if (np > 4) 265 { 266 fastconv_direct_prepare_fma3(tmp, src, ak, wk, np); 267 ak -= 16; 268 wk -= 16; 269 np >>= 1; 270 nb <<= 1; 271 } 272 else 273 fastconv_direct_unpack(tmp, src); 274 275 while (np > 4) 276 { 277 fastconv_direct_butterfly_fma3(tmp, ak, wk, np, nb); 278 ak -= 16; 279 wk -= 16; 280 np >>= 1; 281 nb <<= 1; 282 } 283 284 fastconv_apply_internal_fma3(tmp, c, nb); 285 286 if ((nb >>= 1) <= 0) 287 { 288 fastconv_reverse_unpack_adding(dst, tmp, rank); 289 return; 290 } 291 ak += 16; 292 wk += 16; 293 np <<= 1; 294 295 while (nb > 1) 296 { 297 fastconv_reverse_butterfly_fma3(tmp, ak, wk, np, nb); 298 ak += 16; 299 wk += 16; 300 np <<= 1; 301 nb >>= 1; 302 } 303 304 fastconv_reverse_butterfly_last_adding_fma3(dst, tmp, ak, wk, np); 305 } 306 } 307 308 #endif /* DSP_ARCH_X86_AVX_FASTCONV_H_ */ 309