/*
 * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
 *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
 *
 * This file is part of lsp-plugins
 * Created on: 9 Dec 2019
 *
 * lsp-plugins is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * lsp-plugins is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef DSP_ARCH_X86_AVX_FFT_H_
#define DSP_ARCH_X86_AVX_FFT_H_

#ifndef DSP_ARCH_X86_AVX_IMPL
    #error "This header should not be included directly"
#endif /* DSP_ARCH_X86_AVX_IMPL */

#include <dsp/arch/x86/avx/fft/const.h>
#include <dsp/arch/x86/avx/fft/butterfly.h>
#include <dsp/arch/x86/avx/fft/normalize.h>

// Instantiate the bit-reversal (scramble) routines from the shared template
// in fft/scramble.h.  Each instantiation is parametrized by:
//   FFT_SCRAMBLE_*_NAME  - names of the generated functions
//   FFT_TYPE             - integer type of the bit-reversed index
//                          (uint8_t for small ranks, uint16_t for larger ones)
//   FFT_FMA(a, b)        - selects plain AVX code (a) or FMA3 code (b)

// Plain AVX, 8-bit bit-reversal indices
#define FFT_SCRAMBLE_SELF_DIRECT_NAME       scramble_self_direct8
#define FFT_SCRAMBLE_SELF_REVERSE_NAME      scramble_self_reverse8
#define FFT_SCRAMBLE_COPY_DIRECT_NAME       scramble_copy_direct8
#define FFT_SCRAMBLE_COPY_REVERSE_NAME      scramble_copy_reverse8
#define FFT_TYPE                            uint8_t
#define FFT_FMA(a, b)                       a
#include <dsp/arch/x86/avx/fft/scramble.h>

// Plain AVX, 16-bit bit-reversal indices
#define FFT_SCRAMBLE_SELF_DIRECT_NAME       scramble_self_direct16
#define FFT_SCRAMBLE_SELF_REVERSE_NAME      scramble_self_reverse16
#define FFT_SCRAMBLE_COPY_DIRECT_NAME       scramble_copy_direct16
#define FFT_SCRAMBLE_COPY_REVERSE_NAME      scramble_copy_reverse16
#define FFT_TYPE                            uint16_t
#define FFT_FMA(a, b)                       a
#include <dsp/arch/x86/avx/fft/scramble.h>

// FMA3, 8-bit bit-reversal indices
#define FFT_SCRAMBLE_SELF_DIRECT_NAME       scramble_self_direct8_fma3
#define FFT_SCRAMBLE_SELF_REVERSE_NAME      scramble_self_reverse8_fma3
#define FFT_SCRAMBLE_COPY_DIRECT_NAME       scramble_copy_direct8_fma3
#define FFT_SCRAMBLE_COPY_REVERSE_NAME      scramble_copy_reverse8_fma3
#define FFT_TYPE                            uint8_t
#define FFT_FMA(a, b)                       b
#include <dsp/arch/x86/avx/fft/scramble.h>

// FMA3, 16-bit bit-reversal indices
#define FFT_SCRAMBLE_SELF_DIRECT_NAME       scramble_self_direct16_fma3
#define FFT_SCRAMBLE_SELF_REVERSE_NAME      scramble_self_reverse16_fma3
#define FFT_SCRAMBLE_COPY_DIRECT_NAME       scramble_copy_direct16_fma3
#define FFT_SCRAMBLE_COPY_REVERSE_NAME      scramble_copy_reverse16_fma3
#define FFT_TYPE                            uint16_t
#define FFT_FMA(a, b)                       b
#include <dsp/arch/x86/avx/fft/scramble.h>

namespace avx
{
    /**
     * Compute a small direct (forward) FFT with plain scalar code.
     * Handles rank <= 2 (FFT size 1, 2 or 4), where SIMD does not pay off.
     * Safe for in-place use (dst aliasing src): all source-dependent values
     * are cached before any destination write.
     *
     * @param dst_re destination buffer for real parts (2^rank samples)
     * @param dst_im destination buffer for imaginary parts (2^rank samples)
     * @param src_re source real parts
     * @param src_im source imaginary parts
     * @param rank   binary logarithm of the FFT size (expected 0..2)
     */
    static inline void small_direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank)
    {
        if (rank == 2)
        {
            // Stage 1: radix-2 butterflies on sample pairs (twiddle = +/-1)
            float s0_re = src_re[0] + src_re[1];
            float s1_re = src_re[0] - src_re[1];
            float s2_re = src_re[2] + src_re[3];
            float s3_re = src_re[2] - src_re[3];

            float s0_im = src_im[0] + src_im[1];
            float s1_im = src_im[0] - src_im[1];
            float s2_im = src_im[2] + src_im[3];
            float s3_im = src_im[2] - src_im[3];

            // Stage 2: combine halves; s3 is multiplied by the forward
            // twiddle -j, i.e. re += s3_im, im -= s3_re
            dst_re[0] = s0_re + s2_re;
            dst_re[1] = s1_re + s3_im;
            dst_re[2] = s0_re - s2_re;
            dst_re[3] = s1_re - s3_im;

            dst_im[0] = s0_im + s2_im;
            dst_im[1] = s1_im - s3_re;
            dst_im[2] = s0_im - s2_im;
            dst_im[3] = s1_im + s3_re;
        }
        else if (rank == 1)
        {
            // Single butterfly:
            // s0' = s0 + s1
            // s1' = s0 - s1
            // Cache s1 first so that dst may alias src
            float s1_re = src_re[1];
            float s1_im = src_im[1];
            dst_re[1] = src_re[0] - s1_re;
            dst_im[1] = src_im[0] - s1_im;
            dst_re[0] = src_re[0] + s1_re;
            dst_im[0] = src_im[0] + s1_im;
        }
        else
        {
            // Single-point transform: identity
            dst_re[0] = src_re[0];
            dst_im[0] = src_im[0];
        }
    }
float * src_im,size_t rank)109 static inline void small_reverse_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank) 110 { 111 if (rank == 2) 112 { 113 float s0_re = src_re[0] + src_re[1]; 114 float s1_re = src_re[0] - src_re[1]; 115 float s2_re = src_re[2] + src_re[3]; 116 float s3_re = src_re[2] - src_re[3]; 117 118 float s0_im = src_im[0] + src_im[1]; 119 float s1_im = src_im[0] - src_im[1]; 120 float s2_im = src_im[2] + src_im[3]; 121 float s3_im = src_im[2] - src_im[3]; 122 123 dst_re[0] = s0_re + s2_re; 124 dst_re[1] = s1_re + s3_im; 125 dst_re[2] = s0_re - s2_re; 126 dst_re[3] = s1_re - s3_im; 127 128 dst_im[0] = s0_im + s2_im; 129 dst_im[1] = s1_im - s3_re; 130 dst_im[2] = s0_im - s2_im; 131 dst_im[3] = s1_im + s3_re; 132 } 133 else if (rank == 1) 134 { 135 // s0' = s0 + s1 136 // s1' = s0 - s1 137 float s1_re = src_re[1]; 138 float s1_im = src_im[1]; 139 dst_re[1] = src_re[0] - s1_re; 140 dst_im[1] = src_im[0] - s1_im; 141 dst_re[0] = src_re[0] + s1_re; 142 dst_im[0] = src_im[0] + s1_im; 143 } 144 else 145 { 146 dst_re[0] = src_re[0]; 147 dst_im[0] = src_im[0]; 148 } 149 } 150 direct_fft(float * dst_re,float * dst_im,const float * src_re,const float * src_im,size_t rank)151 void direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank) 152 { 153 // Check bounds 154 if (rank <= 2) 155 { 156 small_direct_fft(dst_re, dst_im, src_re, src_im, rank); 157 return; 158 } 159 160 if ((dst_re == src_re) || (dst_im == src_im) || (rank < 4)) 161 { 162 dsp::move(dst_re, src_re, 1 << rank); 163 dsp::move(dst_im, src_im, 1 << rank); 164 if (rank <= 8) 165 scramble_self_direct8(dst_re, dst_im, rank); 166 else 167 scramble_self_direct16(dst_re, dst_im, rank); 168 } 169 else 170 { 171 if (rank <= 12) 172 scramble_copy_direct8(dst_re, dst_im, src_re, src_im, rank-4); 173 else 174 scramble_copy_direct16(dst_re, dst_im, src_re, src_im, rank-4); 175 } 176 177 for (size_t i=3; i < rank; ++i) 178 
butterfly_direct8p(dst_re, dst_im, i, 1 << (rank - i - 1)); 179 } 180 direct_fft_fma3(float * dst_re,float * dst_im,const float * src_re,const float * src_im,size_t rank)181 void direct_fft_fma3(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank) 182 { 183 // Check bounds 184 if (rank <= 2) 185 { 186 small_direct_fft(dst_re, dst_im, src_re, src_im, rank); 187 return; 188 } 189 190 if ((dst_re == src_re) || (dst_im == src_im) || (rank < 4)) 191 { 192 dsp::move(dst_re, src_re, 1 << rank); 193 dsp::move(dst_im, src_im, 1 << rank); 194 if (rank <= 8) 195 scramble_self_direct8_fma3(dst_re, dst_im, rank); 196 else 197 scramble_self_direct16_fma3(dst_re, dst_im, rank); 198 } 199 else 200 { 201 if (rank <= 12) 202 scramble_copy_direct8_fma3(dst_re, dst_im, src_re, src_im, rank-4); 203 else 204 scramble_copy_direct16_fma3(dst_re, dst_im, src_re, src_im, rank-4); 205 } 206 207 for (size_t i=3; i < rank; ++i) 208 butterfly_direct8p_fma3(dst_re, dst_im, i, 1 << (rank - i - 1)); 209 } 210 reverse_fft(float * dst_re,float * dst_im,const float * src_re,const float * src_im,size_t rank)211 void reverse_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank) 212 { 213 // Check bounds 214 if (rank <= 2) 215 { 216 small_reverse_fft(dst_re, dst_im, src_re, src_im, rank); 217 return; 218 } 219 220 if ((dst_re == src_re) || (dst_im == src_im) || (rank < 4)) 221 { 222 dsp::move(dst_re, src_re, 1 << rank); 223 dsp::move(dst_im, src_im, 1 << rank); 224 if (rank <= 8) 225 scramble_self_reverse8(dst_re, dst_im, rank); 226 else 227 scramble_self_reverse16(dst_re, dst_im, rank); 228 } 229 else 230 { 231 if (rank <= 12) 232 scramble_copy_reverse8(dst_re, dst_im, src_re, src_im, rank-4); 233 else 234 scramble_copy_reverse16(dst_re, dst_im, src_re, src_im, rank-4); 235 } 236 237 for (size_t i=3; i < rank; ++i) 238 butterfly_reverse8p(dst_re, dst_im, i, 1 << (rank - i - 1)); 239 240 dsp::normalize_fft2(dst_re, dst_im, rank); 241 } 
242 reverse_fft_fma3(float * dst_re,float * dst_im,const float * src_re,const float * src_im,size_t rank)243 void reverse_fft_fma3(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank) 244 { 245 // Check bounds 246 if (rank <= 2) 247 { 248 small_reverse_fft(dst_re, dst_im, src_re, src_im, rank); 249 return; 250 } 251 252 if ((dst_re == src_re) || (dst_im == src_im) || (rank < 4)) 253 { 254 dsp::move(dst_re, src_re, 1 << rank); 255 dsp::move(dst_im, src_im, 1 << rank); 256 if (rank <= 8) 257 scramble_self_reverse8_fma3(dst_re, dst_im, rank); 258 else 259 scramble_self_reverse16_fma3(dst_re, dst_im, rank); 260 } 261 else 262 { 263 if (rank <= 12) 264 scramble_copy_reverse8_fma3(dst_re, dst_im, src_re, src_im, rank-4); 265 else 266 scramble_copy_reverse16_fma3(dst_re, dst_im, src_re, src_im, rank-4); 267 } 268 269 for (size_t i=3; i < rank; ++i) 270 butterfly_reverse8p_fma3(dst_re, dst_im, i, 1 << (rank - i - 1)); 271 272 dsp::normalize_fft2(dst_re, dst_im, rank); 273 } 274 } 275 276 #endif /* DSP_ARCH_X86_AVX_FFT_H_ */ 277