1 /* 2 * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/> 3 * (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com> 4 * 5 * This file is part of lsp-plugins 6 * Created on: 05 окт. 2015 г. 7 * 8 * lsp-plugins is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Lesser General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * any later version. 12 * 13 * lsp-plugins is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public License 19 * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>. 20 */ 21 22 #ifndef DSP_ARCH_X86_SSE_FFT_H_ 23 #define DSP_ARCH_X86_SSE_FFT_H_ 24 25 #ifndef DSP_ARCH_X86_SSE_IMPL 26 #error "This header should not be included directly" 27 #endif /* DSP_ARCH_X86_SSE_IMPL */ 28 29 #include <dsp/arch/x86/sse/fft/const.h> 30 #include <dsp/arch/x86/sse/fft/butterfly.h> 31 #include <dsp/arch/x86/sse/fft/p_butterfly.h> 32 #include <dsp/arch/x86/sse/fft/normalize.h> 33 34 // Use 8-bit-reverse algorithm 35 #define FFT_SCRAMBLE_SELF_DIRECT_NAME scramble_self_direct8 36 #define FFT_SCRAMBLE_SELF_REVERSE_NAME scramble_self_reverse8 37 #define FFT_SCRAMBLE_COPY_DIRECT_NAME scramble_copy_direct8 38 #define FFT_SCRAMBLE_COPY_REVERSE_NAME scramble_copy_reverse8 39 #define FFT_TYPE uint8_t 40 #include <dsp/arch/x86/sse/fft/scramble.h> 41 42 #define FFT_SCRAMBLE_SELF_DIRECT_NAME packed_scramble_self_direct8 43 #define FFT_SCRAMBLE_SELF_REVERSE_NAME packed_scramble_self_reverse8 44 #define FFT_SCRAMBLE_COPY_DIRECT_NAME packed_scramble_copy_direct8 45 #define FFT_SCRAMBLE_COPY_REVERSE_NAME packed_scramble_copy_reverse8 46 #define FFT_TYPE uint8_t 47 #include <dsp/arch/x86/sse/fft/p_scramble.h> 48 49 // Use 16-bit-reverse algorithm 50 #define FFT_SCRAMBLE_SELF_DIRECT_NAME scramble_self_direct16 51 #define FFT_SCRAMBLE_SELF_REVERSE_NAME scramble_self_reverse16 52 #define FFT_SCRAMBLE_COPY_DIRECT_NAME scramble_copy_direct16 53 #define FFT_SCRAMBLE_COPY_REVERSE_NAME scramble_copy_reverse16 54 #define FFT_TYPE uint16_t 55 #include <dsp/arch/x86/sse/fft/scramble.h> 56 57 #define FFT_SCRAMBLE_SELF_DIRECT_NAME packed_scramble_self_direct16 58 #define FFT_SCRAMBLE_SELF_REVERSE_NAME packed_scramble_self_reverse16 59 #define FFT_SCRAMBLE_COPY_DIRECT_NAME packed_scramble_copy_direct16 60 #define FFT_SCRAMBLE_COPY_REVERSE_NAME packed_scramble_copy_reverse16 61 #define FFT_TYPE uint16_t 62 #include <dsp/arch/x86/sse/fft/p_scramble.h> 63 64 // Make set of scramble-switch implementations 65 #define FFT_SCRAMBLE_SELF_DIRECT_NAME scramble_self_direct 66 #define FFT_SCRAMBLE_COPY_DIRECT_NAME scramble_copy_direct 67 #define FFT_SCRAMBLE_SELF_REVERSE_NAME scramble_self_reverse 68 #define FFT_SCRAMBLE_COPY_REVERSE_NAME scramble_copy_reverse 69 #define FFT_SCRAMBLE_DIRECT_NAME scramble_direct 70 #define FFT_SCRAMBLE_REVERSE_NAME scramble_reverse 71 #include <dsp/arch/x86/sse/fft/switch.h> 72 73 #define FFT_SCRAMBLE_SELF_DIRECT_NAME packed_scramble_self_direct 74 #define FFT_SCRAMBLE_COPY_DIRECT_NAME packed_scramble_copy_direct 75 #define FFT_SCRAMBLE_SELF_REVERSE_NAME packed_scramble_self_reverse 76 #define FFT_SCRAMBLE_COPY_REVERSE_NAME packed_scramble_copy_reverse 77 #define FFT_SCRAMBLE_DIRECT_NAME packed_scramble_direct 78 #define FFT_SCRAMBLE_REVERSE_NAME packed_scramble_reverse 79 #define FFT_REPACK packed_fft_repack 80 #define FFT_REPACK_NORMALIZE packed_fft_repack_normalize 81 #include <dsp/arch/x86/sse/fft/p_switch.h> 82 83 namespace sse 84 { 85 // Make set of scramble implementations direct_fft(float * dst_re,float * dst_im,const float * src_re,const float * src_im,size_t rank)86 void direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank) 87 { 88 // Check bounds 89 if (rank <= 2) 90 { 91 if (rank == 2) 92 { 93 float s0_re = src_re[0] + src_re[1]; 94 float s1_re = src_re[0] - src_re[1]; 95 float s2_re = src_re[2] + src_re[3]; 96 float s3_re = src_re[2] - src_re[3]; 97 98 float s0_im = src_im[0] + src_im[1]; 99 float s1_im = src_im[0] - src_im[1]; 100 float s2_im = src_im[2] + src_im[3]; 101 float s3_im = src_im[2] - src_im[3]; 102 103 dst_re[0] = s0_re + s2_re; 104 dst_re[1] = s1_re + s3_im; 105 dst_re[2] = s0_re - s2_re; 106 dst_re[3] = s1_re - s3_im; 107 108 dst_im[0] = s0_im + s2_im; 109 dst_im[1] = s1_im - s3_re; 110 dst_im[2] = s0_im - s2_im; 111 dst_im[3] = s1_im + s3_re; 112 } 113 else if (rank == 1) 114 { 115 // s0' = s0 + s1 116 // s1' = s0 - s1 117 float s1_re = src_re[1]; 118 float s1_im = src_im[1]; 119 dst_re[1] = src_re[0] - s1_re; 120 dst_im[1] = src_im[0] - s1_im; 121 dst_re[0] = src_re[0] + s1_re; 122 dst_im[0] = src_im[0] + s1_im; 123 } 124 else 125 { 126 dst_re[0] = src_re[0]; 127 dst_im[0] = src_im[0]; 128 } 129 return; 130 } 131 132 scramble_direct(dst_re, dst_im, src_re, src_im, rank); 133 134 for (size_t i=2; i < rank; ++i) 135 butterfly_direct(dst_re, dst_im, i, 1 << (rank - i - 1)); 136 } 137 packed_direct_fft(float * dst,const float * src,size_t rank)138 void packed_direct_fft(float *dst, const float *src, size_t rank) 139 { 140 // Check bounds 141 if (rank <= 2) 142 { 143 if (rank == 2) 144 { 145 float s0_re = dst[0] + dst[2]; 146 float s1_re = dst[0] - dst[2]; 147 float s0_im = dst[1] + dst[3]; 148 float s1_im = dst[1] - dst[3]; 149 150 float s2_re = dst[4] + dst[6]; 151 float s3_re = dst[4] - dst[6]; 152 float s2_im = dst[5] + dst[7]; 153 float s3_im = dst[5] - dst[7]; 154 155 dst[0] = s0_re + s2_re; 156 dst[1] = s0_im + s2_im; 157 dst[2] = s1_re + s3_im; 158 dst[3] = s1_im - s3_re; 159 160 dst[4] = s0_re - s2_re; 161 dst[5] = s0_im - s2_im; 162 dst[6] = s1_re - s3_im; 163 dst[7] = s1_im + s3_re; 164 } 165 else if (rank == 1) 166 { 167 // s0' = s0 + s1 168 // s1' = s0 - s1 169 float s1_re = src[2]; 170 float s1_im = src[3]; 171 dst[2] = src[0] - s1_re; 172 dst[3] = src[1] - s1_im; 173 dst[0] = src[0] + s1_re; 174 dst[1] = src[1] + s1_im; 175 } 176 else 177 { 178 dst[0] = src[0]; 179 dst[1] = src[1]; 180 } 181 return; 182 } 183 184 // Iterate butterflies 185 packed_scramble_direct(dst, src, rank); 186 187 for (size_t i=2; i < rank; ++i) 188 packed_butterfly_direct(dst, i, 1 << (rank - i - 1)); 189 190 packed_fft_repack(dst, rank); 191 } 192 reverse_fft(float * dst_re,float * dst_im,const float * src_re,const float * src_im,size_t rank)193 void reverse_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank) 194 { 195 // Check bounds 196 if (rank <= 2) 197 { 198 if (rank == 2) 199 { 200 float s0_re = src_re[0] + src_re[1]; 201 float s1_re = src_re[0] - src_re[1]; 202 float s2_re = src_re[2] + src_re[3]; 203 float s3_re = src_re[2] - src_re[3]; 204 205 float s0_im = src_im[0] + src_im[1]; 206 float s1_im = src_im[0] - src_im[1]; 207 float s2_im = src_im[2] + src_im[3]; 208 float s3_im = src_im[2] - src_im[3]; 209 210 dst_re[0] = (s0_re + s2_re)*0.25f; 211 dst_re[1] = (s1_re - s3_im)*0.25f; 212 dst_re[2] = (s0_re - s2_re)*0.25f; 213 dst_re[3] = (s1_re + s3_im)*0.25f; 214 215 dst_im[0] = (s0_im + s2_im)*0.25f; 216 dst_im[1] = (s1_im + s3_re)*0.25f; 217 dst_im[2] = (s0_im - s2_im)*0.25f; 218 dst_im[3] = (s1_im - s3_re)*0.25f; 219 } 220 else if (rank == 1) 221 { 222 // s0' = s0 + s1 223 // s1' = s0 - s1 224 float s1_re = src_re[1]; 225 float s1_im = src_im[1]; 226 dst_re[1] = (src_re[0] - s1_re) * 0.5f; 227 dst_im[1] = (src_im[0] - s1_im) * 0.5f; 228 dst_re[0] = (src_re[0] + s1_re) * 0.5f; 229 dst_im[0] = (src_im[0] + s1_im) * 0.5f; 230 } 231 else 232 { 233 dst_re[0] = src_re[0]; 234 dst_im[0] = src_im[0]; 235 } 236 return; 237 } 238 239 // Iterate butterflies 240 scramble_reverse(dst_re, dst_im, src_re, src_im, rank); 241 242 for (size_t i=2; i < rank; ++i) 243 butterfly_reverse(dst_re, dst_im, i, 1 << (rank - i - 1)); 244 245 dsp::normalize_fft2(dst_re, dst_im, rank); 246 } 247 packed_reverse_fft(float * dst,const float * src,size_t rank)248 void packed_reverse_fft(float *dst, const float *src, size_t rank) 249 { 250 // Check bounds 251 if (rank <= 2) 252 { 253 if (rank == 2) 254 { 255 float s0_re = src[0] + src[2]; 256 float s1_re = src[0] - src[2]; 257 float s2_re = src[4] + src[6]; 258 float s3_re = src[4] - src[6]; 259 260 float s0_im = src[1] + src[3]; 261 float s1_im = src[1] - src[3]; 262 float s2_im = src[5] + src[7]; 263 float s3_im = src[5] - src[7]; 264 265 dst[0] = (s0_re + s2_re)*0.25f; 266 dst[1] = (s0_im + s2_im)*0.25f; 267 dst[2] = (s1_re - s3_im)*0.25f; 268 dst[3] = (s1_im + s3_re)*0.25f; 269 270 dst[4] = (s0_re - s2_re)*0.25f; 271 dst[5] = (s0_im - s2_im)*0.25f; 272 dst[6] = (s1_re + s3_im)*0.25f; 273 dst[7] = (s1_im - s3_re)*0.25f; 274 } 275 else if (rank == 1) 276 { 277 // s0' = s0 + s1 278 // s1' = s0 - s1 279 float s1_re = src[2]; 280 float s1_im = src[3]; 281 dst[2] = src[0] - s1_re; 282 dst[3] = src[1] - s1_im; 283 dst[0] = src[0] + s1_re; 284 dst[1] = src[1] + s1_im; 285 } 286 else 287 { 288 dst[0] = src[0]; 289 dst[1] = src[1]; 290 } 291 return; 292 } 293 294 // Iterate butterflies 295 packed_scramble_reverse(dst, src, rank); 296 297 for (size_t i=2; i < rank; ++i) 298 packed_butterfly_reverse(dst, i, 1 << (rank - i - 1)); 299 300 packed_fft_repack_normalize(dst, rank); 301 } 302 } 303 304 #endif /* DSP_ARCH_X86_SSE_FFT_H_ */ 305