1 /*
2 * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3 * (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
4 *
5 * This file is part of lsp-plugins
6 * Created on: 25 окт. 2018 г.
7 *
8 * lsp-plugins is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU Lesser General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * any later version.
12 *
13 * lsp-plugins is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>.
20 */
21
22 #include <dsp/bits.h>
23 #include <test/mtest.h>
24 #include <test/FloatBuffer.h>
25
26 #define RANK 4
27 #define BUF_SIZE (1 << RANK)
28
29 static const float XFFT_DW[] __lsp_aligned16 =
30 {
31 // Re, Im
32 0.0000000000000000f, 1.0000000000000000f,
33 0.0000000000000000f, 1.0000000000000000f,
34 0.7071067811865475f, 0.7071067811865475f,
35 0.9238795325112868f, 0.3826834323650898f,
36 0.9807852804032305f, 0.1950903220161283f,
37 0.9951847266721969f, 0.0980171403295606f,
38 0.9987954562051724f, 0.0490676743274180f,
39 0.9996988186962042f, 0.0245412285229123f,
40 0.9999247018391445f, 0.0122715382857199f,
41 0.9999811752826011f, 0.0061358846491545f,
42 0.9999952938095762f, 0.0030679567629660f,
43 0.9999988234517019f, 0.0015339801862848f,
44 0.9999997058628822f, 0.0007669903187427f,
45 0.9999999264657179f, 0.0003834951875714f,
46 0.9999999816164293f, 0.0001917475973107f,
47 0.9999999954041073f, 0.0000958737990960f,
48 0.9999999988510268f, 0.0000479368996031f
49 };
50
51 static const float XFFT_A_RE[] __lsp_aligned16 =
52 {
53 1.0000000000000000f, 0.7071067811865475f, 0.0000000000000000f, -0.7071067811865475f,
54 1.0000000000000000f, 0.9238795325112868f, 0.7071067811865475f, 0.3826834323650898f,
55 1.0000000000000000f, 0.9807852804032305f, 0.9238795325112868f, 0.8314696123025452f,
56 1.0000000000000000f, 0.9951847266721969f, 0.9807852804032305f, 0.9569403357322089f,
57 1.0000000000000000f, 0.9987954562051724f, 0.9951847266721969f, 0.9891765099647810f,
58 1.0000000000000000f, 0.9996988186962042f, 0.9987954562051724f, 0.9972904566786902f,
59 1.0000000000000000f, 0.9999247018391445f, 0.9996988186962042f, 0.9993223845883495f,
60 1.0000000000000000f, 0.9999811752826011f, 0.9999247018391445f, 0.9998305817958234f,
61 1.0000000000000000f, 0.9999952938095762f, 0.9999811752826011f, 0.9999576445519639f,
62 1.0000000000000000f, 0.9999988234517019f, 0.9999952938095762f, 0.9999894110819284f,
63 1.0000000000000000f, 0.9999997058628822f, 0.9999988234517019f, 0.9999973527669782f,
64 1.0000000000000000f, 0.9999999264657179f, 0.9999997058628822f, 0.9999993381915255f,
65 1.0000000000000000f, 0.9999999816164293f, 0.9999999264657179f, 0.9999998345478677f,
66 1.0000000000000000f, 0.9999999954041073f, 0.9999999816164293f, 0.9999999586369661f,
67 1.0000000000000000f, 0.9999999988510268f, 0.9999999954041073f, 0.9999999896592415f
68 };
69
70 static const float XFFT_A_IM[] __lsp_aligned16 =
71 {
72 0.0000000000000000f, 0.7071067811865475f, 1.0000000000000000f, 0.7071067811865476f,
73 0.0000000000000000f, 0.3826834323650898f, 0.7071067811865475f, 0.9238795325112867f,
74 0.0000000000000000f, 0.1950903220161283f, 0.3826834323650898f, 0.5555702330196022f,
75 0.0000000000000000f, 0.0980171403295606f, 0.1950903220161283f, 0.2902846772544624f,
76 0.0000000000000000f, 0.0490676743274180f, 0.0980171403295606f, 0.1467304744553617f,
77 0.0000000000000000f, 0.0245412285229123f, 0.0490676743274180f, 0.0735645635996674f,
78 0.0000000000000000f, 0.0122715382857199f, 0.0245412285229123f, 0.0368072229413588f,
79 0.0000000000000000f, 0.0061358846491545f, 0.0122715382857199f, 0.0184067299058048f,
80 0.0000000000000000f, 0.0030679567629660f, 0.0061358846491545f, 0.0092037547820598f,
81 0.0000000000000000f, 0.0015339801862848f, 0.0030679567629660f, 0.0046019261204486f,
82 0.0000000000000000f, 0.0007669903187427f, 0.0015339801862848f, 0.0023009691514258f,
83 0.0000000000000000f, 0.0003834951875714f, 0.0007669903187427f, 0.0011504853371138f,
84 0.0000000000000000f, 0.0001917475973107f, 0.0003834951875714f, 0.0005752427637321f,
85 0.0000000000000000f, 0.0000958737990960f, 0.0001917475973107f, 0.0002876213937629f,
86 0.0000000000000000f, 0.0000479368996031f, 0.0000958737990960f, 0.0001438106983686f
87 };
88
scramble_fft(float * dst_re,float * dst_im,const float * src_re,const float * src_im,size_t rank)89 static void scramble_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank)
90 {
91 size_t items = 1 << rank;
92
93 // Scramble the order of samples
94 if ((dst_re != src_re) && (dst_im != src_im))
95 {
96 for (size_t i = 0; i < items; ++i)
97 {
98 size_t j = reverse_bits(i, rank);
99 dst_re[i] = src_re[j];
100 dst_im[i] = src_im[j];
101 }
102 }
103 else
104 {
105 for (size_t i = 1; i < (items - 1); ++i)
106 {
107 size_t j = reverse_bits(i, rank);
108 if (i >= j)
109 continue;
110
111 float re = dst_re[i];
112 float im = dst_im[i];
113 dst_re[i] = dst_re[j];
114 dst_im[i] = dst_im[j];
115 dst_re[j] = re;
116 dst_im[j] = im;
117 }
118 }
119 }
120
start_direct_fft(float * dst_re,float * dst_im,size_t rank)121 static void start_direct_fft(float *dst_re, float *dst_im, size_t rank)
122 {
123 size_t iterations = 1 << (rank - 2);
124 while (iterations--)
125 {
126 // Perform 4-calculations
127 // s0' = s0 + s1
128 // s1' = s0 - s1
129 // s2' = s2 + s3
130 // s3' = s2 - s3
131 // s0'' = s0' + s2'
132 // s1'' = s1' - j * s3'
133 // s2'' = s0' - s2'
134 // s3'' = s1' + j * s3'
135 float s0_re = dst_re[0] + dst_re[1];
136 float s1_re = dst_re[0] - dst_re[1];
137 float s2_re = dst_re[2] + dst_re[3];
138 float s3_re = dst_re[2] - dst_re[3];
139
140 float s0_im = dst_im[0] + dst_im[1];
141 float s1_im = dst_im[0] - dst_im[1];
142 float s2_im = dst_im[2] + dst_im[3];
143 float s3_im = dst_im[2] - dst_im[3];
144
145 dst_re[0] = s0_re + s2_re;
146 dst_re[1] = s1_re + s3_im;
147 dst_re[2] = s0_re - s2_re;
148 dst_re[3] = s1_re - s3_im;
149
150 dst_im[0] = s0_im + s2_im;
151 dst_im[1] = s1_im - s3_re;
152 dst_im[2] = s0_im - s2_im;
153 dst_im[3] = s1_im + s3_re;
154
155 // Move pointers
156 dst_re += 4;
157 dst_im += 4;
158 }
159 }
160
direct_fft(float * dst_re,float * dst_im,const float * src_re,const float * src_im,size_t rank)161 static void direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank)
162 {
163 scramble_fft(dst_re, dst_im, src_re, src_im, rank);
164 start_direct_fft(dst_re, dst_im, rank);
165
166 // Prepare for butterflies
167 size_t items = 1 << rank;
168
169 float c_re[4], c_im[4], w_re[4], w_im[4];
170 const float *dw = XFFT_DW;
171 const float *iw_re = XFFT_A_RE;
172 const float *iw_im = XFFT_A_IM;
173
174 // Iterate butterflies
175 for (size_t n=4, bs=n << 1; n < items; n <<= 1, bs <<= 1)
176 {
177 // size_t n=4, bs=n << 1;
178
179 for (size_t p=0; p<items; p += bs)
180 {
181 // printf("rank=%d, iw_re={%.6f, %.6f, %.6f, %.6f}, iw_im={%.6f, %.6f, %.6f, %.6f}, dw={%.6f, %.6f}\n",
182 // int(rank),
183 // iw_re[0], iw_re[1], iw_re[2], iw_re[3],
184 // iw_im[0], iw_im[1], iw_im[2], iw_im[3],
185 // dw[0], dw[1]
186 // );
187
188
189 // Set initial values of pointers
190 float *a_re = &dst_re[p];
191 float *a_im = &dst_im[p];
192 float *b_re = &a_re[n];
193 float *b_im = &a_im[n];
194
195 w_re[0] = iw_re[0];
196 w_re[1] = iw_re[1];
197 w_re[2] = iw_re[2];
198 w_re[3] = iw_re[3];
199 w_im[0] = iw_im[0];
200 w_im[1] = iw_im[1];
201 w_im[2] = iw_im[2];
202 w_im[3] = iw_im[3];
203
204 for (size_t k=0; ;)
205 {
206 // Calculate complex c = w * b
207 c_re[0] = w_re[0] * b_re[0] + w_im[0] * b_im[0];
208 c_re[1] = w_re[1] * b_re[1] + w_im[1] * b_im[1];
209 c_re[2] = w_re[2] * b_re[2] + w_im[2] * b_im[2];
210 c_re[3] = w_re[3] * b_re[3] + w_im[3] * b_im[3];
211
212 c_im[0] = w_re[0] * b_im[0] - w_im[0] * b_re[0];
213 c_im[1] = w_re[1] * b_im[1] - w_im[1] * b_re[1];
214 c_im[2] = w_re[2] * b_im[2] - w_im[2] * b_re[2];
215 c_im[3] = w_re[3] * b_im[3] - w_im[3] * b_re[3];
216
217 // Calculate the output values:
218 // a' = a + c
219 // b' = a - c
220 b_re[0] = a_re[0] - c_re[0];
221 b_re[1] = a_re[1] - c_re[1];
222 b_re[2] = a_re[2] - c_re[2];
223 b_re[3] = a_re[3] - c_re[3];
224
225 b_im[0] = a_im[0] - c_im[0];
226 b_im[1] = a_im[1] - c_im[1];
227 b_im[2] = a_im[2] - c_im[2];
228 b_im[3] = a_im[3] - c_im[3];
229
230 a_re[0] = a_re[0] + c_re[0];
231 a_re[1] = a_re[1] + c_re[1];
232 a_re[2] = a_re[2] + c_re[2];
233 a_re[3] = a_re[3] + c_re[3];
234
235 a_im[0] = a_im[0] + c_im[0];
236 a_im[1] = a_im[1] + c_im[1];
237 a_im[2] = a_im[2] + c_im[2];
238 a_im[3] = a_im[3] + c_im[3];
239
240 // Update pointers
241 a_re += 4;
242 a_im += 4;
243 b_re += 4;
244 b_im += 4;
245
246 if ((k += 4) >= n)
247 break;
248
249 // Rotate w vector
250 c_re[0] = w_re[0]*dw[0] - w_im[0]*dw[1];
251 c_re[1] = w_re[1]*dw[0] - w_im[1]*dw[1];
252 c_re[2] = w_re[2]*dw[0] - w_im[2]*dw[1];
253 c_re[3] = w_re[3]*dw[0] - w_im[3]*dw[1];
254
255 c_im[0] = w_re[0]*dw[1] + w_im[0]*dw[0];
256 c_im[1] = w_re[1]*dw[1] + w_im[1]*dw[0];
257 c_im[2] = w_re[2]*dw[1] + w_im[2]*dw[0];
258 c_im[3] = w_re[3]*dw[1] + w_im[3]*dw[0];
259
260 w_re[0] = c_re[0];
261 w_re[1] = c_re[1];
262 w_re[2] = c_re[2];
263 w_re[3] = c_re[3];
264
265 w_im[0] = c_im[0];
266 w_im[1] = c_im[1];
267 w_im[2] = c_im[2];
268 w_im[3] = c_im[3];
269 }
270 }
271
272 dw += 2;
273 iw_re += 4;
274 iw_im += 4;
275 }
276 }
277
278 namespace native
279 {
280 void direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
281 void reverse_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
282 }
283
284 IF_ARCH_X86(
285 namespace sse
286 {
287 void direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
288 void reverse_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
289 }
290
291 namespace avx
292 {
293 void direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
294 void reverse_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
295
296 void direct_fft_fma3(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
297 void reverse_fft_fma3(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
298 }
299 )
300
301 IF_ARCH_ARM(
302 namespace neon_d32
303 {
304 void direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
305 void reverse_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
306 }
307 )
308
309 IF_ARCH_AARCH64(
310 namespace asimd
311 {
312 void direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
313 void reverse_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
314 }
315 )
316
317 typedef void (* direct_fft_t)(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
318 typedef void (* reverse_fft_t)(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
319
320 MTEST_BEGIN("dsp.fft", fft)
321
test_direct_fft(const char * text,direct_fft_t direct,const FloatBuffer & x_r,const FloatBuffer & x_i)322 void test_direct_fft(const char *text, direct_fft_t direct, const FloatBuffer &x_r, const FloatBuffer &x_i)
323 {
324 FloatBuffer src_r(BUF_SIZE, 64);
325 FloatBuffer src_i(BUF_SIZE, 64);
326 FloatBuffer dst1r(BUF_SIZE, 64);
327 FloatBuffer dst1i(BUF_SIZE, 64);
328 FloatBuffer dst2r(BUF_SIZE, 64);
329 FloatBuffer dst2i(BUF_SIZE, 64);
330 src_r.copy(x_r);
331 src_i.copy(x_i);
332 dst2r.copy(x_r);
333 dst2i.copy(x_i);
334
335 printf("Testing %s direct FFT...\n", text);
336 src_r.dump("src_r");
337 src_i.dump("src_i");
338
339 direct(dst1r, dst1i, src_r, src_i, RANK);
340 dst1r.dump("dst1r");
341 dst1i.dump("dst1i");
342
343 direct(dst2r, dst2i, dst2r, dst2i, RANK);
344 dst1r.dump("dst2r");
345 dst1i.dump("dst2i");
346
347 MTEST_ASSERT_MSG(x_r.valid(), "x_r corrupted");
348 MTEST_ASSERT_MSG(x_i.valid(), "x_i corrupted");
349 MTEST_ASSERT_MSG(src_r.valid(), "src_r corrupted");
350 MTEST_ASSERT_MSG(src_i.valid(), "src_i corrupted");
351 MTEST_ASSERT_MSG(dst1r.valid(), "dst1r corrupted");
352 MTEST_ASSERT_MSG(dst1i.valid(), "dst1i corrupted");
353 MTEST_ASSERT_MSG(dst2r.valid(), "dst2r corrupted");
354 MTEST_ASSERT_MSG(dst2i.valid(), "dst2i corrupted");
355 }
356
test_reverse_fft(const char * text,reverse_fft_t direct,const FloatBuffer & x_r,const FloatBuffer & x_i)357 void test_reverse_fft(const char *text, reverse_fft_t direct, const FloatBuffer &x_r, const FloatBuffer &x_i)
358 {
359 FloatBuffer src_r(BUF_SIZE, 64);
360 FloatBuffer src_i(BUF_SIZE, 64);
361 FloatBuffer dst1r(BUF_SIZE, 64);
362 FloatBuffer dst1i(BUF_SIZE, 64);
363 FloatBuffer dst2r(BUF_SIZE, 64);
364 FloatBuffer dst2i(BUF_SIZE, 64);
365 src_r.copy(x_r);
366 src_i.copy(x_i);
367 dst2r.copy(x_r);
368 dst2i.copy(x_i);
369
370 printf("Testing %s reverse FFT...\n", text);
371 src_r.dump("src_r");
372 src_i.dump("src_i");
373
374 direct(dst1r, dst1i, src_r, src_i, RANK);
375 dst1r.dump("dst1r");
376 dst1i.dump("dst1i");
377
378 direct(dst2r, dst2i, dst2r, dst2i, RANK);
379 dst1r.dump("dst2r");
380 dst1i.dump("dst2i");
381
382 MTEST_ASSERT_MSG(x_r.valid(), "x_r corrupted");
383 MTEST_ASSERT_MSG(x_i.valid(), "x_i corrupted");
384 MTEST_ASSERT_MSG(src_r.valid(), "src_r corrupted");
385 MTEST_ASSERT_MSG(src_i.valid(), "src_i corrupted");
386 MTEST_ASSERT_MSG(dst1r.valid(), "dst1r corrupted");
387 MTEST_ASSERT_MSG(dst1i.valid(), "dst1i corrupted");
388 MTEST_ASSERT_MSG(dst2r.valid(), "dst2r corrupted");
389 MTEST_ASSERT_MSG(dst2i.valid(), "dst2i corrupted");
390 }
391
392 MTEST_MAIN
393 {
394 FloatBuffer src1r(BUF_SIZE, 64);
395 FloatBuffer src1i(BUF_SIZE, 64);
396 FloatBuffer dst1r(BUF_SIZE, 64);
397 FloatBuffer dst1i(BUF_SIZE, 64);
398
399 // Prepare data
400 for (size_t i=0; i<BUF_SIZE; ++i)
401 {
402 src1r[i] = i;
403 src1i[i] = i * 0.1f;
404 }
405
406 // Test direct FFT
407 test_direct_fft("native", direct_fft, src1r, src1i);
408
409 IF_ARCH_X86(
410 if (TEST_SUPPORTED(sse::direct_fft))
411 test_direct_fft("SSE", sse::direct_fft, src1r, src1i);
412 if (TEST_SUPPORTED(avx::direct_fft))
413 test_direct_fft("AVX", avx::direct_fft, src1r, src1i);
414 if (TEST_SUPPORTED(avx::direct_fft))
415 test_direct_fft("FMA3", avx::direct_fft_fma3, src1r, src1i);
416 );
417
418 IF_ARCH_ARM(
419 if (TEST_SUPPORTED(neon_d32::direct_fft))
420 test_direct_fft("NEON-D32", neon_d32::direct_fft, src1r, src1i);
421 );
422
423 IF_ARCH_AARCH64(
424 if (TEST_SUPPORTED(asimd::direct_fft))
425 test_direct_fft("ASIMD", asimd::direct_fft, src1r, src1i);
426 );
427
428 // Test reverse FFT
429 direct_fft(dst1r, dst1i, src1r, src1i, RANK);
430 printf("\n");
431
432 test_reverse_fft("native", native::reverse_fft, dst1r, dst1i);
433
434 IF_ARCH_X86(
435 if (TEST_SUPPORTED(sse::reverse_fft))
436 test_reverse_fft("SSE", sse::reverse_fft, dst1r, dst1i);
437 if (TEST_SUPPORTED(avx::reverse_fft))
438 test_reverse_fft("AVX", avx::reverse_fft, dst1r, dst1i);
439 if (TEST_SUPPORTED(avx::reverse_fft))
440 test_reverse_fft("FMA3", avx::reverse_fft_fma3, dst1r, dst1i);
441 );
442
443 IF_ARCH_ARM(
444 if (TEST_SUPPORTED(neon_d32::reverse_fft))
445 test_reverse_fft("NEON-D32", neon_d32::reverse_fft, dst1r, dst1i);
446 );
447
448 IF_ARCH_AARCH64(
449 if (TEST_SUPPORTED(asimd::reverse_fft))
450 test_reverse_fft("ASIMD", asimd::reverse_fft, dst1r, dst1i);
451 );
452
453 }
454 MTEST_END
455