1 /*
2  * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3  *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
4  *
5  * This file is part of lsp-plugins
6  * Created on: 25 окт. 2018 г.
7  *
8  * lsp-plugins is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Lesser General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * any later version.
12  *
13  * lsp-plugins is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <dsp/bits.h>
23 #include <test/mtest.h>
24 #include <test/FloatBuffer.h>
25 
26 #define RANK        4
27 #define BUF_SIZE    (1 << RANK)
28 
29 static const float XFFT_DW[] __lsp_aligned16 =
30 {
31     // Re, Im
32     0.0000000000000000f, 1.0000000000000000f,
33     0.0000000000000000f, 1.0000000000000000f,
34     0.7071067811865475f, 0.7071067811865475f,
35     0.9238795325112868f, 0.3826834323650898f,
36     0.9807852804032305f, 0.1950903220161283f,
37     0.9951847266721969f, 0.0980171403295606f,
38     0.9987954562051724f, 0.0490676743274180f,
39     0.9996988186962042f, 0.0245412285229123f,
40     0.9999247018391445f, 0.0122715382857199f,
41     0.9999811752826011f, 0.0061358846491545f,
42     0.9999952938095762f, 0.0030679567629660f,
43     0.9999988234517019f, 0.0015339801862848f,
44     0.9999997058628822f, 0.0007669903187427f,
45     0.9999999264657179f, 0.0003834951875714f,
46     0.9999999816164293f, 0.0001917475973107f,
47     0.9999999954041073f, 0.0000958737990960f,
48     0.9999999988510268f, 0.0000479368996031f
49 };
50 
51 static const float XFFT_A_RE[] __lsp_aligned16 =
52 {
53     1.0000000000000000f, 0.7071067811865475f, 0.0000000000000000f, -0.7071067811865475f,
54     1.0000000000000000f, 0.9238795325112868f, 0.7071067811865475f, 0.3826834323650898f,
55     1.0000000000000000f, 0.9807852804032305f, 0.9238795325112868f, 0.8314696123025452f,
56     1.0000000000000000f, 0.9951847266721969f, 0.9807852804032305f, 0.9569403357322089f,
57     1.0000000000000000f, 0.9987954562051724f, 0.9951847266721969f, 0.9891765099647810f,
58     1.0000000000000000f, 0.9996988186962042f, 0.9987954562051724f, 0.9972904566786902f,
59     1.0000000000000000f, 0.9999247018391445f, 0.9996988186962042f, 0.9993223845883495f,
60     1.0000000000000000f, 0.9999811752826011f, 0.9999247018391445f, 0.9998305817958234f,
61     1.0000000000000000f, 0.9999952938095762f, 0.9999811752826011f, 0.9999576445519639f,
62     1.0000000000000000f, 0.9999988234517019f, 0.9999952938095762f, 0.9999894110819284f,
63     1.0000000000000000f, 0.9999997058628822f, 0.9999988234517019f, 0.9999973527669782f,
64     1.0000000000000000f, 0.9999999264657179f, 0.9999997058628822f, 0.9999993381915255f,
65     1.0000000000000000f, 0.9999999816164293f, 0.9999999264657179f, 0.9999998345478677f,
66     1.0000000000000000f, 0.9999999954041073f, 0.9999999816164293f, 0.9999999586369661f,
67     1.0000000000000000f, 0.9999999988510268f, 0.9999999954041073f, 0.9999999896592415f
68 };
69 
70 static const float XFFT_A_IM[] __lsp_aligned16 =
71 {
72     0.0000000000000000f, 0.7071067811865475f, 1.0000000000000000f, 0.7071067811865476f,
73     0.0000000000000000f, 0.3826834323650898f, 0.7071067811865475f, 0.9238795325112867f,
74     0.0000000000000000f, 0.1950903220161283f, 0.3826834323650898f, 0.5555702330196022f,
75     0.0000000000000000f, 0.0980171403295606f, 0.1950903220161283f, 0.2902846772544624f,
76     0.0000000000000000f, 0.0490676743274180f, 0.0980171403295606f, 0.1467304744553617f,
77     0.0000000000000000f, 0.0245412285229123f, 0.0490676743274180f, 0.0735645635996674f,
78     0.0000000000000000f, 0.0122715382857199f, 0.0245412285229123f, 0.0368072229413588f,
79     0.0000000000000000f, 0.0061358846491545f, 0.0122715382857199f, 0.0184067299058048f,
80     0.0000000000000000f, 0.0030679567629660f, 0.0061358846491545f, 0.0092037547820598f,
81     0.0000000000000000f, 0.0015339801862848f, 0.0030679567629660f, 0.0046019261204486f,
82     0.0000000000000000f, 0.0007669903187427f, 0.0015339801862848f, 0.0023009691514258f,
83     0.0000000000000000f, 0.0003834951875714f, 0.0007669903187427f, 0.0011504853371138f,
84     0.0000000000000000f, 0.0001917475973107f, 0.0003834951875714f, 0.0005752427637321f,
85     0.0000000000000000f, 0.0000958737990960f, 0.0001917475973107f, 0.0002876213937629f,
86     0.0000000000000000f, 0.0000479368996031f, 0.0000958737990960f, 0.0001438106983686f
87 };
88 
scramble_fft(float * dst_re,float * dst_im,const float * src_re,const float * src_im,size_t rank)89 static void scramble_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank)
90 {
91     size_t items    = 1 << rank;
92 
93     // Scramble the order of samples
94     if ((dst_re != src_re) && (dst_im != src_im))
95     {
96         for (size_t i = 0; i < items; ++i)
97         {
98             size_t j    = reverse_bits(i, rank);
99             dst_re[i]   = src_re[j];
100             dst_im[i]   = src_im[j];
101         }
102     }
103     else
104     {
105         for (size_t i = 1; i < (items - 1); ++i)
106         {
107             size_t j = reverse_bits(i, rank);
108             if (i >= j)
109                 continue;
110 
111             float re    = dst_re[i];
112             float im    = dst_im[i];
113             dst_re[i]   = dst_re[j];
114             dst_im[i]   = dst_im[j];
115             dst_re[j]   = re;
116             dst_im[j]   = im;
117         }
118     }
119 }
120 
start_direct_fft(float * dst_re,float * dst_im,size_t rank)121 static void start_direct_fft(float *dst_re, float *dst_im, size_t rank)
122 {
123     size_t iterations    = 1 << (rank - 2);
124     while (iterations--)
125     {
126         // Perform 4-calculations
127         // s0' = s0 + s1
128         // s1' = s0 - s1
129         // s2' = s2 + s3
130         // s3' = s2 - s3
131         // s0'' = s0' + s2'
132         // s1'' = s1' - j * s3'
133         // s2'' = s0' - s2'
134         // s3'' = s1' + j * s3'
135         float s0_re     = dst_re[0] + dst_re[1];
136         float s1_re     = dst_re[0] - dst_re[1];
137         float s2_re     = dst_re[2] + dst_re[3];
138         float s3_re     = dst_re[2] - dst_re[3];
139 
140         float s0_im     = dst_im[0] + dst_im[1];
141         float s1_im     = dst_im[0] - dst_im[1];
142         float s2_im     = dst_im[2] + dst_im[3];
143         float s3_im     = dst_im[2] - dst_im[3];
144 
145         dst_re[0]       = s0_re + s2_re;
146         dst_re[1]       = s1_re + s3_im;
147         dst_re[2]       = s0_re - s2_re;
148         dst_re[3]       = s1_re - s3_im;
149 
150         dst_im[0]       = s0_im + s2_im;
151         dst_im[1]       = s1_im - s3_re;
152         dst_im[2]       = s0_im - s2_im;
153         dst_im[3]       = s1_im + s3_re;
154 
155         // Move pointers
156         dst_re         += 4;
157         dst_im         += 4;
158     }
159 }
160 
direct_fft(float * dst_re,float * dst_im,const float * src_re,const float * src_im,size_t rank)161 static void direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank)
162 {
163     scramble_fft(dst_re, dst_im, src_re, src_im, rank);
164     start_direct_fft(dst_re, dst_im, rank);
165 
166     // Prepare for butterflies
167     size_t items    = 1 << rank;
168 
169     float c_re[4], c_im[4], w_re[4], w_im[4];
170     const float *dw     = XFFT_DW;
171     const float *iw_re  = XFFT_A_RE;
172     const float *iw_im  = XFFT_A_IM;
173 
174     // Iterate butterflies
175     for (size_t n=4, bs=n << 1; n < items; n <<= 1, bs <<= 1)
176     {
177 //        size_t n=4, bs=n << 1;
178 
179         for (size_t p=0; p<items; p += bs)
180         {
181 //            printf("rank=%d, iw_re={%.6f, %.6f, %.6f, %.6f}, iw_im={%.6f, %.6f, %.6f, %.6f}, dw={%.6f, %.6f}\n",
182 //                    int(rank),
183 //                    iw_re[0], iw_re[1], iw_re[2], iw_re[3],
184 //                    iw_im[0], iw_im[1], iw_im[2], iw_im[3],
185 //                    dw[0], dw[1]
186 //            );
187 
188 
189             // Set initial values of pointers
190             float *a_re         = &dst_re[p];
191             float *a_im         = &dst_im[p];
192             float *b_re         = &a_re[n];
193             float *b_im         = &a_im[n];
194 
195             w_re[0]             = iw_re[0];
196             w_re[1]             = iw_re[1];
197             w_re[2]             = iw_re[2];
198             w_re[3]             = iw_re[3];
199             w_im[0]             = iw_im[0];
200             w_im[1]             = iw_im[1];
201             w_im[2]             = iw_im[2];
202             w_im[3]             = iw_im[3];
203 
204             for (size_t k=0; ;)
205             {
206                 // Calculate complex c = w * b
207                 c_re[0]         = w_re[0] * b_re[0] + w_im[0] * b_im[0];
208                 c_re[1]         = w_re[1] * b_re[1] + w_im[1] * b_im[1];
209                 c_re[2]         = w_re[2] * b_re[2] + w_im[2] * b_im[2];
210                 c_re[3]         = w_re[3] * b_re[3] + w_im[3] * b_im[3];
211 
212                 c_im[0]         = w_re[0] * b_im[0] - w_im[0] * b_re[0];
213                 c_im[1]         = w_re[1] * b_im[1] - w_im[1] * b_re[1];
214                 c_im[2]         = w_re[2] * b_im[2] - w_im[2] * b_re[2];
215                 c_im[3]         = w_re[3] * b_im[3] - w_im[3] * b_re[3];
216 
217                 // Calculate the output values:
218                 // a'   = a + c
219                 // b'   = a - c
220                 b_re[0]         = a_re[0] - c_re[0];
221                 b_re[1]         = a_re[1] - c_re[1];
222                 b_re[2]         = a_re[2] - c_re[2];
223                 b_re[3]         = a_re[3] - c_re[3];
224 
225                 b_im[0]         = a_im[0] - c_im[0];
226                 b_im[1]         = a_im[1] - c_im[1];
227                 b_im[2]         = a_im[2] - c_im[2];
228                 b_im[3]         = a_im[3] - c_im[3];
229 
230                 a_re[0]         = a_re[0] + c_re[0];
231                 a_re[1]         = a_re[1] + c_re[1];
232                 a_re[2]         = a_re[2] + c_re[2];
233                 a_re[3]         = a_re[3] + c_re[3];
234 
235                 a_im[0]         = a_im[0] + c_im[0];
236                 a_im[1]         = a_im[1] + c_im[1];
237                 a_im[2]         = a_im[2] + c_im[2];
238                 a_im[3]         = a_im[3] + c_im[3];
239 
240                 // Update pointers
241                 a_re           += 4;
242                 a_im           += 4;
243                 b_re           += 4;
244                 b_im           += 4;
245 
246                 if ((k += 4) >= n)
247                     break;
248 
249                 // Rotate w vector
250                 c_re[0]         = w_re[0]*dw[0] - w_im[0]*dw[1];
251                 c_re[1]         = w_re[1]*dw[0] - w_im[1]*dw[1];
252                 c_re[2]         = w_re[2]*dw[0] - w_im[2]*dw[1];
253                 c_re[3]         = w_re[3]*dw[0] - w_im[3]*dw[1];
254 
255                 c_im[0]         = w_re[0]*dw[1] + w_im[0]*dw[0];
256                 c_im[1]         = w_re[1]*dw[1] + w_im[1]*dw[0];
257                 c_im[2]         = w_re[2]*dw[1] + w_im[2]*dw[0];
258                 c_im[3]         = w_re[3]*dw[1] + w_im[3]*dw[0];
259 
260                 w_re[0]         = c_re[0];
261                 w_re[1]         = c_re[1];
262                 w_re[2]         = c_re[2];
263                 w_re[3]         = c_re[3];
264 
265                 w_im[0]         = c_im[0];
266                 w_im[1]         = c_im[1];
267                 w_im[2]         = c_im[2];
268                 w_im[3]         = c_im[3];
269             }
270         }
271 
272         dw     += 2;
273         iw_re  += 4;
274         iw_im  += 4;
275     }
276 }
277 
278 namespace native
279 {
280     void direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
281     void reverse_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
282 }
283 
284 IF_ARCH_X86(
285     namespace sse
286     {
287         void direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
288         void reverse_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
289     }
290 
291     namespace avx
292     {
293         void direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
294         void reverse_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
295 
296         void direct_fft_fma3(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
297         void reverse_fft_fma3(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
298     }
299 )
300 
301 IF_ARCH_ARM(
302     namespace neon_d32
303     {
304         void direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
305         void reverse_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
306     }
307 )
308 
309 IF_ARCH_AARCH64(
310     namespace asimd
311     {
312         void direct_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
313         void reverse_fft(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
314     }
315 )
316 
317 typedef void (* direct_fft_t)(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
318 typedef void (* reverse_fft_t)(float *dst_re, float *dst_im, const float *src_re, const float *src_im, size_t rank);
319 
320 MTEST_BEGIN("dsp.fft", fft)
321 
test_direct_fft(const char * text,direct_fft_t direct,const FloatBuffer & x_r,const FloatBuffer & x_i)322     void test_direct_fft(const char *text, direct_fft_t direct, const FloatBuffer &x_r, const FloatBuffer &x_i)
323     {
324         FloatBuffer src_r(BUF_SIZE, 64);
325         FloatBuffer src_i(BUF_SIZE, 64);
326         FloatBuffer dst1r(BUF_SIZE, 64);
327         FloatBuffer dst1i(BUF_SIZE, 64);
328         FloatBuffer dst2r(BUF_SIZE, 64);
329         FloatBuffer dst2i(BUF_SIZE, 64);
330         src_r.copy(x_r);
331         src_i.copy(x_i);
332         dst2r.copy(x_r);
333         dst2i.copy(x_i);
334 
335         printf("Testing %s direct FFT...\n", text);
336         src_r.dump("src_r");
337         src_i.dump("src_i");
338 
339         direct(dst1r, dst1i, src_r, src_i, RANK);
340         dst1r.dump("dst1r");
341         dst1i.dump("dst1i");
342 
343         direct(dst2r, dst2i, dst2r, dst2i, RANK);
344         dst1r.dump("dst2r");
345         dst1i.dump("dst2i");
346 
347         MTEST_ASSERT_MSG(x_r.valid(), "x_r corrupted");
348         MTEST_ASSERT_MSG(x_i.valid(), "x_i corrupted");
349         MTEST_ASSERT_MSG(src_r.valid(), "src_r corrupted");
350         MTEST_ASSERT_MSG(src_i.valid(), "src_i corrupted");
351         MTEST_ASSERT_MSG(dst1r.valid(), "dst1r corrupted");
352         MTEST_ASSERT_MSG(dst1i.valid(), "dst1i corrupted");
353         MTEST_ASSERT_MSG(dst2r.valid(), "dst2r corrupted");
354         MTEST_ASSERT_MSG(dst2i.valid(), "dst2i corrupted");
355     }
356 
test_reverse_fft(const char * text,reverse_fft_t direct,const FloatBuffer & x_r,const FloatBuffer & x_i)357     void test_reverse_fft(const char *text, reverse_fft_t direct, const FloatBuffer &x_r, const FloatBuffer &x_i)
358     {
359         FloatBuffer src_r(BUF_SIZE, 64);
360         FloatBuffer src_i(BUF_SIZE, 64);
361         FloatBuffer dst1r(BUF_SIZE, 64);
362         FloatBuffer dst1i(BUF_SIZE, 64);
363         FloatBuffer dst2r(BUF_SIZE, 64);
364         FloatBuffer dst2i(BUF_SIZE, 64);
365         src_r.copy(x_r);
366         src_i.copy(x_i);
367         dst2r.copy(x_r);
368         dst2i.copy(x_i);
369 
370         printf("Testing %s reverse FFT...\n", text);
371         src_r.dump("src_r");
372         src_i.dump("src_i");
373 
374         direct(dst1r, dst1i, src_r, src_i, RANK);
375         dst1r.dump("dst1r");
376         dst1i.dump("dst1i");
377 
378         direct(dst2r, dst2i, dst2r, dst2i, RANK);
379         dst1r.dump("dst2r");
380         dst1i.dump("dst2i");
381 
382         MTEST_ASSERT_MSG(x_r.valid(), "x_r corrupted");
383         MTEST_ASSERT_MSG(x_i.valid(), "x_i corrupted");
384         MTEST_ASSERT_MSG(src_r.valid(), "src_r corrupted");
385         MTEST_ASSERT_MSG(src_i.valid(), "src_i corrupted");
386         MTEST_ASSERT_MSG(dst1r.valid(), "dst1r corrupted");
387         MTEST_ASSERT_MSG(dst1i.valid(), "dst1i corrupted");
388         MTEST_ASSERT_MSG(dst2r.valid(), "dst2r corrupted");
389         MTEST_ASSERT_MSG(dst2i.valid(), "dst2i corrupted");
390     }
391 
392     MTEST_MAIN
393     {
394         FloatBuffer src1r(BUF_SIZE, 64);
395         FloatBuffer src1i(BUF_SIZE, 64);
396         FloatBuffer dst1r(BUF_SIZE, 64);
397         FloatBuffer dst1i(BUF_SIZE, 64);
398 
399         // Prepare data
400         for (size_t i=0; i<BUF_SIZE; ++i)
401         {
402             src1r[i]            = i;
403             src1i[i]            = i * 0.1f;
404         }
405 
406         // Test direct FFT
407         test_direct_fft("native", direct_fft, src1r, src1i);
408 
409         IF_ARCH_X86(
410             if (TEST_SUPPORTED(sse::direct_fft))
411                 test_direct_fft("SSE", sse::direct_fft, src1r, src1i);
412             if (TEST_SUPPORTED(avx::direct_fft))
413                 test_direct_fft("AVX", avx::direct_fft, src1r, src1i);
414             if (TEST_SUPPORTED(avx::direct_fft))
415                 test_direct_fft("FMA3", avx::direct_fft_fma3, src1r, src1i);
416         );
417 
418         IF_ARCH_ARM(
419             if (TEST_SUPPORTED(neon_d32::direct_fft))
420                 test_direct_fft("NEON-D32", neon_d32::direct_fft, src1r, src1i);
421         );
422 
423         IF_ARCH_AARCH64(
424             if (TEST_SUPPORTED(asimd::direct_fft))
425                 test_direct_fft("ASIMD", asimd::direct_fft, src1r, src1i);
426         );
427 
428         // Test reverse FFT
429         direct_fft(dst1r, dst1i, src1r, src1i, RANK);
430         printf("\n");
431 
432         test_reverse_fft("native", native::reverse_fft, dst1r, dst1i);
433 
434         IF_ARCH_X86(
435             if (TEST_SUPPORTED(sse::reverse_fft))
436                 test_reverse_fft("SSE", sse::reverse_fft, dst1r, dst1i);
437             if (TEST_SUPPORTED(avx::reverse_fft))
438                 test_reverse_fft("AVX", avx::reverse_fft, dst1r, dst1i);
439             if (TEST_SUPPORTED(avx::reverse_fft))
440                 test_reverse_fft("FMA3", avx::reverse_fft_fma3, dst1r, dst1i);
441         );
442 
443         IF_ARCH_ARM(
444             if (TEST_SUPPORTED(neon_d32::reverse_fft))
445                 test_reverse_fft("NEON-D32", neon_d32::reverse_fft, dst1r, dst1i);
446         );
447 
448         IF_ARCH_AARCH64(
449             if (TEST_SUPPORTED(asimd::reverse_fft))
450                 test_reverse_fft("ASIMD", asimd::reverse_fft, dst1r, dst1i);
451         );
452 
453     }
454 MTEST_END
455