1 /*
2  * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3  *           (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
4  *
5  * This file is part of lsp-plugins
6  * Created on: 25 окт. 2018 г.
7  *
8  * lsp-plugins is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Lesser General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * any later version.
12  *
13  * lsp-plugins is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <dsp/bits.h>
23 #include <test/mtest.h>
24 #include <test/FloatBuffer.h>
25 
26 #define RANK        6
27 #define BUF_SIZE    (1 << RANK)
28 
// Per-pass rotation factors for packed_direct_fft() and packed_reverse_fft().
// The table holds one (Re, Im) pair per butterfly pass: the routines read it as
// dw[0] / dw[1], multiply the current twiddle vector by it to step to the next
// group of four butterflies, and advance the pointer by two floats after each pass.
// NOTE(review): after the two leading (0, 1) rows the values look like
// (cos(a), sin(a)) with the angle a halving on every row — confirm against the
// generator of this table.
static const float XFFT_DW[] __lsp_aligned16 =
{
    // Re, Im
    0.0000000000000000f, 1.0000000000000000f,
    0.0000000000000000f, 1.0000000000000000f,
    0.7071067811865475f, 0.7071067811865475f,
    0.9238795325112868f, 0.3826834323650898f,
    0.9807852804032305f, 0.1950903220161283f,
    0.9951847266721969f, 0.0980171403295606f,
    0.9987954562051724f, 0.0490676743274180f,
    0.9996988186962042f, 0.0245412285229123f,
    0.9999247018391445f, 0.0122715382857199f,
    0.9999811752826011f, 0.0061358846491545f,
    0.9999952938095762f, 0.0030679567629660f,
    0.9999988234517019f, 0.0015339801862848f,
    0.9999997058628822f, 0.0007669903187427f,
    0.9999999264657179f, 0.0003834951875714f,
    0.9999999816164293f, 0.0001917475973107f,
    0.9999999954041073f, 0.0000958737990960f,
    0.9999999988510268f, 0.0000479368996031f
};
50 
// Real parts of the initial twiddle factors, four floats per butterfly pass.
// packed_direct_fft() and packed_reverse_fft() copy the current row into
// w_re[0..3] at the start of every block and advance the pointer by four floats
// after each pass.
// NOTE(review): each row looks like cos(k * a) for k = 0..3, with a halving on
// every row starting from pi/4 — confirm against the generator of this table.
static const float XFFT_A_RE[] __lsp_aligned16 =
{
    1.0000000000000000f, 0.7071067811865475f, 0.0000000000000000f, -0.7071067811865475f,
    1.0000000000000000f, 0.9238795325112868f, 0.7071067811865475f, 0.3826834323650898f,
    1.0000000000000000f, 0.9807852804032305f, 0.9238795325112868f, 0.8314696123025452f,
    1.0000000000000000f, 0.9951847266721969f, 0.9807852804032305f, 0.9569403357322089f,
    1.0000000000000000f, 0.9987954562051724f, 0.9951847266721969f, 0.9891765099647810f,
    1.0000000000000000f, 0.9996988186962042f, 0.9987954562051724f, 0.9972904566786902f,
    1.0000000000000000f, 0.9999247018391445f, 0.9996988186962042f, 0.9993223845883495f,
    1.0000000000000000f, 0.9999811752826011f, 0.9999247018391445f, 0.9998305817958234f,
    1.0000000000000000f, 0.9999952938095762f, 0.9999811752826011f, 0.9999576445519639f,
    1.0000000000000000f, 0.9999988234517019f, 0.9999952938095762f, 0.9999894110819284f,
    1.0000000000000000f, 0.9999997058628822f, 0.9999988234517019f, 0.9999973527669782f,
    1.0000000000000000f, 0.9999999264657179f, 0.9999997058628822f, 0.9999993381915255f,
    1.0000000000000000f, 0.9999999816164293f, 0.9999999264657179f, 0.9999998345478677f,
    1.0000000000000000f, 0.9999999954041073f, 0.9999999816164293f, 0.9999999586369661f,
    1.0000000000000000f, 0.9999999988510268f, 0.9999999954041073f, 0.9999999896592415f
};
69 
// Imaginary parts of the initial twiddle factors, four floats per butterfly pass.
// packed_direct_fft() and packed_reverse_fft() copy the current row into
// w_im[0..3] at the start of every block and advance the pointer by four floats
// after each pass.
// NOTE(review): each row looks like sin(k * a) for k = 0..3, with a halving on
// every row starting from pi/4 — confirm against the generator of this table.
static const float XFFT_A_IM[] __lsp_aligned16 =
{
    0.0000000000000000f, 0.7071067811865475f, 1.0000000000000000f, 0.7071067811865476f,
    0.0000000000000000f, 0.3826834323650898f, 0.7071067811865475f, 0.9238795325112867f,
    0.0000000000000000f, 0.1950903220161283f, 0.3826834323650898f, 0.5555702330196022f,
    0.0000000000000000f, 0.0980171403295606f, 0.1950903220161283f, 0.2902846772544624f,
    0.0000000000000000f, 0.0490676743274180f, 0.0980171403295606f, 0.1467304744553617f,
    0.0000000000000000f, 0.0245412285229123f, 0.0490676743274180f, 0.0735645635996674f,
    0.0000000000000000f, 0.0122715382857199f, 0.0245412285229123f, 0.0368072229413588f,
    0.0000000000000000f, 0.0061358846491545f, 0.0122715382857199f, 0.0184067299058048f,
    0.0000000000000000f, 0.0030679567629660f, 0.0061358846491545f, 0.0092037547820598f,
    0.0000000000000000f, 0.0015339801862848f, 0.0030679567629660f, 0.0046019261204486f,
    0.0000000000000000f, 0.0007669903187427f, 0.0015339801862848f, 0.0023009691514258f,
    0.0000000000000000f, 0.0003834951875714f, 0.0007669903187427f, 0.0011504853371138f,
    0.0000000000000000f, 0.0001917475973107f, 0.0003834951875714f, 0.0005752427637321f,
    0.0000000000000000f, 0.0000958737990960f, 0.0001917475973107f, 0.0002876213937629f,
    0.0000000000000000f, 0.0000479368996031f, 0.0000958737990960f, 0.0001438106983686f
};
88 
packed_scramble_fft(float * dst,const float * src,size_t rank)89 static void packed_scramble_fft(float *dst, const float *src, size_t rank)
90 {
91     size_t items    = size_t(1) << rank;
92 
93     // Scramble the order of samples
94     if (dst != src)
95     {
96         for (size_t i = 0; i < items; i ++)
97         {
98             size_t j = reverse_bits(i, rank);
99             dst[i*2] = src[j*2];
100             dst[i*2+1] = src[j*2+1];
101         }
102     }
103     else
104     {
105         for (size_t i = 1; i < items; i ++)
106         {
107             size_t j = reverse_bits(i, rank);
108             if (i >= j)
109                 continue;
110 
111             /* Copy the values from the reversed position */
112             float re    = dst[i*2];
113             float im    = dst[i*2+1];
114             dst[i*2]    = dst[j*2];
115             dst[i*2+1]  = dst[j*2+1];
116             dst[j*2]    = re;
117             dst[j*2+1]  = im;
118         }
119     }
120 }
121 
/**
 * First pass of the direct FFT: run a 4-point transform over every group of
 * four complex samples and leave each group in split form.
 *
 * Input group layout:  [re0, im0, re1, im1, re2, im2, re3, im3]
 * Output group layout: [re0, re1, re2, re3, im0, im1, im2, im3]
 *
 * @param dst buffer of (1 << rank) packed complex samples, processed in place
 * @param rank binary logarithm of the number of complex samples; nothing is
 *             done when rank < 2 because a full group of four is not available
 */
static void start_packed_direct_fft(float *dst, size_t rank)
{
    // rank - 2 would wrap around for rank < 2 and make the shift below undefined
    if (rank < 2)
        return;

    // Shift in size_t rather than int so that the group count can not overflow
    const size_t one = 1;

    for (size_t groups = one << (rank - 2); groups > 0; --groups, dst += 8)
    {
        // Perform 4-calculations
        // s0' = s0 + s1
        // s1' = s0 - s1
        // s2' = s2 + s3
        // s3' = s2 - s3
        // s0'' = s0' + s2'
        // s1'' = s1' - j * s3'
        // s2'' = s0' - s2'
        // s3'' = s1' + j * s3'
        const float s0_re   = dst[0] + dst[2];
        const float s1_re   = dst[0] - dst[2];
        const float s0_im   = dst[1] + dst[3];
        const float s1_im   = dst[1] - dst[3];

        const float s2_re   = dst[4] + dst[6];
        const float s3_re   = dst[4] - dst[6];
        const float s2_im   = dst[5] + dst[7];
        const float s3_im   = dst[5] - dst[7];

        // Real parts of s0''..s3''
        dst[0]              = s0_re + s2_re;
        dst[1]              = s1_re + s3_im;
        dst[2]              = s0_re - s2_re;
        dst[3]              = s1_re - s3_im;

        // Imaginary parts of s0''..s3''
        dst[4]              = s0_im + s2_im;
        dst[5]              = s1_im - s3_re;
        dst[6]              = s0_im - s2_im;
        dst[7]              = s1_im + s3_re;
    }
}
160 
/**
 * First pass of the reverse FFT: run a 4-point inverse transform over every
 * group of four complex samples and leave each group in split form.
 *
 * Input group layout:  [re0, im0, re1, im1, re2, im2, re3, im3]
 * Output group layout: [re0, re1, re2, re3, im0, im1, im2, im3]
 *
 * @param dst buffer of (1 << rank) packed complex samples, processed in place
 * @param rank binary logarithm of the number of complex samples; nothing is
 *             done when rank < 2 because a full group of four is not available
 */
static void start_packed_reverse_fft(float *dst, size_t rank)
{
    // rank - 2 would wrap around for rank < 2 and make the shift below undefined
    if (rank < 2)
        return;

    // Shift in size_t rather than int so that the group count can not overflow
    const size_t one = 1;

    for (size_t groups = one << (rank - 2); groups > 0; --groups, dst += 8)
    {
        // Perform 4-calculations
        // s0' = s0 + s1
        // s1' = s0 - s1
        // s2' = s2 + s3
        // s3' = s2 - s3
        // s0'' = s0' + s2'
        // s1'' = s1' + j * s3'
        // s2'' = s0' - s2'
        // s3'' = s1' - j * s3'
        const float s0_re   = dst[0] + dst[2];
        const float s1_re   = dst[0] - dst[2];
        const float s0_im   = dst[1] + dst[3];
        const float s1_im   = dst[1] - dst[3];

        const float s2_re   = dst[4] + dst[6];
        const float s3_re   = dst[4] - dst[6];
        const float s2_im   = dst[5] + dst[7];
        const float s3_im   = dst[5] - dst[7];

        // Real parts of s0''..s3''
        dst[0]              = s0_re + s2_re;
        dst[1]              = s1_re - s3_im;
        dst[2]              = s0_re - s2_re;
        dst[3]              = s1_re + s3_im;

        // Imaginary parts of s0''..s3''
        dst[4]              = s0_im + s2_im;
        dst[5]              = s1_im + s3_re;
        dst[6]              = s0_im - s2_im;
        dst[7]              = s1_im - s3_re;
    }
}
200 
/**
 * Convert every group of four complex samples from split form back to
 * interleaved form, in place.
 *
 * Input group layout:  [re0, re1, re2, re3, im0, im1, im2, im3]
 * Output group layout: [re0, im0, re1, im1, re2, im2, re3, im3]
 *
 * @param dst buffer of (1 << rank) complex samples
 * @param rank binary logarithm of the number of complex samples; nothing is
 *             done when rank < 2 because one group already spans 8 floats,
 *             which is more than such a buffer holds
 */
static void repack_fft(float *dst, size_t rank)
{
    if (rank < 2)
        return;

    // Shift in size_t rather than int so that the sample count can not overflow
    const size_t one    = 1;
    const size_t count  = one << rank;

    for (size_t i = 0; i < count; i += 4, dst += 8)
    {
        // dst[0] (re0) and dst[7] (im3) already sit at their final positions
        const float re1 = dst[1];
        const float re2 = dst[2];
        const float re3 = dst[3];
        const float im0 = dst[4];
        const float im1 = dst[5];
        const float im2 = dst[6];

        dst[1]          = im0;
        dst[2]          = re1;
        dst[3]          = im1;
        dst[4]          = re2;
        dst[5]          = im2;
        dst[6]          = re3;
    }
}
228 
/**
 * Convert every group of four complex samples from split form back to
 * interleaved form, in place, scaling each value by 1 / (1 << rank).
 *
 * Input group layout:  [re0, re1, re2, re3, im0, im1, im2, im3]
 * Output group layout: [re0, im0, re1, im1, re2, im2, re3, im3]
 *
 * @param dst buffer of (1 << rank) complex samples
 * @param rank binary logarithm of the number of complex samples; nothing is
 *             done when rank < 2 because one group already spans 8 floats,
 *             which is more than such a buffer holds
 */
static void repack_reverse_fft(float *dst, size_t rank)
{
    if (rank < 2)
        return;

    // Shift in size_t rather than int so that the sample count can not overflow
    const size_t one    = 1;
    const size_t count  = one << rank;
    const float k       = 1.0f / count;     // normalizing factor of the reverse transform

    for (size_t i = 0; i < count; i += 4, dst += 8)
    {
        const float re0 = dst[0];
        const float re1 = dst[1];
        const float re2 = dst[2];
        const float re3 = dst[3];
        const float im0 = dst[4];
        const float im1 = dst[5];
        const float im2 = dst[6];
        const float im3 = dst[7];

        dst[0]          = re0 * k;
        dst[1]          = im0 * k;
        dst[2]          = re1 * k;
        dst[3]          = im1 * k;
        dst[4]          = re2 * k;
        dst[5]          = im2 * k;
        dst[6]          = re3 * k;
        dst[7]          = im3 * k;
    }
}
258 
packed_direct_fft(float * dst,const float * src,size_t rank)259 static void packed_direct_fft(float *dst, const float *src, size_t rank)
260 {
261     packed_scramble_fft(dst, src, rank);
262     start_packed_direct_fft(dst, rank);
263 
264     // Prepare for butterflies
265     size_t items    = size_t(1) << (rank + 1);
266 
267     float c_re[4], c_im[4], w_re[4], w_im[4];
268     const float *dw     = XFFT_DW;
269     const float *iw_re  = XFFT_A_RE;
270     const float *iw_im  = XFFT_A_IM;
271 
272     // Iterate butterflies
273     for (size_t n=8, bs=(n << 1); n < items; n <<= 1, bs <<= 1)
274     {
275         for (size_t p=0; p<items; p += bs)
276         {
277             // Set initial values of pointers
278             float *a            = &dst[p];
279             float *b            = &a[n];
280 
281             w_re[0]             = iw_re[0];
282             w_re[1]             = iw_re[1];
283             w_re[2]             = iw_re[2];
284             w_re[3]             = iw_re[3];
285             w_im[0]             = iw_im[0];
286             w_im[1]             = iw_im[1];
287             w_im[2]             = iw_im[2];
288             w_im[3]             = iw_im[3];
289 
290             for (size_t k=0; ;)
291             {
292                 // Calculate complex c = w * b
293                 c_re[0]         = w_re[0] * b[0] + w_im[0] * b[4];
294                 c_re[1]         = w_re[1] * b[1] + w_im[1] * b[5];
295                 c_re[2]         = w_re[2] * b[2] + w_im[2] * b[6];
296                 c_re[3]         = w_re[3] * b[3] + w_im[3] * b[7];
297 
298                 c_im[0]         = w_re[0] * b[4] - w_im[0] * b[0];
299                 c_im[1]         = w_re[1] * b[5] - w_im[1] * b[1];
300                 c_im[2]         = w_re[2] * b[6] - w_im[2] * b[2];
301                 c_im[3]         = w_re[3] * b[7] - w_im[3] * b[3];
302 
303                 // Calculate the output values:
304                 // a'   = a + c
305                 // b'   = a - c
306                 b[0]            = a[0] - c_re[0];
307                 b[1]            = a[1] - c_re[1];
308                 b[2]            = a[2] - c_re[2];
309                 b[3]            = a[3] - c_re[3];
310 
311                 b[4]            = a[4] - c_im[0];
312                 b[5]            = a[5] - c_im[1];
313                 b[6]            = a[6] - c_im[2];
314                 b[7]            = a[7] - c_im[3];
315 
316                 a[0]            = a[0] + c_re[0];
317                 a[1]            = a[1] + c_re[1];
318                 a[2]            = a[2] + c_re[2];
319                 a[3]            = a[3] + c_re[3];
320 
321                 a[4]            = a[4] + c_im[0];
322                 a[5]            = a[5] + c_im[1];
323                 a[6]            = a[6] + c_im[2];
324                 a[7]            = a[7] + c_im[3];
325 
326                 // Update pointers
327                 a              += 8;
328                 b              += 8;
329 
330                 if ((k += 8) >= n)
331                     break;
332 
333                 // Rotate w vector
334                 c_re[0]         = w_re[0]*dw[0] - w_im[0]*dw[1];
335                 c_re[1]         = w_re[1]*dw[0] - w_im[1]*dw[1];
336                 c_re[2]         = w_re[2]*dw[0] - w_im[2]*dw[1];
337                 c_re[3]         = w_re[3]*dw[0] - w_im[3]*dw[1];
338 
339                 c_im[0]         = w_re[0]*dw[1] + w_im[0]*dw[0];
340                 c_im[1]         = w_re[1]*dw[1] + w_im[1]*dw[0];
341                 c_im[2]         = w_re[2]*dw[1] + w_im[2]*dw[0];
342                 c_im[3]         = w_re[3]*dw[1] + w_im[3]*dw[0];
343 
344                 w_re[0]         = c_re[0];
345                 w_re[1]         = c_re[1];
346                 w_re[2]         = c_re[2];
347                 w_re[3]         = c_re[3];
348 
349                 w_im[0]         = c_im[0];
350                 w_im[1]         = c_im[1];
351                 w_im[2]         = c_im[2];
352                 w_im[3]         = c_im[3];
353             }
354         }
355 
356         dw     += 2;
357         iw_re  += 4;
358         iw_im  += 4;
359     }
360 
361     repack_fft(dst, rank);
362 }
363 
packed_reverse_fft(float * dst,const float * src,size_t rank)364 static void packed_reverse_fft(float *dst, const float *src, size_t rank)
365 {
366     packed_scramble_fft(dst, src, rank);
367     start_packed_reverse_fft(dst, rank);
368 
369     // Prepare for butterflies
370     size_t items    = size_t(1) << (rank + 1);
371 
372     float c_re[4], c_im[4], w_re[4], w_im[4];
373     const float *dw     = XFFT_DW;
374     const float *iw_re  = XFFT_A_RE;
375     const float *iw_im  = XFFT_A_IM;
376 
377     // Iterate butterflies
378     for (size_t n=8, bs=(n << 1); n < items; n <<= 1, bs <<= 1)
379     {
380         for (size_t p=0; p<items; p += bs)
381         {
382             // Set initial values of pointers
383             float *a            = &dst[p];
384             float *b            = &a[n];
385 
386             w_re[0]             = iw_re[0];
387             w_re[1]             = iw_re[1];
388             w_re[2]             = iw_re[2];
389             w_re[3]             = iw_re[3];
390             w_im[0]             = iw_im[0];
391             w_im[1]             = iw_im[1];
392             w_im[2]             = iw_im[2];
393             w_im[3]             = iw_im[3];
394 
395             for (size_t k=0; ;)
396             {
397                 // Calculate complex c = w * b
398                 c_re[0]         = w_re[0] * b[0] - w_im[0] * b[4];
399                 c_re[1]         = w_re[1] * b[1] - w_im[1] * b[5];
400                 c_re[2]         = w_re[2] * b[2] - w_im[2] * b[6];
401                 c_re[3]         = w_re[3] * b[3] - w_im[3] * b[7];
402 
403                 c_im[0]         = w_re[0] * b[4] + w_im[0] * b[0];
404                 c_im[1]         = w_re[1] * b[5] + w_im[1] * b[1];
405                 c_im[2]         = w_re[2] * b[6] + w_im[2] * b[2];
406                 c_im[3]         = w_re[3] * b[7] + w_im[3] * b[3];
407 
408                 // Calculate the output values:
409                 // a'   = a + c
410                 // b'   = a - c
411                 b[0]            = a[0] - c_re[0];
412                 b[1]            = a[1] - c_re[1];
413                 b[2]            = a[2] - c_re[2];
414                 b[3]            = a[3] - c_re[3];
415 
416                 b[4]            = a[4] - c_im[0];
417                 b[5]            = a[5] - c_im[1];
418                 b[6]            = a[6] - c_im[2];
419                 b[7]            = a[7] - c_im[3];
420 
421                 a[0]            = a[0] + c_re[0];
422                 a[1]            = a[1] + c_re[1];
423                 a[2]            = a[2] + c_re[2];
424                 a[3]            = a[3] + c_re[3];
425 
426                 a[4]            = a[4] + c_im[0];
427                 a[5]            = a[5] + c_im[1];
428                 a[6]            = a[6] + c_im[2];
429                 a[7]            = a[7] + c_im[3];
430 
431                 // Update pointers
432                 a              += 8;
433                 b              += 8;
434 
435                 if ((k += 8) >= n)
436                     break;
437 
438                 // Rotate w vector
439                 c_re[0]         = w_re[0]*dw[0] - w_im[0]*dw[1];
440                 c_re[1]         = w_re[1]*dw[0] - w_im[1]*dw[1];
441                 c_re[2]         = w_re[2]*dw[0] - w_im[2]*dw[1];
442                 c_re[3]         = w_re[3]*dw[0] - w_im[3]*dw[1];
443 
444                 c_im[0]         = w_re[0]*dw[1] + w_im[0]*dw[0];
445                 c_im[1]         = w_re[1]*dw[1] + w_im[1]*dw[0];
446                 c_im[2]         = w_re[2]*dw[1] + w_im[2]*dw[0];
447                 c_im[3]         = w_re[3]*dw[1] + w_im[3]*dw[0];
448 
449                 w_re[0]         = c_re[0];
450                 w_re[1]         = c_re[1];
451                 w_re[2]         = c_re[2];
452                 w_re[3]         = c_re[3];
453 
454                 w_im[0]         = c_im[0];
455                 w_im[1]         = c_im[1];
456                 w_im[2]         = c_im[2];
457                 w_im[3]         = c_im[3];
458             }
459         }
460 
461         dw     += 2;
462         iw_re  += 4;
463         iw_im  += 4;
464     }
465 
466     repack_reverse_fft(dst, rank);
467 }
468 
// Forward declarations of the optimized implementations exercised by this test.
// Their definitions are not part of this file.
// NOTE(review): the IF_ARCH_* wrappers presumably keep each group only when
// building for the matching architecture — confirm against the macro definitions.
IF_ARCH_X86(
    namespace sse
    {
        void packed_direct_fft(float *dst, const float *src, size_t rank);
        void packed_reverse_fft(float *dst, const float *src, size_t rank);
    }

    namespace avx
    {
        void packed_direct_fft(float *dst, const float *src, size_t rank);
        void packed_reverse_fft(float *dst, const float *src, size_t rank);
    }
)

IF_ARCH_ARM(
    namespace neon_d32
    {
        void packed_direct_fft(float *dst, const float *src, size_t rank);
        void packed_reverse_fft(float *dst, const float *src, size_t rank);
    }
)

IF_ARCH_AARCH64(
    namespace asimd
    {
        void packed_direct_fft(float *dst, const float *src, size_t rank);
        void packed_reverse_fft(float *dst, const float *src, size_t rank);
    }
)

// Function pointer types shared by the reference and the optimized routines:
// (destination, source, rank)
typedef void (* packed_direct_fft_t)(float *dst, const float *src, size_t rank);
typedef void (* packed_reverse_fft_t)(float *dst, const float *src, size_t rank);
501 
502 MTEST_BEGIN("dsp.fft", pfft)
503 
test_direct_fft(const char * text,packed_direct_fft_t direct,FloatBuffer & buf)504     void test_direct_fft(const char *text, packed_direct_fft_t direct, FloatBuffer &buf)
505     {
506         FloatBuffer src1(BUF_SIZE*2, 64);
507         FloatBuffer dst1(BUF_SIZE*2, 64);
508         FloatBuffer dst2(BUF_SIZE*2, 64);
509 
510         printf("Testing %s packed direct FFT...\n", text);
511         src1.copy(buf);
512         dst2.copy(buf);
513         src1.dump("src ");
514 
515         direct(dst1, src1, RANK);
516         direct(dst2, dst2, RANK);
517         dst1.dump("dst1");
518         dst2.dump("dst2");
519 
520         MTEST_ASSERT_MSG(src1.valid(), "src corrupted");
521         MTEST_ASSERT_MSG(dst1.valid(), "dst1 corrupted");
522         MTEST_ASSERT_MSG(dst2.valid(), "dst2 corrupted");
523     }
524 
test_reverse_fft(const char * text,packed_direct_fft_t reverse,FloatBuffer & buf)525     void test_reverse_fft(const char *text, packed_direct_fft_t reverse, FloatBuffer &buf)
526     {
527         FloatBuffer src1(BUF_SIZE*2, 64);
528         FloatBuffer dst1(BUF_SIZE*2, 64);
529         FloatBuffer dst2(BUF_SIZE*2, 64);
530 
531         printf("Testing %s packed reverse FFT...\n", text);
532         src1.copy(buf);
533         dst2.copy(buf);
534         src1.dump("src ");
535 
536         reverse(dst1, src1, RANK);
537         reverse(dst2, dst2, RANK);
538         dst1.dump("dst1");
539         dst2.dump("dst2");
540 
541         MTEST_ASSERT_MSG(src1.valid(), "src corrupted");
542         MTEST_ASSERT_MSG(dst1.valid(), "dst1 corrupted");
543         MTEST_ASSERT_MSG(dst2.valid(), "dst2 corrupted");
544     }
545 
546     MTEST_MAIN
547     {
548         FloatBuffer src1(BUF_SIZE*2, 64);
549         FloatBuffer src2(BUF_SIZE*2, 64);
550         FloatBuffer dst1(BUF_SIZE*2, 64);
551         FloatBuffer dst2(BUF_SIZE*2, 64);
552         FloatBuffer bkup(BUF_SIZE*2, 64);
553 
554         // Prepare data
555         for (size_t i=0; i<BUF_SIZE; ++i)
556         {
557             src1[i*2]           = i;
558             src1[i*2+1]         = i * 0.1f;
559         }
560 
561         // Test
562         test_direct_fft("native", packed_direct_fft, src1);
563 
564         IF_ARCH_X86(
565             if (TEST_SUPPORTED(sse::packed_direct_fft))
566                 test_direct_fft("SSE", sse::packed_direct_fft, src1);
567 
568             if (TEST_SUPPORTED(avx::packed_direct_fft))
569                 test_direct_fft("AVX", avx::packed_direct_fft, src1);
570         );
571 
572         IF_ARCH_ARM(
573             if (TEST_SUPPORTED(neon_d32::packed_direct_fft))
574                 test_direct_fft("NEON-D32", neon_d32::packed_direct_fft, src1);
575         );
576 
577         IF_ARCH_AARCH64(
578             if (TEST_SUPPORTED(asimd::packed_direct_fft))
579                 test_direct_fft("ASIMD", asimd::packed_direct_fft, src1);
580         );
581 
582         printf("\n");
583         packed_direct_fft(src2, src1, RANK);
584         test_reverse_fft("native", packed_reverse_fft, src2);
585 
586         IF_ARCH_X86(
587             if (TEST_SUPPORTED(sse::packed_reverse_fft))
588                 test_reverse_fft("SSE", sse::packed_reverse_fft, src2);
589 
590             if (TEST_SUPPORTED(avx::packed_reverse_fft))
591                 test_reverse_fft("AVX", avx::packed_reverse_fft, src2);
592         );
593 
594         IF_ARCH_ARM(
595             if (TEST_SUPPORTED(neon_d32::packed_reverse_fft))
596                 test_reverse_fft("NEON-D32", neon_d32::packed_reverse_fft, src2);
597         );
598 
599         IF_ARCH_AARCH64(
600             if (TEST_SUPPORTED(asimd::packed_reverse_fft))
601                 test_reverse_fft("ASIMD", asimd::packed_reverse_fft, src2);
602         );
603     }
604 MTEST_END
605