1 /*
2 * Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3 * (C) 2020 Vladimir Sadovnikov <sadko4u@gmail.com>
4 *
5 * This file is part of lsp-plugins
6 * Created on: 31 окт. 2018 г.
7 *
8 * lsp-plugins is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU Lesser General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * any later version.
12 *
13 * lsp-plugins is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with lsp-plugins. If not, see <https://www.gnu.org/licenses/>.
20 */
21
22 #include <dsp/dsp.h>
23 #include <test/mtest.h>
24 #include <test/FloatBuffer.h>
25 #include <core/debug.h>
26
27 #define RANK 6
28 #define BUF_SIZE (1 << RANK)
29
30 static const float XFFT_DW[] __lsp_aligned16 =
31 {
32 // Re, Im
33 0.0000000000000000f, 1.0000000000000000f,
34 0.0000000000000000f, 1.0000000000000000f,
35 0.7071067811865475f, 0.7071067811865475f,
36 0.9238795325112868f, 0.3826834323650898f,
37 0.9807852804032305f, 0.1950903220161283f,
38 0.9951847266721969f, 0.0980171403295606f,
39 0.9987954562051724f, 0.0490676743274180f,
40 0.9996988186962042f, 0.0245412285229123f,
41 0.9999247018391445f, 0.0122715382857199f,
42 0.9999811752826011f, 0.0061358846491545f,
43 0.9999952938095762f, 0.0030679567629660f,
44 0.9999988234517019f, 0.0015339801862848f,
45 0.9999997058628822f, 0.0007669903187427f,
46 0.9999999264657179f, 0.0003834951875714f,
47 0.9999999816164293f, 0.0001917475973107f,
48 0.9999999954041073f, 0.0000958737990960f,
49 0.9999999988510268f, 0.0000479368996031f
50 };
51
52 static const float XFFT_A_RE[] __lsp_aligned16 =
53 {
54 1.0000000000000000f, 0.7071067811865475f, 0.0000000000000000f, -0.7071067811865475f,
55 1.0000000000000000f, 0.9238795325112868f, 0.7071067811865475f, 0.3826834323650898f,
56 1.0000000000000000f, 0.9807852804032305f, 0.9238795325112868f, 0.8314696123025452f,
57 1.0000000000000000f, 0.9951847266721969f, 0.9807852804032305f, 0.9569403357322089f,
58 1.0000000000000000f, 0.9987954562051724f, 0.9951847266721969f, 0.9891765099647810f,
59 1.0000000000000000f, 0.9996988186962042f, 0.9987954562051724f, 0.9972904566786902f,
60 1.0000000000000000f, 0.9999247018391445f, 0.9996988186962042f, 0.9993223845883495f,
61 1.0000000000000000f, 0.9999811752826011f, 0.9999247018391445f, 0.9998305817958234f,
62 1.0000000000000000f, 0.9999952938095762f, 0.9999811752826011f, 0.9999576445519639f,
63 1.0000000000000000f, 0.9999988234517019f, 0.9999952938095762f, 0.9999894110819284f,
64 1.0000000000000000f, 0.9999997058628822f, 0.9999988234517019f, 0.9999973527669782f,
65 1.0000000000000000f, 0.9999999264657179f, 0.9999997058628822f, 0.9999993381915255f,
66 1.0000000000000000f, 0.9999999816164293f, 0.9999999264657179f, 0.9999998345478677f,
67 1.0000000000000000f, 0.9999999954041073f, 0.9999999816164293f, 0.9999999586369661f,
68 1.0000000000000000f, 0.9999999988510268f, 0.9999999954041073f, 0.9999999896592415f
69 };
70
71 static const float XFFT_A_IM[] __lsp_aligned16 =
72 {
73 0.0000000000000000f, 0.7071067811865475f, 1.0000000000000000f, 0.7071067811865476f,
74 0.0000000000000000f, 0.3826834323650898f, 0.7071067811865475f, 0.9238795325112867f,
75 0.0000000000000000f, 0.1950903220161283f, 0.3826834323650898f, 0.5555702330196022f,
76 0.0000000000000000f, 0.0980171403295606f, 0.1950903220161283f, 0.2902846772544624f,
77 0.0000000000000000f, 0.0490676743274180f, 0.0980171403295606f, 0.1467304744553617f,
78 0.0000000000000000f, 0.0245412285229123f, 0.0490676743274180f, 0.0735645635996674f,
79 0.0000000000000000f, 0.0122715382857199f, 0.0245412285229123f, 0.0368072229413588f,
80 0.0000000000000000f, 0.0061358846491545f, 0.0122715382857199f, 0.0184067299058048f,
81 0.0000000000000000f, 0.0030679567629660f, 0.0061358846491545f, 0.0092037547820598f,
82 0.0000000000000000f, 0.0015339801862848f, 0.0030679567629660f, 0.0046019261204486f,
83 0.0000000000000000f, 0.0007669903187427f, 0.0015339801862848f, 0.0023009691514258f,
84 0.0000000000000000f, 0.0003834951875714f, 0.0007669903187427f, 0.0011504853371138f,
85 0.0000000000000000f, 0.0001917475973107f, 0.0003834951875714f, 0.0005752427637321f,
86 0.0000000000000000f, 0.0000958737990960f, 0.0001917475973107f, 0.0002876213937629f,
87 0.0000000000000000f, 0.0000479368996031f, 0.0000958737990960f, 0.0001438106983686f
88 };
89
fastconv_parse(float * dst,const float * src,size_t rank)90 static void fastconv_parse(float *dst, const float *src, size_t rank)
91 {
92 // Prepare for butterflies
93 float c_re[4], c_im[4], w_re[4], w_im[4];
94 const float *dw = &XFFT_DW[(rank - 3) << 1];
95 const float *iw_re = &XFFT_A_RE[(rank-3) << 2];
96 const float *iw_im = &XFFT_A_IM[(rank-3) << 2];
97 size_t items = size_t(1) << (rank + 1);
98 size_t bs = items;
99 size_t n = bs >> 1;
100
101 // Iterate first cycle
102 if (n > 4)
103 {
104 // ONE LARGE CYCLE
105 // Set initial values of pointers
106 float *a = dst;
107 float *b = &a[n];
108
109 w_re[0] = iw_re[0];
110 w_re[1] = iw_re[1];
111 w_re[2] = iw_re[2];
112 w_re[3] = iw_re[3];
113 w_im[0] = iw_im[0];
114 w_im[1] = iw_im[1];
115 w_im[2] = iw_im[2];
116 w_im[3] = iw_im[3];
117
118 for (size_t k=0; ;)
119 {
120 // Calculate the output values:
121 // a' = a + 0
122 // b' = (a-0) * w
123 a[0] = src[0];
124 a[1] = src[1];
125 a[2] = src[2];
126 a[3] = src[3];
127
128 a[4] = 0.0f;
129 a[5] = 0.0f;
130 a[6] = 0.0f;
131 a[7] = 0.0f;
132
133 // Calculate complex c = w * b
134 b[0] = w_re[0] * a[0];
135 b[1] = w_re[1] * a[1];
136 b[2] = w_re[2] * a[2];
137 b[3] = w_re[3] * a[3];
138
139 b[4] = -w_im[0] * a[0];
140 b[5] = -w_im[1] * a[1];
141 b[6] = -w_im[2] * a[2];
142 b[7] = -w_im[3] * a[3];
143
144 // Update pointers
145 a += 8;
146 b += 8;
147 src += 4;
148
149 if ((k += 8) >= n)
150 break;
151
152 // Rotate w vector
153 c_re[0] = w_re[0]*dw[0] - w_im[0]*dw[1];
154 c_re[1] = w_re[1]*dw[0] - w_im[1]*dw[1];
155 c_re[2] = w_re[2]*dw[0] - w_im[2]*dw[1];
156 c_re[3] = w_re[3]*dw[0] - w_im[3]*dw[1];
157
158 c_im[0] = w_re[0]*dw[1] + w_im[0]*dw[0];
159 c_im[1] = w_re[1]*dw[1] + w_im[1]*dw[0];
160 c_im[2] = w_re[2]*dw[1] + w_im[2]*dw[0];
161 c_im[3] = w_re[3]*dw[1] + w_im[3]*dw[0];
162
163 w_re[0] = c_re[0];
164 w_re[1] = c_re[1];
165 w_re[2] = c_re[2];
166 w_re[3] = c_re[3];
167
168 w_im[0] = c_im[0];
169 w_im[1] = c_im[1];
170 w_im[2] = c_im[2];
171 w_im[3] = c_im[3];
172 }
173
174 dw -= 2;
175 iw_re -= 4;
176 iw_im -= 4;
177
178 n >>= 1;
179 bs >>= 1;
180 }
181 else
182 {
183 // Unpack 4x real to 4x split complex
184 dst[0] = src[0];
185 dst[1] = src[1];
186 dst[2] = src[2];
187 dst[3] = src[3];
188
189 dst[4] = 0.0f;
190 dst[5] = 0.0f;
191 dst[6] = 0.0f;
192 dst[7] = 0.0f;
193 }
194
195 // Iterate butterflies
196 for (; n > 4; n >>= 1, bs >>= 1)
197 {
198 for (size_t p=0; p<items; p += bs)
199 {
200 // Set initial values of pointers
201 float *a = &dst[p];
202 float *b = &a[n];
203
204 w_re[0] = iw_re[0];
205 w_re[1] = iw_re[1];
206 w_re[2] = iw_re[2];
207 w_re[3] = iw_re[3];
208 w_im[0] = iw_im[0];
209 w_im[1] = iw_im[1];
210 w_im[2] = iw_im[2];
211 w_im[3] = iw_im[3];
212
213 for (size_t k=0; ;)
214 {
215 // Calculate the output values:
216 // c = a - b
217 // a' = a + b
218 // b' = c * w
219 c_re[0] = a[0] - b[0];
220 c_re[1] = a[1] - b[1];
221 c_re[2] = a[2] - b[2];
222 c_re[3] = a[3] - b[3];
223
224 c_im[0] = a[4] - b[4];
225 c_im[1] = a[5] - b[5];
226 c_im[2] = a[6] - b[6];
227 c_im[3] = a[7] - b[7];
228
229 a[0] = a[0] + b[0];
230 a[1] = a[1] + b[1];
231 a[2] = a[2] + b[2];
232 a[3] = a[3] + b[3];
233
234 a[4] = a[4] + b[4];
235 a[5] = a[5] + b[5];
236 a[6] = a[6] + b[6];
237 a[7] = a[7] + b[7];
238
239 // Calculate complex c = w * b
240 b[0] = w_re[0] * c_re[0] + w_im[0] * c_im[0];
241 b[1] = w_re[1] * c_re[1] + w_im[1] * c_im[1];
242 b[2] = w_re[2] * c_re[2] + w_im[2] * c_im[2];
243 b[3] = w_re[3] * c_re[3] + w_im[3] * c_im[3];
244
245 b[4] = w_re[0] * c_im[0] - w_im[0] * c_re[0];
246 b[5] = w_re[1] * c_im[1] - w_im[1] * c_re[1];
247 b[6] = w_re[2] * c_im[2] - w_im[2] * c_re[2];
248 b[7] = w_re[3] * c_im[3] - w_im[3] * c_re[3];
249
250 // Update pointers
251 a += 8;
252 b += 8;
253
254 if ((k += 8) >= n)
255 break;
256
257 // Rotate w vector
258 c_re[0] = w_re[0]*dw[0] - w_im[0]*dw[1];
259 c_re[1] = w_re[1]*dw[0] - w_im[1]*dw[1];
260 c_re[2] = w_re[2]*dw[0] - w_im[2]*dw[1];
261 c_re[3] = w_re[3]*dw[0] - w_im[3]*dw[1];
262
263 c_im[0] = w_re[0]*dw[1] + w_im[0]*dw[0];
264 c_im[1] = w_re[1]*dw[1] + w_im[1]*dw[0];
265 c_im[2] = w_re[2]*dw[1] + w_im[2]*dw[0];
266 c_im[3] = w_re[3]*dw[1] + w_im[3]*dw[0];
267
268 w_re[0] = c_re[0];
269 w_re[1] = c_re[1];
270 w_re[2] = c_re[2];
271 w_re[3] = c_re[3];
272
273 w_im[0] = c_im[0];
274 w_im[1] = c_im[1];
275 w_im[2] = c_im[2];
276 w_im[3] = c_im[3];
277 }
278 }
279
280 dw -= 2;
281 iw_re -= 4;
282 iw_im -= 4;
283 }
284
285 // Add two last stages
286 for (size_t i=0; i<items; i += 8)
287 {
288 // s0' = s0 + s2 = (r0 + r2) + j*(i0 + i2)
289 // s1' = s1 + s3 = (r1 + r3) + j*(i1 + i3)
290 // s2' = s0 - s2 = 1*((r0 - r2) + j*(i0 - i2)) = (r0 - r2) + j*(i0 - i2)
291 // s3' = -j*(s1 - s3) = -j*((r1 - r3) + j*(i1 - i3)) = (i1 - i3) - j*(r1 - r3)
292
293 // s0" = s0' + s1' = (r0 + r2) + j*(i0 + i2) + (r1 + r3) + j*(i1 + i3)
294 // s1" = s0' - s1' = (r0 + r2) + j*(i0 + i2) - (r1 + r3) - j*(i1 + i3)
295 // s2" = s2' + s3' = (r0 - r2) + j*(i0 - i2) + (i1 - i3) - j*(r1 - r3)
296 // s3" = s2' - s3' = (r0 - r2) + j*(i0 - i2) - (i1 - i3) + j*(r1 - r3)
297
298 float r0k = dst[0] + dst[2];
299 float r1k = dst[0] - dst[2];
300 float r2k = dst[1] + dst[3];
301 float r3k = dst[1] - dst[3];
302
303 float i0k = dst[4] + dst[6];
304 float i1k = dst[4] - dst[6];
305 float i2k = dst[5] + dst[7];
306 float i3k = dst[5] - dst[7];
307
308 // dst[0] = r0k;
309 // dst[1] = r2k;
310 // dst[2] = r1k;
311 // dst[3] = i3k;
312 //
313 // dst[4] = i0k;
314 // dst[5] = i2k;
315 // dst[6] = i1k;
316 // dst[7] = r3k;
317
318 dst[0] = r0k + r2k;
319 dst[1] = r0k - r2k;
320 dst[2] = r1k + i3k;
321 dst[3] = r1k - i3k;
322
323 dst[4] = i0k + i2k;
324 dst[5] = i0k - i2k;
325 dst[6] = i1k - r3k;
326 dst[7] = i1k + r3k;
327
328 dst += 8;
329 }
330
331 // Now all complex numbers are stored in the following rormat:
332 // [r0 r1 r2 r3 i0 i1 i2 i3 r4 r5 r6 r7 i4 i5 i6 i7 ... ]
333 }
334
fastconv_restore(float * dst,float * tmp,size_t rank)335 static void fastconv_restore(float *dst, float *tmp, size_t rank)
336 {
337 float c_re[4], c_im[4], w_re[4], w_im[4];
338
339 // Prepare for butterflies
340 size_t last = size_t(1) << rank;
341 size_t items = last << 1;
342 size_t n = 8;
343 size_t bs = n << 1;
344
345 // Add two last stages
346 float *d = tmp;
347
348 // All complex numbers are stored in the following format:
349 // [r0 r1 r2 r3 i0 i1 i2 i3 r4 r5 r6 r7 i4 i5 i6 i7 ... ]
350 for (size_t i=0; i<items; i += 8)
351 {
352 // s0' = s0 + s1 = (r0 + r1) + j*(i0 + i1)
353 // s1' = s0 - s1 = (r0 - r1) + j*(i0 - i1)
354 // s2' = s2 + s3 = (r2 + r3) + j*(i2 + i3)
355 // s3' = s2 - s3 = (r2 - r3) + j*(i2 - i3)
356
357 // s0" = s0' + s2' = (r0 + r1) + j*(i0 + i1) + (r2 + r3) + j*(i2 + i3)
358 // s1" = s1' + j*s3' = (r0 - r1) + j*(i0 - i1) - (i2 - i3) + j*(r2 - r3)
359 // s2" = s0' - s2' = (r0 + r1) + j*(i0 + i1) - (r2 + r3) - j*(i2 + i3)
360 // s3" = s1' - j*s3' = (r0 - r1) + j*(i0 - i1) + (i2 - i3) - j*(r2 - r3)
361
362 float r0k = d[0] + d[1];
363 float r1k = d[0] - d[1];
364 float r2k = d[2] + d[3];
365 float r3k = d[2] - d[3];
366
367 float i0k = d[4] + d[5];
368 float i1k = d[4] - d[5];
369 float i2k = d[6] + d[7];
370 float i3k = d[6] - d[7];
371
372 d[0] = r0k + r2k;
373 d[1] = r1k - i3k;
374 d[2] = r0k - r2k;
375 d[3] = r1k + i3k;
376
377 d[4] = i0k + i2k;
378 d[5] = i1k + r3k;
379 d[6] = i0k - i2k;
380 d[7] = i1k - r3k;
381
382 d += 8;
383 }
384
385 const float *dw = XFFT_DW;
386 const float *iw_re = XFFT_A_RE;
387 const float *iw_im = XFFT_A_IM;
388
389 // Iterate butterflies
390 while (n < last)
391 {
392 // lsp_dumpf("iw_re", "%.6f", iw_re, 4);
393 // lsp_dumpf("iw_im", "%.6f", iw_im, 4);
394 // lsp_dumpf("dw ", "%.6f", dw, 2);
395
396 for (size_t p=0; p<items; p += bs)
397 {
398 // Set initial values of pointers
399 float *a = &tmp[p];
400 float *b = &a[n];
401
402 w_re[0] = iw_re[0];
403 w_re[1] = iw_re[1];
404 w_re[2] = iw_re[2];
405 w_re[3] = iw_re[3];
406 w_im[0] = iw_im[0];
407 w_im[1] = iw_im[1];
408 w_im[2] = iw_im[2];
409 w_im[3] = iw_im[3];
410
411 for (size_t k=0; ;)
412 {
413 // Calculate complex c = w * b
414 c_re[0] = w_re[0] * b[0] - w_im[0] * b[4];
415 c_re[1] = w_re[1] * b[1] - w_im[1] * b[5];
416 c_re[2] = w_re[2] * b[2] - w_im[2] * b[6];
417 c_re[3] = w_re[3] * b[3] - w_im[3] * b[7];
418
419 c_im[0] = w_re[0] * b[4] + w_im[0] * b[0];
420 c_im[1] = w_re[1] * b[5] + w_im[1] * b[1];
421 c_im[2] = w_re[2] * b[6] + w_im[2] * b[2];
422 c_im[3] = w_re[3] * b[7] + w_im[3] * b[3];
423
424 // Calculate the output values:
425 // a' = a + c
426 // b' = a - c
427 b[0] = a[0] - c_re[0];
428 b[1] = a[1] - c_re[1];
429 b[2] = a[2] - c_re[2];
430 b[3] = a[3] - c_re[3];
431
432 b[4] = a[4] - c_im[0];
433 b[5] = a[5] - c_im[1];
434 b[6] = a[6] - c_im[2];
435 b[7] = a[7] - c_im[3];
436
437 a[0] = a[0] + c_re[0];
438 a[1] = a[1] + c_re[1];
439 a[2] = a[2] + c_re[2];
440 a[3] = a[3] + c_re[3];
441
442 a[4] = a[4] + c_im[0];
443 a[5] = a[5] + c_im[1];
444 a[6] = a[6] + c_im[2];
445 a[7] = a[7] + c_im[3];
446
447 // Update pointers
448 a += 8;
449 b += 8;
450
451 if ((k += 8) >= n)
452 break;
453
454 // Rotate w vector
455 c_re[0] = w_re[0]*dw[0] - w_im[0]*dw[1];
456 c_re[1] = w_re[1]*dw[0] - w_im[1]*dw[1];
457 c_re[2] = w_re[2]*dw[0] - w_im[2]*dw[1];
458 c_re[3] = w_re[3]*dw[0] - w_im[3]*dw[1];
459
460 c_im[0] = w_re[0]*dw[1] + w_im[0]*dw[0];
461 c_im[1] = w_re[1]*dw[1] + w_im[1]*dw[0];
462 c_im[2] = w_re[2]*dw[1] + w_im[2]*dw[0];
463 c_im[3] = w_re[3]*dw[1] + w_im[3]*dw[0];
464
465 w_re[0] = c_re[0];
466 w_re[1] = c_re[1];
467 w_re[2] = c_re[2];
468 w_re[3] = c_re[3];
469
470 w_im[0] = c_im[0];
471 w_im[1] = c_im[1];
472 w_im[2] = c_im[2];
473 w_im[3] = c_im[3];
474 }
475 }
476
477 dw += 2;
478 iw_re += 4;
479 iw_im += 4;
480 n <<= 1;
481 bs <<= 1;
482 }
483
484 if (n < items)
485 {
486 // ONE LARGE CYCLE
487 // Set initial values of pointers
488 float *a = tmp;
489 float *b = &a[n];
490 float *d1 = dst;
491 float *d2 = &d1[n>>1];
492 float kn = 1.0f / last;
493
494 w_re[0] = iw_re[0];
495 w_re[1] = iw_re[1];
496 w_re[2] = iw_re[2];
497 w_re[3] = iw_re[3];
498 w_im[0] = iw_im[0];
499 w_im[1] = iw_im[1];
500 w_im[2] = iw_im[2];
501 w_im[3] = iw_im[3];
502
503 for (size_t k=0; ;)
504 {
505 // Calculate complex c = w * b
506 c_re[0] = w_re[0] * b[0] - w_im[0] * b[4];
507 c_re[1] = w_re[1] * b[1] - w_im[1] * b[5];
508 c_re[2] = w_re[2] * b[2] - w_im[2] * b[6];
509 c_re[3] = w_re[3] * b[3] - w_im[3] * b[7];
510
511 // Calculate the output values:
512 // a' = a + c
513 // b' = a - c
514 d1[0] = kn*(a[0] + c_re[0]);
515 d1[1] = kn*(a[1] + c_re[1]);
516 d1[2] = kn*(a[2] + c_re[2]);
517 d1[3] = kn*(a[3] + c_re[3]);
518
519 d2[0] = kn*(a[0] - c_re[0]);
520 d2[1] = kn*(a[1] - c_re[1]);
521 d2[2] = kn*(a[2] - c_re[2]);
522 d2[3] = kn*(a[3] - c_re[3]);
523
524 // Update pointers
525 a += 8;
526 b += 8;
527 d1 += 4;
528 d2 += 4;
529
530 if ((k += 8) >= n)
531 break;
532
533 // Rotate w vector
534 c_re[0] = w_re[0]*dw[0] - w_im[0]*dw[1];
535 c_re[1] = w_re[1]*dw[0] - w_im[1]*dw[1];
536 c_re[2] = w_re[2]*dw[0] - w_im[2]*dw[1];
537 c_re[3] = w_re[3]*dw[0] - w_im[3]*dw[1];
538
539 c_im[0] = w_re[0]*dw[1] + w_im[0]*dw[0];
540 c_im[1] = w_re[1]*dw[1] + w_im[1]*dw[0];
541 c_im[2] = w_re[2]*dw[1] + w_im[2]*dw[0];
542 c_im[3] = w_re[3]*dw[1] + w_im[3]*dw[0];
543
544 w_re[0] = c_re[0];
545 w_re[1] = c_re[1];
546 w_re[2] = c_re[2];
547 w_re[3] = c_re[3];
548
549 w_im[0] = c_im[0];
550 w_im[1] = c_im[1];
551 w_im[2] = c_im[2];
552 w_im[3] = c_im[3];
553 }
554 }
555 else
556 {
557 // Add real result to the target (ignore complex result)
558 float kn = 1.0f / last;
559
560 for (size_t i=0; i<items; i += 8)
561 {
562 dst[0] += tmp[0] * kn;
563 dst[1] += tmp[1] * kn;
564 dst[2] += tmp[2] * kn;
565 dst[3] += tmp[3] * kn;
566
567 dst += 4;
568 tmp += 8;
569 }
570 }
571 }
572
573 namespace native
574 {
575 void fastconv_parse(float *dst, const float *src, size_t rank);
576 void fastconv_restore(float *dst, float *tmp, size_t rank);
577 void fastconv_apply(float *dst, float *tmp, const float *c1, const float *c2, size_t rank);
578 void fastconv_parse_apply(float *dst, float *tmp, const float *c, const float *src, size_t rank);
579 }
580
581 IF_ARCH_X86(
582 namespace sse
583 {
584 void fastconv_parse(float *dst, const float *src, size_t rank);
585 void fastconv_restore(float *dst, float *tmp, size_t rank);
586 void fastconv_apply(float *dst, float *tmp, const float *c1, const float *c2, size_t rank);
587 void fastconv_parse_apply(float *dst, float *tmp, const float *c, const float *src, size_t rank);
588 }
589
590 namespace avx
591 {
592 void fastconv_parse(float *dst, const float *src, size_t rank);
593 void fastconv_restore(float *dst, float *tmp, size_t rank);
594 void fastconv_apply(float *dst, float *tmp, const float *c1, const float *c2, size_t rank);
595 void fastconv_parse_apply(float *dst, float *tmp, const float *c, const float *src, size_t rank);
596
597 void fastconv_parse_fma3(float *dst, const float *src, size_t rank);
598 void fastconv_restore_fma3(float *dst, float *tmp, size_t rank);
599 void fastconv_apply_fma3(float *dst, float *tmp, const float *c1, const float *c2, size_t rank);
600 void fastconv_parse_apply_fma3(float *dst, float *tmp, const float *c, const float *src, size_t rank);
601 }
602 )
603
604 IF_ARCH_ARM(
605 namespace neon_d32
606 {
607 void fastconv_parse(float *dst, const float *src, size_t rank);
608 void fastconv_restore(float *dst, float *tmp, size_t rank);
609 void fastconv_apply(float *dst, float *tmp, const float *c1, const float *c2, size_t rank);
610 void fastconv_parse_apply(float *dst, float *tmp, const float *c, const float *src, size_t rank);
611 }
612 )
613
614 IF_ARCH_AARCH64(
615 namespace asimd
616 {
617 void fastconv_parse(float *dst, const float *src, size_t rank);
618 void fastconv_restore(float *dst, float *tmp, size_t rank);
619 void fastconv_apply(float *dst, float *tmp, const float *c1, const float *c2, size_t rank);
620 void fastconv_parse_apply(float *dst, float *tmp, const float *c, const float *src, size_t rank);
621 }
622 )
623
624 typedef void (* fastconv_parse_t)(float *dst, const float *src, size_t rank);
625 typedef void (* fastconv_restore_t)(float *dst, float *tmp, size_t rank);
626 typedef void (* fastconv_apply_t)(float *dst, float *tmp, const float *c1, const float *c2, size_t rank);
627 typedef void (* fastconv_parse_apply_t)(float *dst, float *tmp, const float *c, const float *src, size_t rank);
628
629 MTEST_BEGIN("dsp.fft", fastconv)
630
test_fastconv_parse(const char * text,fastconv_parse_t parse,FloatBuffer & src)631 void test_fastconv_parse(const char *text, fastconv_parse_t parse, FloatBuffer &src)
632 {
633 printf("testing %s fastconv_parse...\n", text);
634 FloatBuffer samp(BUF_SIZE >> 1, 64);
635 FloatBuffer conv(BUF_SIZE << 1, 64);
636 samp.copy(src);
637
638 samp.dump("samp");
639 parse(conv, samp, RANK);
640 conv.dump("conv");
641
642 MTEST_ASSERT_MSG(samp.valid(), "samp corrupted");
643 MTEST_ASSERT_MSG(conv.valid(), "conv corrupted");
644 }
645
test_fastconv_restore(const char * text,fastconv_parse_t parse,fastconv_restore_t restore,FloatBuffer & src,FloatBuffer & dst)646 void test_fastconv_restore(const char *text,
647 fastconv_parse_t parse, fastconv_restore_t restore,
648 FloatBuffer &src, FloatBuffer &dst)
649 {
650 printf("testing %s fastconv_parse + fastconv_restore...\n", text);
651 FloatBuffer samp(BUF_SIZE >> 1, 64);
652 FloatBuffer conv(BUF_SIZE << 1, 64);
653 FloatBuffer rest(BUF_SIZE, 64);
654
655 samp.copy(src);
656 rest.copy(dst);
657 samp.dump("samp");
658 samp.dump("rest");
659
660 parse(conv, samp, RANK);
661 conv.dump("conv");
662
663 restore(rest, conv, RANK);
664 conv.dump("rtmp");
665 rest.dump("rest");
666
667 MTEST_ASSERT_MSG(samp.valid(), "samp corrupted");
668 MTEST_ASSERT_MSG(conv.valid(), "conv corrupted");
669 MTEST_ASSERT_MSG(rest.valid(), "rest corrupted");
670 }
671
test_fastconv_apply(const char * text,fastconv_parse_t parse,fastconv_apply_t apply,FloatBuffer & src1,FloatBuffer & src2,FloatBuffer & dst)672 void test_fastconv_apply(const char *text,
673 fastconv_parse_t parse, fastconv_apply_t apply,
674 FloatBuffer &src1, FloatBuffer &src2, FloatBuffer &dst)
675 {
676 printf("testing %s fastconv_parse + fastconv_apply...\n", text);
677 FloatBuffer samp1(BUF_SIZE >> 1, 64);
678 FloatBuffer samp2(BUF_SIZE >> 1, 64);
679 FloatBuffer conv1(BUF_SIZE << 1, 64);
680 FloatBuffer conv2(BUF_SIZE << 1, 64);
681 FloatBuffer rest(BUF_SIZE, 64);
682 FloatBuffer temp(BUF_SIZE << 1, 64);
683
684 samp1.copy(src1);
685 samp2.copy(src2);
686 rest.copy(dst);
687 samp1.dump("samp1");
688 samp2.dump("samp2");
689 rest.dump ("dest ");
690
691 parse(conv1, samp1, RANK);
692 parse(conv2, samp2, RANK);
693 conv1.dump("conv1");
694 conv2.dump("conv2");
695
696 apply(rest, temp, conv1, conv2, RANK);
697 temp.dump ("temp ");
698 rest.dump ("rest ");
699
700 MTEST_ASSERT_MSG(samp1.valid(), "samp1 corrupted");
701 MTEST_ASSERT_MSG(samp2.valid(), "samp2 corrupted");
702 MTEST_ASSERT_MSG(conv1.valid(), "conv1 corrupted");
703 MTEST_ASSERT_MSG(conv2.valid(), "conv2 corrupted");
704 MTEST_ASSERT_MSG(rest.valid(), "rest corrupted");
705 MTEST_ASSERT_MSG(temp.valid(), "temp corrupted");
706 }
707
test_fastconv_parse_apply(const char * text,fastconv_parse_t parse,fastconv_parse_apply_t papply,FloatBuffer & src1,FloatBuffer & src2,FloatBuffer & dst)708 void test_fastconv_parse_apply(const char *text,
709 fastconv_parse_t parse, fastconv_parse_apply_t papply,
710 FloatBuffer &src1, FloatBuffer &src2, FloatBuffer &dst)
711 {
712 printf("testing %s fastconv_parse + fastconv_parse_apply...\n", text);
713 FloatBuffer samp1(BUF_SIZE >> 1, 64);
714 FloatBuffer samp2(BUF_SIZE >> 1, 64);
715 FloatBuffer conv (BUF_SIZE << 1, 64);
716 FloatBuffer rest(BUF_SIZE, 64);
717 FloatBuffer temp(BUF_SIZE << 1, 64);
718
719 samp1.copy(src1);
720 samp2.copy(src2);
721 rest.copy(dst);
722 samp1.dump("samp1");
723 samp2.dump("samp2");
724 rest.dump ("dest ");
725
726 parse(conv, samp1, RANK);
727 conv.dump("conv ");
728
729 papply(rest, temp, conv, samp2, RANK);
730 temp.dump ("temp ");
731 rest.dump ("rest ");
732
733 MTEST_ASSERT_MSG(samp1.valid(), "samp1 corrupted");
734 MTEST_ASSERT_MSG(samp2.valid(), "samp2 corrupted");
735 MTEST_ASSERT_MSG(conv.valid(), "conv corrupted");
736 MTEST_ASSERT_MSG(rest.valid(), "rest corrupted");
737 MTEST_ASSERT_MSG(temp.valid(), "temp corrupted");
738 }
739
740 MTEST_MAIN
741 {
742 FloatBuffer src1(BUF_SIZE >> 1, 64);
743 FloatBuffer src2(BUF_SIZE >> 1, 64);
744 FloatBuffer dst(BUF_SIZE, 64);
745
746 FloatBuffer samp1(BUF_SIZE >> 1, 64);
747 FloatBuffer samp2(BUF_SIZE >> 1, 64);
748 FloatBuffer conv1(BUF_SIZE << 1, 64);
749 FloatBuffer conv2(BUF_SIZE << 1, 64);
750 FloatBuffer rest1(BUF_SIZE, 64);
751 FloatBuffer rest2(BUF_SIZE, 64);
752 FloatBuffer temp(BUF_SIZE << 1, 64);
753
754 // Prepare data
755 for (size_t i=0; i<(BUF_SIZE >> 1); ++i)
756 {
757 src1[i] = i;
758 src2[i] = 0.1 * i;
759 }
760 for (size_t i=0; i<BUF_SIZE; ++i)
761 dst[i] = 10.0 * i;
762 samp2.copy(samp1);
763
764 // Test parse
765 test_fastconv_parse("native", fastconv_parse, src1);
766 IF_ARCH_X86(
767 if (TEST_SUPPORTED(sse::fastconv_parse))
768 test_fastconv_parse("SSE", sse::fastconv_parse, src1);
769 if (TEST_SUPPORTED(avx::fastconv_parse))
770 test_fastconv_parse("AVX", avx::fastconv_parse, src1);
771 if (TEST_SUPPORTED(avx::fastconv_parse_fma3))
772 test_fastconv_parse("FMA3", avx::fastconv_parse_fma3, src1);
773 );
774 IF_ARCH_ARM(
775 if (TEST_SUPPORTED(neon_d32::fastconv_parse))
776 test_fastconv_parse("NEON-D32", neon_d32::fastconv_parse, src1);
777 );
778 IF_ARCH_AARCH64(
779 if (TEST_SUPPORTED(asimd::fastconv_parse))
780 test_fastconv_parse("ASIMD", asimd::fastconv_parse, src1);
781 );
782
783 // Test parse + restore
784 printf("\n");
785 test_fastconv_restore("native", fastconv_parse, fastconv_restore, src1, dst);
786
787 IF_ARCH_X86(
788 if (TEST_SUPPORTED(sse::fastconv_parse) && TEST_SUPPORTED(sse::fastconv_restore))
789 test_fastconv_restore("SSE", sse::fastconv_parse, sse::fastconv_restore, src1, dst);
790 if (TEST_SUPPORTED(avx::fastconv_parse) && TEST_SUPPORTED(avx::fastconv_restore))
791 test_fastconv_restore("AVX", avx::fastconv_parse, avx::fastconv_restore, src1, dst);
792 if (TEST_SUPPORTED(avx::fastconv_parse_fma3) && TEST_SUPPORTED(avx::fastconv_restore_fma3))
793 test_fastconv_restore("FMA3", avx::fastconv_parse_fma3, avx::fastconv_restore_fma3, src1, dst);
794 );
795 IF_ARCH_ARM(
796 if (TEST_SUPPORTED(neon_d32::fastconv_parse) && TEST_SUPPORTED(neon_d32::fastconv_restore))
797 test_fastconv_restore("NEON-D32", neon_d32::fastconv_parse, neon_d32::fastconv_restore, src1, dst);
798 );
799 IF_ARCH_AARCH64(
800 if (TEST_SUPPORTED(asimd::fastconv_parse) && TEST_SUPPORTED(asimd::fastconv_restore))
801 test_fastconv_restore("ASIMD", asimd::fastconv_parse, asimd::fastconv_restore, src1, dst);
802 );
803
804 // Test parse + apply
805 printf("\n");
806 test_fastconv_apply("native", native::fastconv_parse, native::fastconv_apply, src1, src2, dst);
807
808 IF_ARCH_X86(
809 if (TEST_SUPPORTED(sse::fastconv_parse) && TEST_SUPPORTED(sse::fastconv_apply))
810 test_fastconv_apply("SSE", sse::fastconv_parse, sse::fastconv_apply, src1, src2, dst);
811 if (TEST_SUPPORTED(avx::fastconv_parse) && TEST_SUPPORTED(avx::fastconv_apply))
812 test_fastconv_apply("AVX", avx::fastconv_parse, avx::fastconv_apply, src1, src2, dst);
813 if (TEST_SUPPORTED(avx::fastconv_parse_fma3) && TEST_SUPPORTED(avx::fastconv_apply_fma3))
814 test_fastconv_apply("FMA3", avx::fastconv_parse_fma3, avx::fastconv_apply_fma3, src1, src2, dst);
815 );
816 IF_ARCH_ARM(
817 if (TEST_SUPPORTED(neon_d32::fastconv_parse) && TEST_SUPPORTED(neon_d32::fastconv_apply))
818 test_fastconv_apply("NEON-D32", neon_d32::fastconv_parse, neon_d32::fastconv_apply, src1, src2, dst);
819 );
820 IF_ARCH_AARCH64(
821 if (TEST_SUPPORTED(asimd::fastconv_parse) && TEST_SUPPORTED(asimd::fastconv_apply))
822 test_fastconv_apply("ASIMD", asimd::fastconv_parse, asimd::fastconv_apply, src1, src2, dst);
823 );
824
825 // Test parse + parse_apply
826 printf("\n");
827 test_fastconv_parse_apply("native", native::fastconv_parse, native::fastconv_parse_apply, src1, src2, dst);
828
829 IF_ARCH_X86(
830 if (TEST_SUPPORTED(sse::fastconv_parse) && TEST_SUPPORTED(sse::fastconv_parse_apply))
831 test_fastconv_parse_apply("SSE", sse::fastconv_parse, sse::fastconv_parse_apply, src1, src2, dst);
832 if (TEST_SUPPORTED(avx::fastconv_parse) && TEST_SUPPORTED(avx::fastconv_parse_apply))
833 test_fastconv_parse_apply("AVX", avx::fastconv_parse, avx::fastconv_parse_apply, src1, src2, dst);
834 if (TEST_SUPPORTED(avx::fastconv_parse_fma3) && TEST_SUPPORTED(avx::fastconv_parse_apply_fma3))
835 test_fastconv_parse_apply("FMA3", avx::fastconv_parse_fma3, avx::fastconv_parse_apply_fma3, src1, src2, dst);
836 );
837
838 IF_ARCH_ARM(
839 if (TEST_SUPPORTED(neon_d32::fastconv_parse) && TEST_SUPPORTED(neon_d32::fastconv_parse_apply))
840 test_fastconv_parse_apply("NEON-D32", neon_d32::fastconv_parse, neon_d32::fastconv_parse_apply, src1, src2, dst);
841 );
842
843 IF_ARCH_AARCH64(
844 if (TEST_SUPPORTED(asimd::fastconv_parse) && TEST_SUPPORTED(asimd::fastconv_parse_apply))
845 test_fastconv_parse_apply("ASIMD", asimd::fastconv_parse, asimd::fastconv_parse_apply, src1, src2, dst);
846 );
847 }
848 MTEST_END
849
850