1 // ==================================================================================
2 // Copyright (c) 2016 HiFi-LoFi
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a copy
5 // of this software and associated documentation files (the "Software"), to deal
6 // in the Software without restriction, including without limitation the rights
7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 // copies of the Software, and to permit persons to whom the Software is furnished
9 // to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
16 // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
17 // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
18 // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
19 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 // ==================================================================================
21 
22 #include "AudioFFT.h"
23 
24 #include <cassert>
25 #include <cmath>
26 #include <cstring>
27 
28 
29 #if defined(AUDIOFFT_APPLE_ACCELERATE)
30   #define AUDIOFFT_APPLE_ACCELERATE_USED
31   #include <Accelerate/Accelerate.h>
32   #include <vector>
33 #elif defined (AUDIOFFT_FFTW3)
34   #define AUDIOFFT_FFTW3_USED
35   #include <fftw3.h>
36 #else
37   #if !defined(AUDIOFFT_OOURA)
38     #define AUDIOFFT_OOURA
39   #endif
40   #define AUDIOFFT_OOURA_USED
41   #include <vector>
42 #endif
43 
44 
45 namespace audiofft
46 {
47 
48   namespace details
49   {
50 
51     static bool IsPowerOf2(size_t val)
52     {
53       return (val == 1 || (val & (val-1)) == 0);
54     }
55 
56 
57     template<typename TypeDest, typename TypeSrc>
58     void ConvertBuffer(TypeDest* dest, const TypeSrc* src, size_t len)
59     {
60       for (size_t i=0; i<len; ++i)
61       {
62         dest[i] = static_cast<TypeDest>(src[i]);
63       }
64     }
65 
66 
67     template<typename TypeDest, typename TypeSrc, typename TypeFactor>
68     void ScaleBuffer(TypeDest* dest, const TypeSrc* src, const TypeFactor factor, size_t len)
69     {
70       for (size_t i=0; i<len; ++i)
71       {
72         dest[i] = static_cast<TypeDest>(static_cast<TypeFactor>(src[i]) * factor);
73       }
74     }
75 
76 
77     // ================================================================
78 
79 
80 #ifdef AUDIOFFT_OOURA_USED
81 
82     /**
83      * @internal
84      * @class OouraFFT
85      * @brief FFT implementation based on the great radix-4 routines by Takuya Ooura
86      */
87     class OouraFFT : public AudioFFTImpl
88     {
89     public:
90       OouraFFT() :
91         AudioFFTImpl(),
92         _size(0),
93         _ip(),
94         _w(),
95         _buffer()
96       {
97       }
98 
99       virtual void init(size_t size) override
100       {
101         if (_size != size)
102         {
103           _ip.resize(2 + static_cast<int>(std::sqrt(static_cast<double>(size))));
104           _w.resize(size / 2);
105           _buffer.resize(size);
106           _size = size;
107 
108           const int size4 = static_cast<int>(_size) / 4;
109           makewt(size4, _ip.data(), _w.data());
110           makect(size4, _ip.data(), _w.data() + size4);
111         }
112       }
113 
114       virtual void fft(const float* data, float* re, float* im) override
115       {
116         // Convert into the format as required by the Ooura FFT
117         ConvertBuffer(&_buffer[0], data, _size);
118 
119         rdft(static_cast<int>(_size), +1, _buffer.data(), _ip.data(), _w.data());
120 
121         // Convert back to split-complex
122         {
123           double* b = &_buffer[0];
124           double* bEnd = b + _size;
125           float *r = re;
126           float *i = im;
127           while (b != bEnd)
128           {
129             *(r++) = static_cast<float>(*(b++));
130             *(i++) = static_cast<float>(-(*(b++)));
131           }
132         }
133         const size_t size2 = _size / 2;
134         re[size2] = -im[0];
135         im[0] = 0.0;
136         im[size2] = 0.0;
137       }
138 
139       virtual void ifft(float* data, const float* re, const float* im) override
140       {
141         // Convert into the format as required by the Ooura FFT
142         {
143           double* b = &_buffer[0];
144           double* bEnd = b + _size;
145           const float *r = re;
146           const float *i = im;
147           while (b != bEnd)
148           {
149             *(b++) = static_cast<double>(*(r++));
150             *(b++) = -static_cast<double>(*(i++));
151           }
152           _buffer[1] = re[_size / 2];
153         }
154 
155         rdft(static_cast<int>(_size), -1, _buffer.data(), _ip.data(), _w.data());
156 
157         // Convert back to split-complex
158         ScaleBuffer(data, &_buffer[0], 2.0 / static_cast<double>(_size), _size);
159       }
160 
161     private:
162       size_t _size;
163       std::vector<int> _ip;
164       std::vector<double> _w;
165       std::vector<double> _buffer;
166 
167       void rdft(int n, int isgn, double *a, int *ip, double *w)
168       {
169         int nw = ip[0];
170         int nc = ip[1];
171 
172         if (isgn >= 0)
173         {
174           if (n > 4)
175           {
176             bitrv2(n, ip + 2, a);
177             cftfsub(n, a, w);
178             rftfsub(n, a, nc, w + nw);
179           }
180           else if (n == 4)
181           {
182             cftfsub(n, a, w);
183           }
184           double xi = a[0] - a[1];
185           a[0] += a[1];
186           a[1] = xi;
187         }
188         else
189         {
190           a[1] = 0.5 * (a[0] - a[1]);
191           a[0] -= a[1];
192           if (n > 4)
193           {
194             rftbsub(n, a, nc, w + nw);
195             bitrv2(n, ip + 2, a);
196             cftbsub(n, a, w);
197           }
198           else if (n == 4)
199           {
200             cftfsub(n, a, w);
201           }
202         }
203       }
204 
205 
206       /* -------- initializing routines -------- */
207 
208       void makewt(int nw, int *ip, double *w)
209       {
210         int j, nwh;
211         double delta, x, y;
212 
213         ip[0] = nw;
214         ip[1] = 1;
215         if (nw > 2) {
216           nwh = nw >> 1;
217           delta = atan(1.0) / nwh;
218           w[0] = 1;
219           w[1] = 0;
220           w[nwh] = cos(delta * nwh);
221           w[nwh + 1] = w[nwh];
222           if (nwh > 2) {
223             for (j = 2; j < nwh; j += 2) {
224               x = cos(delta * j);
225               y = sin(delta * j);
226               w[j] = x;
227               w[j + 1] = y;
228               w[nw - j] = y;
229               w[nw - j + 1] = x;
230             }
231             bitrv2(nw, ip + 2, w);
232           }
233         }
234       }
235 
236 
237       void makect(int nc, int *ip, double *c)
238       {
239         int j, nch;
240         double delta;
241 
242         ip[1] = nc;
243         if (nc > 1) {
244           nch = nc >> 1;
245           delta = atan(1.0) / nch;
246           c[0] = cos(delta * nch);
247           c[nch] = 0.5 * c[0];
248           for (j = 1; j < nch; j++) {
249             c[j] = 0.5 * cos(delta * j);
250             c[nc - j] = 0.5 * sin(delta * j);
251           }
252         }
253       }
254 
255 
256       /* -------- child routines -------- */
257 
258 
259       void bitrv2(int n, int *ip, double *a)
260       {
261         int j, j1, k, k1, l, m, m2;
262         double xr, xi, yr, yi;
263 
264         ip[0] = 0;
265         l = n;
266         m = 1;
267         while ((m << 3) < l) {
268           l >>= 1;
269           for (j = 0; j < m; j++) {
270             ip[m + j] = ip[j] + l;
271           }
272           m <<= 1;
273         }
274         m2 = 2 * m;
275         if ((m << 3) == l) {
276           for (k = 0; k < m; k++) {
277             for (j = 0; j < k; j++) {
278               j1 = 2 * j + ip[k];
279               k1 = 2 * k + ip[j];
280               xr = a[j1];
281               xi = a[j1 + 1];
282               yr = a[k1];
283               yi = a[k1 + 1];
284               a[j1] = yr;
285               a[j1 + 1] = yi;
286               a[k1] = xr;
287               a[k1 + 1] = xi;
288               j1 += m2;
289               k1 += 2 * m2;
290               xr = a[j1];
291               xi = a[j1 + 1];
292               yr = a[k1];
293               yi = a[k1 + 1];
294               a[j1] = yr;
295               a[j1 + 1] = yi;
296               a[k1] = xr;
297               a[k1 + 1] = xi;
298               j1 += m2;
299               k1 -= m2;
300               xr = a[j1];
301               xi = a[j1 + 1];
302               yr = a[k1];
303               yi = a[k1 + 1];
304               a[j1] = yr;
305               a[j1 + 1] = yi;
306               a[k1] = xr;
307               a[k1 + 1] = xi;
308               j1 += m2;
309               k1 += 2 * m2;
310               xr = a[j1];
311               xi = a[j1 + 1];
312               yr = a[k1];
313               yi = a[k1 + 1];
314               a[j1] = yr;
315               a[j1 + 1] = yi;
316               a[k1] = xr;
317               a[k1 + 1] = xi;
318             }
319             j1 = 2 * k + m2 + ip[k];
320             k1 = j1 + m2;
321             xr = a[j1];
322             xi = a[j1 + 1];
323             yr = a[k1];
324             yi = a[k1 + 1];
325             a[j1] = yr;
326             a[j1 + 1] = yi;
327             a[k1] = xr;
328             a[k1 + 1] = xi;
329           }
330         } else {
331           for (k = 1; k < m; k++) {
332             for (j = 0; j < k; j++) {
333               j1 = 2 * j + ip[k];
334               k1 = 2 * k + ip[j];
335               xr = a[j1];
336               xi = a[j1 + 1];
337               yr = a[k1];
338               yi = a[k1 + 1];
339               a[j1] = yr;
340               a[j1 + 1] = yi;
341               a[k1] = xr;
342               a[k1 + 1] = xi;
343               j1 += m2;
344               k1 += m2;
345               xr = a[j1];
346               xi = a[j1 + 1];
347               yr = a[k1];
348               yi = a[k1 + 1];
349               a[j1] = yr;
350               a[j1 + 1] = yi;
351               a[k1] = xr;
352               a[k1 + 1] = xi;
353             }
354           }
355         }
356       }
357 
358 
359       void cftfsub(int n, double *a, double *w)
360       {
361         int j, j1, j2, j3, l;
362         double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
363 
364         l = 2;
365         if (n > 8) {
366           cft1st(n, a, w);
367           l = 8;
368           while ((l << 2) < n) {
369             cftmdl(n, l, a, w);
370             l <<= 2;
371           }
372         }
373         if ((l << 2) == n) {
374           for (j = 0; j < l; j += 2) {
375             j1 = j + l;
376             j2 = j1 + l;
377             j3 = j2 + l;
378             x0r = a[j] + a[j1];
379             x0i = a[j + 1] + a[j1 + 1];
380             x1r = a[j] - a[j1];
381             x1i = a[j + 1] - a[j1 + 1];
382             x2r = a[j2] + a[j3];
383             x2i = a[j2 + 1] + a[j3 + 1];
384             x3r = a[j2] - a[j3];
385             x3i = a[j2 + 1] - a[j3 + 1];
386             a[j] = x0r + x2r;
387             a[j + 1] = x0i + x2i;
388             a[j2] = x0r - x2r;
389             a[j2 + 1] = x0i - x2i;
390             a[j1] = x1r - x3i;
391             a[j1 + 1] = x1i + x3r;
392             a[j3] = x1r + x3i;
393             a[j3 + 1] = x1i - x3r;
394           }
395         } else {
396           for (j = 0; j < l; j += 2) {
397             j1 = j + l;
398             x0r = a[j] - a[j1];
399             x0i = a[j + 1] - a[j1 + 1];
400             a[j] += a[j1];
401             a[j + 1] += a[j1 + 1];
402             a[j1] = x0r;
403             a[j1 + 1] = x0i;
404           }
405         }
406       }
407 
408 
409       void cftbsub(int n, double *a, double *w)
410       {
411         int j, j1, j2, j3, l;
412         double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
413 
414         l = 2;
415         if (n > 8) {
416           cft1st(n, a, w);
417           l = 8;
418           while ((l << 2) < n) {
419             cftmdl(n, l, a, w);
420             l <<= 2;
421           }
422         }
423         if ((l << 2) == n) {
424           for (j = 0; j < l; j += 2) {
425             j1 = j + l;
426             j2 = j1 + l;
427             j3 = j2 + l;
428             x0r = a[j] + a[j1];
429             x0i = -a[j + 1] - a[j1 + 1];
430             x1r = a[j] - a[j1];
431             x1i = -a[j + 1] + a[j1 + 1];
432             x2r = a[j2] + a[j3];
433             x2i = a[j2 + 1] + a[j3 + 1];
434             x3r = a[j2] - a[j3];
435             x3i = a[j2 + 1] - a[j3 + 1];
436             a[j] = x0r + x2r;
437             a[j + 1] = x0i - x2i;
438             a[j2] = x0r - x2r;
439             a[j2 + 1] = x0i + x2i;
440             a[j1] = x1r - x3i;
441             a[j1 + 1] = x1i - x3r;
442             a[j3] = x1r + x3i;
443             a[j3 + 1] = x1i + x3r;
444           }
445         } else {
446           for (j = 0; j < l; j += 2) {
447             j1 = j + l;
448             x0r = a[j] - a[j1];
449             x0i = -a[j + 1] + a[j1 + 1];
450             a[j] += a[j1];
451             a[j + 1] = -a[j + 1] - a[j1 + 1];
452             a[j1] = x0r;
453             a[j1 + 1] = x0i;
454           }
455         }
456       }
457 
458 
459       void cft1st(int n, double *a, double *w)
460       {
461         int j, k1, k2;
462         double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
463         double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
464 
465         x0r = a[0] + a[2];
466         x0i = a[1] + a[3];
467         x1r = a[0] - a[2];
468         x1i = a[1] - a[3];
469         x2r = a[4] + a[6];
470         x2i = a[5] + a[7];
471         x3r = a[4] - a[6];
472         x3i = a[5] - a[7];
473         a[0] = x0r + x2r;
474         a[1] = x0i + x2i;
475         a[4] = x0r - x2r;
476         a[5] = x0i - x2i;
477         a[2] = x1r - x3i;
478         a[3] = x1i + x3r;
479         a[6] = x1r + x3i;
480         a[7] = x1i - x3r;
481         wk1r = w[2];
482         x0r = a[8] + a[10];
483         x0i = a[9] + a[11];
484         x1r = a[8] - a[10];
485         x1i = a[9] - a[11];
486         x2r = a[12] + a[14];
487         x2i = a[13] + a[15];
488         x3r = a[12] - a[14];
489         x3i = a[13] - a[15];
490         a[8] = x0r + x2r;
491         a[9] = x0i + x2i;
492         a[12] = x2i - x0i;
493         a[13] = x0r - x2r;
494         x0r = x1r - x3i;
495         x0i = x1i + x3r;
496         a[10] = wk1r * (x0r - x0i);
497         a[11] = wk1r * (x0r + x0i);
498         x0r = x3i + x1r;
499         x0i = x3r - x1i;
500         a[14] = wk1r * (x0i - x0r);
501         a[15] = wk1r * (x0i + x0r);
502         k1 = 0;
503         for (j = 16; j < n; j += 16) {
504           k1 += 2;
505           k2 = 2 * k1;
506           wk2r = w[k1];
507           wk2i = w[k1 + 1];
508           wk1r = w[k2];
509           wk1i = w[k2 + 1];
510           wk3r = wk1r - 2 * wk2i * wk1i;
511           wk3i = 2 * wk2i * wk1r - wk1i;
512           x0r = a[j] + a[j + 2];
513           x0i = a[j + 1] + a[j + 3];
514           x1r = a[j] - a[j + 2];
515           x1i = a[j + 1] - a[j + 3];
516           x2r = a[j + 4] + a[j + 6];
517           x2i = a[j + 5] + a[j + 7];
518           x3r = a[j + 4] - a[j + 6];
519           x3i = a[j + 5] - a[j + 7];
520           a[j] = x0r + x2r;
521           a[j + 1] = x0i + x2i;
522           x0r -= x2r;
523           x0i -= x2i;
524           a[j + 4] = wk2r * x0r - wk2i * x0i;
525           a[j + 5] = wk2r * x0i + wk2i * x0r;
526           x0r = x1r - x3i;
527           x0i = x1i + x3r;
528           a[j + 2] = wk1r * x0r - wk1i * x0i;
529           a[j + 3] = wk1r * x0i + wk1i * x0r;
530           x0r = x1r + x3i;
531           x0i = x1i - x3r;
532           a[j + 6] = wk3r * x0r - wk3i * x0i;
533           a[j + 7] = wk3r * x0i + wk3i * x0r;
534           wk1r = w[k2 + 2];
535           wk1i = w[k2 + 3];
536           wk3r = wk1r - 2 * wk2r * wk1i;
537           wk3i = 2 * wk2r * wk1r - wk1i;
538           x0r = a[j + 8] + a[j + 10];
539           x0i = a[j + 9] + a[j + 11];
540           x1r = a[j + 8] - a[j + 10];
541           x1i = a[j + 9] - a[j + 11];
542           x2r = a[j + 12] + a[j + 14];
543           x2i = a[j + 13] + a[j + 15];
544           x3r = a[j + 12] - a[j + 14];
545           x3i = a[j + 13] - a[j + 15];
546           a[j + 8] = x0r + x2r;
547           a[j + 9] = x0i + x2i;
548           x0r -= x2r;
549           x0i -= x2i;
550           a[j + 12] = -wk2i * x0r - wk2r * x0i;
551           a[j + 13] = -wk2i * x0i + wk2r * x0r;
552           x0r = x1r - x3i;
553           x0i = x1i + x3r;
554           a[j + 10] = wk1r * x0r - wk1i * x0i;
555           a[j + 11] = wk1r * x0i + wk1i * x0r;
556           x0r = x1r + x3i;
557           x0i = x1i - x3r;
558           a[j + 14] = wk3r * x0r - wk3i * x0i;
559           a[j + 15] = wk3r * x0i + wk3i * x0r;
560         }
561       }
562 
563 
564       void cftmdl(int n, int l, double *a, double *w)
565       {
566         int j, j1, j2, j3, k, k1, k2, m, m2;
567         double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
568         double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
569 
570         m = l << 2;
571         for (j = 0; j < l; j += 2) {
572           j1 = j + l;
573           j2 = j1 + l;
574           j3 = j2 + l;
575           x0r = a[j] + a[j1];
576           x0i = a[j + 1] + a[j1 + 1];
577           x1r = a[j] - a[j1];
578           x1i = a[j + 1] - a[j1 + 1];
579           x2r = a[j2] + a[j3];
580           x2i = a[j2 + 1] + a[j3 + 1];
581           x3r = a[j2] - a[j3];
582           x3i = a[j2 + 1] - a[j3 + 1];
583           a[j] = x0r + x2r;
584           a[j + 1] = x0i + x2i;
585           a[j2] = x0r - x2r;
586           a[j2 + 1] = x0i - x2i;
587           a[j1] = x1r - x3i;
588           a[j1 + 1] = x1i + x3r;
589           a[j3] = x1r + x3i;
590           a[j3 + 1] = x1i - x3r;
591         }
592         wk1r = w[2];
593         for (j = m; j < l + m; j += 2) {
594           j1 = j + l;
595           j2 = j1 + l;
596           j3 = j2 + l;
597           x0r = a[j] + a[j1];
598           x0i = a[j + 1] + a[j1 + 1];
599           x1r = a[j] - a[j1];
600           x1i = a[j + 1] - a[j1 + 1];
601           x2r = a[j2] + a[j3];
602           x2i = a[j2 + 1] + a[j3 + 1];
603           x3r = a[j2] - a[j3];
604           x3i = a[j2 + 1] - a[j3 + 1];
605           a[j] = x0r + x2r;
606           a[j + 1] = x0i + x2i;
607           a[j2] = x2i - x0i;
608           a[j2 + 1] = x0r - x2r;
609           x0r = x1r - x3i;
610           x0i = x1i + x3r;
611           a[j1] = wk1r * (x0r - x0i);
612           a[j1 + 1] = wk1r * (x0r + x0i);
613           x0r = x3i + x1r;
614           x0i = x3r - x1i;
615           a[j3] = wk1r * (x0i - x0r);
616           a[j3 + 1] = wk1r * (x0i + x0r);
617         }
618         k1 = 0;
619         m2 = 2 * m;
620         for (k = m2; k < n; k += m2) {
621           k1 += 2;
622           k2 = 2 * k1;
623           wk2r = w[k1];
624           wk2i = w[k1 + 1];
625           wk1r = w[k2];
626           wk1i = w[k2 + 1];
627           wk3r = wk1r - 2 * wk2i * wk1i;
628           wk3i = 2 * wk2i * wk1r - wk1i;
629           for (j = k; j < l + k; j += 2) {
630             j1 = j + l;
631             j2 = j1 + l;
632             j3 = j2 + l;
633             x0r = a[j] + a[j1];
634             x0i = a[j + 1] + a[j1 + 1];
635             x1r = a[j] - a[j1];
636             x1i = a[j + 1] - a[j1 + 1];
637             x2r = a[j2] + a[j3];
638             x2i = a[j2 + 1] + a[j3 + 1];
639             x3r = a[j2] - a[j3];
640             x3i = a[j2 + 1] - a[j3 + 1];
641             a[j] = x0r + x2r;
642             a[j + 1] = x0i + x2i;
643             x0r -= x2r;
644             x0i -= x2i;
645             a[j2] = wk2r * x0r - wk2i * x0i;
646             a[j2 + 1] = wk2r * x0i + wk2i * x0r;
647             x0r = x1r - x3i;
648             x0i = x1i + x3r;
649             a[j1] = wk1r * x0r - wk1i * x0i;
650             a[j1 + 1] = wk1r * x0i + wk1i * x0r;
651             x0r = x1r + x3i;
652             x0i = x1i - x3r;
653             a[j3] = wk3r * x0r - wk3i * x0i;
654             a[j3 + 1] = wk3r * x0i + wk3i * x0r;
655           }
656           wk1r = w[k2 + 2];
657           wk1i = w[k2 + 3];
658           wk3r = wk1r - 2 * wk2r * wk1i;
659           wk3i = 2 * wk2r * wk1r - wk1i;
660           for (j = k + m; j < l + (k + m); j += 2) {
661             j1 = j + l;
662             j2 = j1 + l;
663             j3 = j2 + l;
664             x0r = a[j] + a[j1];
665             x0i = a[j + 1] + a[j1 + 1];
666             x1r = a[j] - a[j1];
667             x1i = a[j + 1] - a[j1 + 1];
668             x2r = a[j2] + a[j3];
669             x2i = a[j2 + 1] + a[j3 + 1];
670             x3r = a[j2] - a[j3];
671             x3i = a[j2 + 1] - a[j3 + 1];
672             a[j] = x0r + x2r;
673             a[j + 1] = x0i + x2i;
674             x0r -= x2r;
675             x0i -= x2i;
676             a[j2] = -wk2i * x0r - wk2r * x0i;
677             a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
678             x0r = x1r - x3i;
679             x0i = x1i + x3r;
680             a[j1] = wk1r * x0r - wk1i * x0i;
681             a[j1 + 1] = wk1r * x0i + wk1i * x0r;
682             x0r = x1r + x3i;
683             x0i = x1i - x3r;
684             a[j3] = wk3r * x0r - wk3i * x0i;
685             a[j3 + 1] = wk3r * x0i + wk3i * x0r;
686           }
687         }
688       }
689 
690 
691       void rftfsub(int n, double *a, int nc, double *c)
692       {
693         int j, k, kk, ks, m;
694         double wkr, wki, xr, xi, yr, yi;
695 
696         m = n >> 1;
697         ks = 2 * nc / m;
698         kk = 0;
699         for (j = 2; j < m; j += 2) {
700           k = n - j;
701           kk += ks;
702           wkr = 0.5 - c[nc - kk];
703           wki = c[kk];
704           xr = a[j] - a[k];
705           xi = a[j + 1] + a[k + 1];
706           yr = wkr * xr - wki * xi;
707           yi = wkr * xi + wki * xr;
708           a[j] -= yr;
709           a[j + 1] -= yi;
710           a[k] += yr;
711           a[k + 1] -= yi;
712         }
713       }
714 
715 
716       void rftbsub(int n, double *a, int nc, double *c)
717       {
718         int j, k, kk, ks, m;
719         double wkr, wki, xr, xi, yr, yi;
720 
721         a[1] = -a[1];
722         m = n >> 1;
723         ks = 2 * nc / m;
724         kk = 0;
725         for (j = 2; j < m; j += 2) {
726           k = n - j;
727           kk += ks;
728           wkr = 0.5 - c[nc - kk];
729           wki = c[kk];
730           xr = a[j] - a[k];
731           xi = a[j + 1] + a[k + 1];
732           yr = wkr * xr + wki * xi;
733           yi = wkr * xi - wki * xr;
734           a[j] -= yr;
735           a[j + 1] = yi - a[j + 1];
736           a[k] += yr;
737           a[k + 1] = yi - a[k + 1];
738         }
739         a[m + 1] = -a[m + 1];
740       }
741 
742       OouraFFT(const OouraFFT&) = delete;
743       OouraFFT& operator=(const OouraFFT&) = delete;
744     };
745 
746     std::unique_ptr<AudioFFTImpl> MakeAudioFFTImpl()
747     {
748       return std::unique_ptr<OouraFFT>(new OouraFFT());
749     }
750 
751 
752 #endif // AUDIOFFT_OOURA_USED
753 
754 
755     // ================================================================
756 
757 
758 #ifdef AUDIOFFT_APPLE_ACCELERATE_USED
759 
760 
761     /**
762      * @internal
763      * @class AppleAccelerateFFT
764      * @brief FFT implementation using the Apple Accelerate framework internally
765      */
766     class AppleAccelerateFFT : public AudioFFTImpl
767     {
768     public:
769       AppleAccelerateFFT() :
770         AudioFFTImpl(),
771         _size(0),
772         _powerOf2(0),
773         _fftSetup(0),
774         _re(),
775         _im()
776       {
777       }
778 
779       virtual ~AppleAccelerateFFT()
780       {
781         init(0);
782       }
783 
784       virtual void init(size_t size) override
785       {
786         if (_fftSetup)
787         {
788           vDSP_destroy_fftsetup(_fftSetup);
789           _size = 0;
790           _powerOf2 = 0;
791           _fftSetup = 0;
792           _re.clear();
793           _im.clear();
794         }
795 
796         if (size > 0)
797         {
798           _size = size;
799           _powerOf2 = 0;
800           while ((1 << _powerOf2) < _size)
801           {
802             ++_powerOf2;
803           }
804           _fftSetup = vDSP_create_fftsetup(_powerOf2, FFT_RADIX2);
805           _re.resize(_size / 2);
806           _im.resize(_size / 2);
807         }
808       }
809 
810       virtual void fft(const float* data, float* re, float* im) override
811       {
812         const size_t size2 = _size / 2;
813         DSPSplitComplex splitComplex;
814         splitComplex.realp = re;
815         splitComplex.imagp = im;
816         vDSP_ctoz(reinterpret_cast<const COMPLEX*>(data), 2, &splitComplex, 1, size2);
817         vDSP_fft_zrip(_fftSetup, &splitComplex, 1, _powerOf2, FFT_FORWARD);
818         const float factor = 0.5f;
819         vDSP_vsmul(re, 1, &factor, re, 1, size2);
820         vDSP_vsmul(im, 1, &factor, im, 1, size2);
821         re[size2] = im[0];
822         im[0] = 0.0f;
823         im[size2] = 0.0f;
824       }
825 
826       virtual void ifft(float* data, const float* re, const float* im) override
827       {
828         const size_t size2 = _size / 2;
829         ::memcpy(_re.data(), re, size2 * sizeof(float));
830         ::memcpy(_im.data(), im, size2 * sizeof(float));
831         _im[0] = re[size2];
832         DSPSplitComplex splitComplex;
833         splitComplex.realp = _re.data();
834         splitComplex.imagp = _im.data();
835         vDSP_fft_zrip(_fftSetup, &splitComplex, 1, _powerOf2, FFT_INVERSE);
836         vDSP_ztoc(&splitComplex, 1, reinterpret_cast<COMPLEX*>(data), 2, size2);
837         const float factor = 1.0f / static_cast<float>(_size);
838         vDSP_vsmul(data, 1, &factor, data, 1, _size);
839       }
840 
841     private:
842       size_t _size;
843       size_t _powerOf2;
844       FFTSetup _fftSetup;
845       std::vector<float> _re;
846       std::vector<float> _im;
847 
848       AppleAccelerateFFT(const AppleAccelerateFFT&) = delete;
849       AppleAccelerateFFT& operator=(const AppleAccelerateFFT&) = delete;
850     };
851 
852 
853     std::unique_ptr<AudioFFTImpl> MakeAudioFFTImpl()
854     {
855       return std::unique_ptr<AppleAccelerateFFT>(new AppleAccelerateFFT());
856     }
857 
858 
859 #endif // AUDIOFFT_APPLE_ACCELERATE_USED
860 
861 
862     // ================================================================
863 
864 
865 #ifdef AUDIOFFT_FFTW3_USED
866 
867 
868     /**
869      * @internal
870      * @class FFTW3FFT
871      * @brief FFT implementation using FFTW3 internally (see fftw.org)
872      */
873     class FFTW3FFT : public AudioFFTImpl
874     {
875     public:
876       FFTW3FFT() :
877         AudioFFTImpl(),
878        _size(0),
879        _complexSize(0),
880        _planForward(0),
881        _planBackward(0),
882        _data(0),
883        _re(0),
884        _im(0)
885       {
886       }
887 
888       virtual ~FFTW3FFT()
889       {
890         init(0);
891       }
892 
893       virtual void init(size_t size) override
894       {
895         if (_size != size)
896         {
897           if (_size > 0)
898           {
899             fftwf_destroy_plan(_planForward);
900             fftwf_destroy_plan(_planBackward);
901             _planForward = 0;
902             _planBackward = 0;
903             _size = 0;
904             _complexSize = 0;
905 
906             if (_data)
907             {
908               fftwf_free(_data);
909               _data = 0;
910             }
911 
912             if (_re)
913             {
914               fftwf_free(_re);
915               _re = 0;
916             }
917 
918             if (_im)
919             {
920               fftwf_free(_im);
921               _im = 0;
922             }
923           }
924 
925           if (size > 0)
926           {
927             _size = size;
928             _complexSize = AudioFFT::ComplexSize(_size);
929             const size_t complexSize = AudioFFT::ComplexSize(_size);
930             _data = reinterpret_cast<float*>(fftwf_malloc(_size * sizeof(float)));
931             _re = reinterpret_cast<float*>(fftwf_malloc(complexSize * sizeof(float)));
932             _im = reinterpret_cast<float*>(fftwf_malloc(complexSize * sizeof(float)));
933 
934             fftw_iodim dim;
935             dim.n = static_cast<int>(size);
936             dim.is = 1;
937             dim.os = 1;
938             _planForward = fftwf_plan_guru_split_dft_r2c(1, &dim, 0, 0, _data, _re, _im, FFTW_MEASURE);
939             _planBackward = fftwf_plan_guru_split_dft_c2r(1, &dim, 0, 0, _re, _im, _data, FFTW_MEASURE);
940           }
941         }
942       }
943 
944       virtual void fft(const float* data, float* re, float* im) override
945       {
946         ::memcpy(_data, data, _size * sizeof(float));
947         fftwf_execute_split_dft_r2c(_planForward, _data, _re, _im);
948         ::memcpy(re, _re, _complexSize * sizeof(float));
949         ::memcpy(im, _im, _complexSize * sizeof(float));
950       }
951 
952       void ifft(float* data, const float* re, const float* im)
953       {
954         ::memcpy(_re, re, _complexSize * sizeof(float));
955         ::memcpy(_im, im, _complexSize * sizeof(float));
956         fftwf_execute_split_dft_c2r(_planBackward, _re, _im, _data);
957         ScaleBuffer(data, _data, 1.0f / static_cast<float>(_size), _size);
958       }
959 
960     private:
961       size_t _size;
962       size_t _complexSize;
963       fftwf_plan _planForward;
964       fftwf_plan _planBackward;
965       float* _data;
966       float* _re;
967       float* _im;
968 
969       FFTW3FFT(const FFTW3FFT&) = delete;
970       FFTW3FFT& operator=(const FFTW3FFT&) = delete;
971     };
972 
973 
974     std::unique_ptr<AudioFFTImpl> MakeAudioFFTImpl()
975     {
976       return std::unique_ptr<FFTW3FFT>(new FFTW3FFT());
977     }
978 
979 
980 #endif // AUDIOFFT_FFTW3_USED
981 
982   } // End of namespace details
983 
984 
985   // =============================================================
986 
987 
988   AudioFFT::AudioFFT() :
989     _impl(details::MakeAudioFFTImpl())
990   {
991   }
992 
993 
994   void AudioFFT::init(size_t size)
995   {
996     assert(details::IsPowerOf2(size));
997     _impl->init(size);
998   }
999 
1000 
1001   void AudioFFT::fft(const float* data, float* re, float* im)
1002   {
1003     _impl->fft(data, re, im);
1004   }
1005 
1006 
1007   void AudioFFT::ifft(float* data, const float* re, const float* im)
1008   {
1009     _impl->ifft(data, re, im);
1010   }
1011 
1012 
1013   size_t AudioFFT::ComplexSize(size_t size)
1014   {
1015     return (size / 2) + 1;
1016   }
1017 
1018 } // End of namespace
1019