1 /*
2   ==============================================================================
3 
4    This file is part of the JUCE library.
5    Copyright (c) 2020 - Raw Material Software Limited
6 
7    JUCE is an open source library subject to commercial or open-source
8    licensing.
9 
10    By using JUCE, you agree to the terms of both the JUCE 6 End-User License
11    Agreement and JUCE Privacy Policy (both effective as of the 16th June 2020).
12 
13    End User License Agreement: www.juce.com/juce-6-licence
14    Privacy Policy: www.juce.com/juce-privacy-policy
15 
16    Or: You may also use this code under the terms of the GPL v3 (see
17    www.gnu.org/licenses).
18 
19    JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
20    EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
21    DISCLAIMED.
22 
23   ==============================================================================
24 */
25 
26 namespace juce
27 {
28 namespace dsp
29 {
30 
31 struct FFT::Instance
32 {
33     virtual ~Instance() = default;
34     virtual void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept = 0;
35     virtual void performRealOnlyForwardTransform (float*, bool) const noexcept = 0;
36     virtual void performRealOnlyInverseTransform (float*) const noexcept = 0;
37 };
38 
39 struct FFT::Engine
40 {
Enginejuce::dsp::FFT::Engine41     Engine (int priorityToUse) : enginePriority (priorityToUse)
42     {
43         auto& list = getEngines();
44         list.add (this);
45         std::sort (list.begin(), list.end(), [] (Engine* a, Engine* b) { return b->enginePriority < a->enginePriority; });
46     }
47 
48     virtual ~Engine() = default;
49 
50     virtual FFT::Instance* create (int order) const = 0;
51 
52     //==============================================================================
createBestEngineForPlatformjuce::dsp::FFT::Engine53     static FFT::Instance* createBestEngineForPlatform (int order)
54     {
55         for (auto* engine : getEngines())
56             if (auto* instance = engine->create (order))
57                 return instance;
58 
59         jassertfalse;  // This should never happen as the fallback engine should always work!
60         return nullptr;
61     }
62 
63 private:
getEnginesjuce::dsp::FFT::Engine64     static Array<Engine*>& getEngines()
65     {
66         static Array<Engine*> engines;
67         return engines;
68     }
69 
70     int enginePriority; // used so that faster engines have priority over slower ones
71 };
72 
73 template <typename InstanceToUse>
74 struct FFT::EngineImpl  : public FFT::Engine
75 {
EngineImpljuce::dsp::FFT::EngineImpl76     EngineImpl() : FFT::Engine (InstanceToUse::priority)        {}
createjuce::dsp::FFT::EngineImpl77     FFT::Instance* create (int order) const override            { return InstanceToUse::create (order); }
78 };
79 
80 //==============================================================================
81 //==============================================================================
82 struct FFTFallback  : public FFT::Instance
83 {
84     // this should have the least priority of all engines
85     static constexpr int priority = -1;
86 
createjuce::dsp::FFTFallback87     static FFTFallback* create (int order)
88     {
89         return new FFTFallback (order);
90     }
91 
FFTFallbackjuce::dsp::FFTFallback92     FFTFallback (int order)
93     {
94         configForward.reset (new FFTConfig (1 << order, false));
95         configInverse.reset (new FFTConfig (1 << order, true));
96 
97         size = 1 << order;
98     }
99 
performjuce::dsp::FFTFallback100     void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
101     {
102         if (size == 1)
103         {
104             *output = *input;
105             return;
106         }
107 
108         const SpinLock::ScopedLockType sl(processLock);
109 
110         jassert (configForward != nullptr);
111 
112         if (inverse)
113         {
114             configInverse->perform (input, output);
115 
116             const float scaleFactor = 1.0f / (float) size;
117 
118             for (int i = 0; i < size; ++i)
119                 output[i] *= scaleFactor;
120         }
121         else
122         {
123             configForward->perform (input, output);
124         }
125     }
126 
127     const size_t maxFFTScratchSpaceToAlloca = 256 * 1024;
128 
performRealOnlyForwardTransformjuce::dsp::FFTFallback129     void performRealOnlyForwardTransform (float* d, bool) const noexcept override
130     {
131         if (size == 1)
132             return;
133 
134         const size_t scratchSize = 16 + (size_t) size * sizeof (Complex<float>);
135 
136         if (scratchSize < maxFFTScratchSpaceToAlloca)
137         {
138             performRealOnlyForwardTransform (static_cast<Complex<float>*> (alloca (scratchSize)), d);
139         }
140         else
141         {
142             HeapBlock<char> heapSpace (scratchSize);
143             performRealOnlyForwardTransform (unalignedPointerCast<Complex<float>*> (heapSpace.getData()), d);
144         }
145     }
146 
performRealOnlyInverseTransformjuce::dsp::FFTFallback147     void performRealOnlyInverseTransform (float* d) const noexcept override
148     {
149         if (size == 1)
150             return;
151 
152         const size_t scratchSize = 16 + (size_t) size * sizeof (Complex<float>);
153 
154         if (scratchSize < maxFFTScratchSpaceToAlloca)
155         {
156             performRealOnlyInverseTransform (static_cast<Complex<float>*> (alloca (scratchSize)), d);
157         }
158         else
159         {
160             HeapBlock<char> heapSpace (scratchSize);
161             performRealOnlyInverseTransform (unalignedPointerCast<Complex<float>*> (heapSpace.getData()), d);
162         }
163     }
164 
performRealOnlyForwardTransformjuce::dsp::FFTFallback165     void performRealOnlyForwardTransform (Complex<float>* scratch, float* d) const noexcept
166     {
167         for (int i = 0; i < size; ++i)
168             scratch[i] = { d[i], 0 };
169 
170         perform (scratch, reinterpret_cast<Complex<float>*> (d), false);
171     }
172 
performRealOnlyInverseTransformjuce::dsp::FFTFallback173     void performRealOnlyInverseTransform (Complex<float>* scratch, float* d) const noexcept
174     {
175         auto* input = reinterpret_cast<Complex<float>*> (d);
176 
177         for (int i = size >> 1; i < size; ++i)
178             input[i] = std::conj (input[size - i]);
179 
180         perform (input, scratch, true);
181 
182         for (int i = 0; i < size; ++i)
183         {
184             d[i] = scratch[i].real();
185             d[i + size] = scratch[i].imag();
186         }
187     }
188 
189     //==============================================================================
190     struct FFTConfig
191     {
FFTConfigjuce::dsp::FFTFallback::FFTConfig192         FFTConfig (int sizeOfFFT, bool isInverse)
193             : fftSize (sizeOfFFT), inverse (isInverse), twiddleTable ((size_t) sizeOfFFT)
194         {
195             auto inverseFactor = (inverse ? 2.0 : -2.0) * MathConstants<double>::pi / (double) fftSize;
196 
197             if (fftSize <= 4)
198             {
199                 for (int i = 0; i < fftSize; ++i)
200                 {
201                     auto phase = i * inverseFactor;
202 
203                     twiddleTable[i] = { (float) std::cos (phase),
204                                         (float) std::sin (phase) };
205                 }
206             }
207             else
208             {
209                 for (int i = 0; i < fftSize / 4; ++i)
210                 {
211                     auto phase = i * inverseFactor;
212 
213                     twiddleTable[i] = { (float) std::cos (phase),
214                                         (float) std::sin (phase) };
215                 }
216 
217                 for (int i = fftSize / 4; i < fftSize / 2; ++i)
218                 {
219                     auto other = twiddleTable[i - fftSize / 4];
220 
221                     twiddleTable[i] = { inverse ? -other.imag() :  other.imag(),
222                                         inverse ?  other.real() : -other.real() };
223                 }
224 
225                 twiddleTable[fftSize / 2].real (-1.0f);
226                 twiddleTable[fftSize / 2].imag (0.0f);
227 
228                 for (int i = fftSize / 2; i < fftSize; ++i)
229                 {
230                     auto index = fftSize / 2 - (i - fftSize / 2);
231                     twiddleTable[i] = conj(twiddleTable[index]);
232                 }
233             }
234 
235             auto root = (int) std::sqrt ((double) fftSize);
236             int divisor = 4, n = fftSize;
237 
238             for (int i = 0; i < numElementsInArray (factors); ++i)
239             {
240                 while ((n % divisor) != 0)
241                 {
242                     if (divisor == 2)       divisor = 3;
243                     else if (divisor == 4)  divisor = 2;
244                     else                    divisor += 2;
245 
246                     if (divisor > root)
247                         divisor = n;
248                 }
249 
250                 n /= divisor;
251 
252                 jassert (divisor == 1 || divisor == 2 || divisor == 4);
253                 factors[i].radix = divisor;
254                 factors[i].length = n;
255             }
256         }
257 
performjuce::dsp::FFTFallback::FFTConfig258         void perform (const Complex<float>* input, Complex<float>* output) const noexcept
259         {
260             perform (input, output, 1, 1, factors);
261         }
262 
263         const int fftSize;
264         const bool inverse;
265 
266         struct Factor { int radix, length; };
267         Factor factors[32];
268         HeapBlock<Complex<float>> twiddleTable;
269 
performjuce::dsp::FFTFallback::FFTConfig270         void perform (const Complex<float>* input, Complex<float>* output, int stride, int strideIn, const Factor* facs) const noexcept
271         {
272             auto factor = *facs++;
273             auto* originalOutput = output;
274             auto* outputEnd = output + factor.radix * factor.length;
275 
276             if (stride == 1 && factor.radix <= 5)
277             {
278                 for (int i = 0; i < factor.radix; ++i)
279                     perform (input + stride * strideIn * i, output + i * factor.length, stride * factor.radix, strideIn, facs);
280 
281                 butterfly (factor, output, stride);
282                 return;
283             }
284 
285             if (factor.length == 1)
286             {
287                 do
288                 {
289                     *output++ = *input;
290                     input += stride * strideIn;
291                 }
292                 while (output < outputEnd);
293             }
294             else
295             {
296                 do
297                 {
298                     perform (input, output, stride * factor.radix, strideIn, facs);
299                     input += stride * strideIn;
300                     output += factor.length;
301                 }
302                 while (output < outputEnd);
303             }
304 
305             butterfly (factor, originalOutput, stride);
306         }
307 
butterflyjuce::dsp::FFTFallback::FFTConfig308         void butterfly (const Factor factor, Complex<float>* data, int stride) const noexcept
309         {
310             switch (factor.radix)
311             {
312                 case 1:   break;
313                 case 2:   butterfly2 (data, stride, factor.length); return;
314                 case 4:   butterfly4 (data, stride, factor.length); return;
315                 default:  jassertfalse; break;
316             }
317 
318             auto* scratch = static_cast<Complex<float>*> (alloca ((size_t) factor.radix * sizeof (Complex<float>)));
319 
320             for (int i = 0; i < factor.length; ++i)
321             {
322                 for (int k = i, q1 = 0; q1 < factor.radix; ++q1)
323                 {
324                     scratch[q1] = data[k];
325                     k += factor.length;
326                 }
327 
328                 for (int k = i, q1 = 0; q1 < factor.radix; ++q1)
329                 {
330                     int twiddleIndex = 0;
331                     data[k] = scratch[0];
332 
333                     for (int q = 1; q < factor.radix; ++q)
334                     {
335                         twiddleIndex += stride * k;
336 
337                         if (twiddleIndex >= fftSize)
338                             twiddleIndex -= fftSize;
339 
340                         data[k] += scratch[q] * twiddleTable[twiddleIndex];
341                     }
342 
343                     k += factor.length;
344                 }
345             }
346         }
347 
butterfly2juce::dsp::FFTFallback::FFTConfig348         void butterfly2 (Complex<float>* data, const int stride, const int length) const noexcept
349         {
350             auto* dataEnd = data + length;
351             auto* tw = twiddleTable.getData();
352 
353             for (int i = length; --i >= 0;)
354             {
355                 auto s = *dataEnd;
356                 s *= (*tw);
357                 tw += stride;
358                 *dataEnd++ = *data - s;
359                 *data++ += s;
360             }
361         }
362 
butterfly4juce::dsp::FFTFallback::FFTConfig363         void butterfly4 (Complex<float>* data, const int stride, const int length) const noexcept
364         {
365             auto lengthX2 = length * 2;
366             auto lengthX3 = length * 3;
367 
368             auto strideX2 = stride * 2;
369             auto strideX3 = stride * 3;
370 
371             auto* twiddle1 = twiddleTable.getData();
372             auto* twiddle2 = twiddle1;
373             auto* twiddle3 = twiddle1;
374 
375             for (int i = length; --i >= 0;)
376             {
377                 auto s0 = data[length]   * *twiddle1;
378                 auto s1 = data[lengthX2] * *twiddle2;
379                 auto s2 = data[lengthX3] * *twiddle3;
380                 auto s3 = s0;             s3 += s2;
381                 auto s4 = s0;             s4 -= s2;
382                 auto s5 = *data;          s5 -= s1;
383 
384                 *data += s1;
385                 data[lengthX2] = *data;
386                 data[lengthX2] -= s3;
387                 twiddle1 += stride;
388                 twiddle2 += strideX2;
389                 twiddle3 += strideX3;
390                 *data += s3;
391 
392                 if (inverse)
393                 {
394                     data[length] = { s5.real() - s4.imag(),
395                                      s5.imag() + s4.real() };
396 
397                     data[lengthX3] = { s5.real() + s4.imag(),
398                                        s5.imag() - s4.real() };
399                 }
400                 else
401                 {
402                     data[length] = { s5.real() + s4.imag(),
403                                      s5.imag() - s4.real() };
404 
405                     data[lengthX3] = { s5.real() - s4.imag(),
406                                        s5.imag() + s4.real() };
407                 }
408 
409                 ++data;
410             }
411         }
412 
413         JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (FFTConfig)
414     };
415 
416     //==============================================================================
417     SpinLock processLock;
418     std::unique_ptr<FFTConfig> configForward, configInverse;
419     int size;
420 };
421 
422 FFT::EngineImpl<FFTFallback> fftFallback;
423 
424 //==============================================================================
425 //==============================================================================
426 #if (JUCE_MAC || JUCE_IOS) && JUCE_USE_VDSP_FRAMEWORK
427 struct AppleFFT  : public FFT::Instance
428 {
429     static constexpr int priority = 5;
430 
createjuce::dsp::AppleFFT431     static AppleFFT* create (int order)
432     {
433         return new AppleFFT (order);
434     }
435 
AppleFFTjuce::dsp::AppleFFT436     AppleFFT (int orderToUse)
437         : order (static_cast<vDSP_Length> (orderToUse)),
438           fftSetup (vDSP_create_fftsetup (order, 2)),
439           forwardNormalisation (0.5f),
440           inverseNormalisation (1.0f / static_cast<float> (1 << order))
441     {}
442 
~AppleFFTjuce::dsp::AppleFFT443     ~AppleFFT() override
444     {
445         if (fftSetup != nullptr)
446         {
447             vDSP_destroy_fftsetup (fftSetup);
448             fftSetup = nullptr;
449         }
450     }
451 
performjuce::dsp::AppleFFT452     void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
453     {
454         auto size = (1 << order);
455 
456         DSPSplitComplex splitInput  (toSplitComplex (const_cast<Complex<float>*> (input)));
457         DSPSplitComplex splitOutput (toSplitComplex (output));
458 
459         vDSP_fft_zop (fftSetup, &splitInput,  2, &splitOutput, 2,
460                       order, inverse ?  kFFTDirection_Inverse : kFFTDirection_Forward);
461 
462         float factor = (inverse ? inverseNormalisation : forwardNormalisation * 2.0f);
463         vDSP_vsmul ((float*) output, 1, &factor, (float*) output, 1, static_cast<size_t> (size << 1));
464     }
465 
performRealOnlyForwardTransformjuce::dsp::AppleFFT466     void performRealOnlyForwardTransform (float* inoutData, bool ignoreNegativeFreqs) const noexcept override
467     {
468         auto size = (1 << order);
469         auto* inout = reinterpret_cast<Complex<float>*> (inoutData);
470         auto splitInOut (toSplitComplex (inout));
471 
472         inoutData[size] = 0.0f;
473         vDSP_fft_zrip (fftSetup, &splitInOut, 2, order, kFFTDirection_Forward);
474         vDSP_vsmul (inoutData, 1, &forwardNormalisation, inoutData, 1, static_cast<size_t> (size << 1));
475 
476         mirrorResult (inout, ignoreNegativeFreqs);
477     }
478 
performRealOnlyInverseTransformjuce::dsp::AppleFFT479     void performRealOnlyInverseTransform (float* inoutData) const noexcept override
480     {
481         auto* inout = reinterpret_cast<Complex<float>*> (inoutData);
482         auto size = (1 << order);
483         auto splitInOut (toSplitComplex (inout));
484 
485         // Imaginary part of nyquist and DC frequencies are always zero
486         // so Apple uses the imaginary part of the DC frequency to store
487         // the real part of the nyquist frequency
488         if (size != 1)
489             inout[0] = Complex<float> (inout[0].real(), inout[size >> 1].real());
490 
491         vDSP_fft_zrip (fftSetup, &splitInOut, 2, order, kFFTDirection_Inverse);
492         vDSP_vsmul (inoutData, 1, &inverseNormalisation, inoutData, 1, static_cast<size_t> (size << 1));
493         vDSP_vclr (inoutData + size, 1, static_cast<size_t> (size));
494     }
495 
496 private:
497     //==============================================================================
mirrorResultjuce::dsp::AppleFFT498     void mirrorResult (Complex<float>* out, bool ignoreNegativeFreqs) const noexcept
499     {
500         auto size = (1 << order);
501         auto i = size >> 1;
502 
503         // Imaginary part of nyquist and DC frequencies are always zero
504         // so Apple uses the imaginary part of the DC frequency to store
505         // the real part of the nyquist frequency
506         out[i++] = { out[0].imag(), 0.0 };
507         out[0]   = { out[0].real(), 0.0 };
508 
509         if (! ignoreNegativeFreqs)
510             for (; i < size; ++i)
511                 out[i] = std::conj (out[size - i]);
512     }
513 
toSplitComplexjuce::dsp::AppleFFT514     static DSPSplitComplex toSplitComplex (Complex<float>* data) noexcept
515     {
516         // this assumes that Complex interleaves real and imaginary parts
517         // and is tightly packed.
518         return { reinterpret_cast<float*> (data),
519                  reinterpret_cast<float*> (data) + 1};
520     }
521 
522     //==============================================================================
523     vDSP_Length order;
524     FFTSetup fftSetup;
525     float forwardNormalisation, inverseNormalisation;
526 };
527 
528 FFT::EngineImpl<AppleFFT> appleFFT;
529 #endif
530 
531 //==============================================================================
532 //==============================================================================
533 #if JUCE_DSP_USE_SHARED_FFTW || JUCE_DSP_USE_STATIC_FFTW
534 
535 #if JUCE_DSP_USE_STATIC_FFTW
536 extern "C"
537 {
538     void* fftwf_plan_dft_1d     (int, void*, void*, int, int);
539     void* fftwf_plan_dft_r2c_1d (int, void*, void*, int);
540     void* fftwf_plan_dft_c2r_1d (int, void*, void*, int);
541     void fftwf_destroy_plan     (void*);
542     void fftwf_execute_dft      (void*, void*, void*);
543     void fftwf_execute_dft_r2c  (void*, void*, void*);
544     void fftwf_execute_dft_c2r  (void*, void*, void*);
545 }
546 #endif
547 
548 struct FFTWImpl  : public FFT::Instance
549 {
550    #if JUCE_DSP_USE_STATIC_FFTW
551     // if the JUCE developer has gone through the hassle of statically
552     // linking in fftw, they probably want to use it
553     static constexpr int priority = 10;
554    #else
555     static constexpr int priority = 3;
556    #endif
557 
558     struct FFTWPlan;
559     using FFTWPlanRef = FFTWPlan*;
560 
561     enum
562     {
563         measure   = 0,
564         unaligned = (1 << 1),
565         estimate  = (1 << 6)
566     };
567 
568     struct Symbols
569     {
570         FFTWPlanRef (*plan_dft_fftw) (unsigned, Complex<float>*, Complex<float>*, int, unsigned);
571         FFTWPlanRef (*plan_r2c_fftw) (unsigned, float*, Complex<float>*, unsigned);
572         FFTWPlanRef (*plan_c2r_fftw) (unsigned, Complex<float>*, float*, unsigned);
573         void (*destroy_fftw) (FFTWPlanRef);
574 
575         void (*execute_dft_fftw) (FFTWPlanRef, const Complex<float>*, Complex<float>*);
576         void (*execute_r2c_fftw) (FFTWPlanRef, float*, Complex<float>*);
577         void (*execute_c2r_fftw) (FFTWPlanRef, Complex<float>*, float*);
578 
579        #if JUCE_DSP_USE_STATIC_FFTW
580         template <typename FuncPtr, typename ActualSymbolType>
symboljuce::dsp::FFTWImpl::Symbols581         static bool symbol (FuncPtr& dst, ActualSymbolType sym)
582         {
583             dst = reinterpret_cast<FuncPtr> (sym);
584             return true;
585         }
586        #else
587         template <typename FuncPtr>
symboljuce::dsp::FFTWImpl::Symbols588         static bool symbol (DynamicLibrary& lib, FuncPtr& dst, const char* name)
589         {
590             dst = reinterpret_cast<FuncPtr> (lib.getFunction (name));
591             return (dst != nullptr);
592         }
593        #endif
594     };
595 
createjuce::dsp::FFTWImpl596     static FFTWImpl* create (int order)
597     {
598         DynamicLibrary lib;
599 
600       #if ! JUCE_DSP_USE_STATIC_FFTW
601        #if JUCE_MAC
602         auto libName = "libfftw3f.dylib";
603        #elif JUCE_WINDOWS
604         auto libName = "libfftw3f.dll";
605        #else
606         auto libName = "libfftw3f.so";
607        #endif
608 
609         if (lib.open (libName))
610       #endif
611         {
612             Symbols symbols;
613 
614            #if JUCE_DSP_USE_STATIC_FFTW
615             if (! Symbols::symbol (symbols.plan_dft_fftw, fftwf_plan_dft_1d))     return nullptr;
616             if (! Symbols::symbol (symbols.plan_r2c_fftw, fftwf_plan_dft_r2c_1d)) return nullptr;
617             if (! Symbols::symbol (symbols.plan_c2r_fftw, fftwf_plan_dft_c2r_1d)) return nullptr;
618             if (! Symbols::symbol (symbols.destroy_fftw,  fftwf_destroy_plan))    return nullptr;
619 
620             if (! Symbols::symbol (symbols.execute_dft_fftw, fftwf_execute_dft))     return nullptr;
621             if (! Symbols::symbol (symbols.execute_r2c_fftw, fftwf_execute_dft_r2c)) return nullptr;
622             if (! Symbols::symbol (symbols.execute_c2r_fftw, fftwf_execute_dft_c2r)) return nullptr;
623            #else
624             if (! Symbols::symbol (lib, symbols.plan_dft_fftw, "fftwf_plan_dft_1d"))     return nullptr;
625             if (! Symbols::symbol (lib, symbols.plan_r2c_fftw, "fftwf_plan_dft_r2c_1d")) return nullptr;
626             if (! Symbols::symbol (lib, symbols.plan_c2r_fftw, "fftwf_plan_dft_c2r_1d")) return nullptr;
627             if (! Symbols::symbol (lib, symbols.destroy_fftw,  "fftwf_destroy_plan"))    return nullptr;
628 
629             if (! Symbols::symbol (lib, symbols.execute_dft_fftw, "fftwf_execute_dft"))     return nullptr;
630             if (! Symbols::symbol (lib, symbols.execute_r2c_fftw, "fftwf_execute_dft_r2c")) return nullptr;
631             if (! Symbols::symbol (lib, symbols.execute_c2r_fftw, "fftwf_execute_dft_c2r")) return nullptr;
632            #endif
633 
634             return new FFTWImpl (static_cast<size_t> (order), std::move (lib), symbols);
635         }
636 
637         return nullptr;
638     }
639 
FFTWImpljuce::dsp::FFTWImpl640     FFTWImpl (size_t orderToUse, DynamicLibrary&& libraryToUse, const Symbols& symbols)
641         : fftwLibrary (std::move (libraryToUse)), fftw (symbols), order (static_cast<size_t> (orderToUse))
642     {
643         ScopedLock lock (getFFTWPlanLock());
644 
645         auto n = (1u << order);
646         HeapBlock<Complex<float>> in (n), out (n);
647 
648         c2cForward = fftw.plan_dft_fftw (n, in.getData(), out.getData(), -1, unaligned | estimate);
649         c2cInverse = fftw.plan_dft_fftw (n, in.getData(), out.getData(), +1, unaligned | estimate);
650 
651         r2c = fftw.plan_r2c_fftw (n, (float*) in.getData(), in.getData(), unaligned | estimate);
652         c2r = fftw.plan_c2r_fftw (n, in.getData(), (float*) in.getData(), unaligned | estimate);
653     }
654 
~FFTWImpljuce::dsp::FFTWImpl655     ~FFTWImpl() override
656     {
657         ScopedLock lock (getFFTWPlanLock());
658 
659         fftw.destroy_fftw (c2cForward);
660         fftw.destroy_fftw (c2cInverse);
661         fftw.destroy_fftw (r2c);
662         fftw.destroy_fftw (c2r);
663     }
664 
performjuce::dsp::FFTWImpl665     void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
666     {
667         if (inverse)
668         {
669             auto n = (1u << order);
670             fftw.execute_dft_fftw (c2cInverse, input, output);
671             FloatVectorOperations::multiply ((float*) output, 1.0f / static_cast<float> (n), (int) n << 1);
672         }
673         else
674         {
675             fftw.execute_dft_fftw (c2cForward, input, output);
676         }
677     }
678 
performRealOnlyForwardTransformjuce::dsp::FFTWImpl679     void performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept override
680     {
681         if (order == 0)
682             return;
683 
684         auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);
685 
686         fftw.execute_r2c_fftw (r2c, inputOutputData, out);
687 
688         auto size = (1 << order);
689 
690         if (! ignoreNegativeFreqs)
691             for (int i = size >> 1; i < size; ++i)
692                 out[i] = std::conj (out[size - i]);
693     }
694 
performRealOnlyInverseTransformjuce::dsp::FFTWImpl695     void performRealOnlyInverseTransform (float* inputOutputData) const noexcept override
696     {
697         auto n = (1u << order);
698 
699         fftw.execute_c2r_fftw (c2r, (Complex<float>*) inputOutputData, inputOutputData);
700         FloatVectorOperations::multiply ((float*) inputOutputData, 1.0f / static_cast<float> (n), (int) n);
701     }
702 
703     //==============================================================================
704     // fftw's plan_* and destroy_* methods are NOT thread safe. So we need to share
705     // a lock between all instances of FFTWImpl
getFFTWPlanLockjuce::dsp::FFTWImpl706     static CriticalSection& getFFTWPlanLock() noexcept
707     {
708         static CriticalSection cs;
709         return cs;
710     }
711 
712     //==============================================================================
713     DynamicLibrary fftwLibrary;
714     Symbols fftw;
715     size_t order;
716 
717     FFTWPlanRef c2cForward, c2cInverse, r2c, c2r;
718 };
719 
720 FFT::EngineImpl<FFTWImpl> fftwEngine;
721 #endif
722 
723 //==============================================================================
724 //==============================================================================
725 #if JUCE_DSP_USE_INTEL_MKL
726 struct IntelFFT  : public FFT::Instance
727 {
728     static constexpr int priority = 8;
729 
succeededjuce::dsp::IntelFFT730     static bool succeeded (MKL_LONG status) noexcept        { return status == 0; }
731 
createjuce::dsp::IntelFFT732     static IntelFFT* create (int orderToUse)
733     {
734         DFTI_DESCRIPTOR_HANDLE mklc2c, mklc2r;
735 
736         if (DftiCreateDescriptor (&mklc2c, DFTI_SINGLE, DFTI_COMPLEX, 1, 1 << orderToUse) == 0)
737         {
738             if (succeeded (DftiSetValue (mklc2c, DFTI_PLACEMENT, DFTI_NOT_INPLACE))
739                  && succeeded (DftiSetValue (mklc2c, DFTI_BACKWARD_SCALE, 1.0f / static_cast<float> (1 << orderToUse)))
740                  && succeeded (DftiCommitDescriptor (mklc2c)))
741             {
742                 if (succeeded (DftiCreateDescriptor (&mklc2r, DFTI_SINGLE, DFTI_REAL, 1, 1 << orderToUse)))
743                 {
744                     if (succeeded (DftiSetValue (mklc2r, DFTI_PLACEMENT, DFTI_INPLACE))
745                          && succeeded (DftiSetValue (mklc2r, DFTI_BACKWARD_SCALE, 1.0f / static_cast<float> (1 << orderToUse)))
746                          && succeeded (DftiCommitDescriptor (mklc2r)))
747                     {
748                         return new IntelFFT (static_cast<size_t> (orderToUse), mklc2c, mklc2r);
749                     }
750 
751                     DftiFreeDescriptor (&mklc2r);
752                 }
753             }
754 
755             DftiFreeDescriptor (&mklc2c);
756         }
757 
758         return {};
759     }
760 
IntelFFTjuce::dsp::IntelFFT761     IntelFFT (size_t orderToUse, DFTI_DESCRIPTOR_HANDLE c2cToUse, DFTI_DESCRIPTOR_HANDLE cr2ToUse)
762         : order (orderToUse), c2c (c2cToUse), c2r (cr2ToUse)
763     {}
764 
~IntelFFTjuce::dsp::IntelFFT765     ~IntelFFT()
766     {
767         DftiFreeDescriptor (&c2c);
768         DftiFreeDescriptor (&c2r);
769     }
770 
performjuce::dsp::IntelFFT771     void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
772     {
773         if (inverse)
774             DftiComputeBackward (c2c, (void*) input, output);
775         else
776             DftiComputeForward (c2c, (void*) input, output);
777     }
778 
performRealOnlyForwardTransformjuce::dsp::IntelFFT779     void performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept override
780     {
781         if (order == 0)
782             return;
783 
784         DftiComputeForward (c2r, inputOutputData);
785 
786         auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);
787         auto size = (1 << order);
788 
789         if (! ignoreNegativeFreqs)
790             for (int i = size >> 1; i < size; ++i)
791                 out[i] = std::conj (out[size - i]);
792     }
793 
performRealOnlyInverseTransformjuce::dsp::IntelFFT794     void performRealOnlyInverseTransform (float* inputOutputData) const noexcept override
795     {
796         DftiComputeBackward (c2r, inputOutputData);
797     }
798 
799     size_t order;
800     DFTI_DESCRIPTOR_HANDLE c2c, c2r;
801 };
802 
803 FFT::EngineImpl<IntelFFT> fftwEngine;
804 #endif
805 
806 //==============================================================================
807 //==============================================================================
808 // Visual Studio should define no more than one of these, depending on the
809 // setting at 'Project' > 'Properties' > 'Configuration Properties' > 'Intel
810 // Performance Libraries' > 'Use Intel(R) IPP'
811 #if _IPP_SEQUENTIAL_STATIC || _IPP_SEQUENTIAL_DYNAMIC || _IPP_PARALLEL_STATIC || _IPP_PARALLEL_DYNAMIC
812 class IntelPerformancePrimitivesFFT : public FFT::Instance
813 {
814 public:
815     static constexpr auto priority = 9;
816 
create(const int order)817     static IntelPerformancePrimitivesFFT* create (const int order)
818     {
819         auto complexContext = Context<ComplexTraits>::create (order);
820         auto realContext    = Context<RealTraits>   ::create (order);
821 
822         if (complexContext.isValid() && realContext.isValid())
823             return new IntelPerformancePrimitivesFFT (std::move (complexContext), std::move (realContext), order);
824 
825         return {};
826     }
827 
perform(const Complex<float> * input,Complex<float> * output,bool inverse) const828     void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept override
829     {
830         if (inverse)
831         {
832             ippsFFTInv_CToC_32fc (reinterpret_cast<const Ipp32fc*> (input),
833                                   reinterpret_cast<Ipp32fc*> (output),
834                                   cplx.specPtr,
835                                   cplx.workBuf.get());
836         }
837         else
838         {
839             ippsFFTFwd_CToC_32fc (reinterpret_cast<const Ipp32fc*> (input),
840                                   reinterpret_cast<Ipp32fc*> (output),
841                                   cplx.specPtr,
842                                   cplx.workBuf.get());
843         }
844     }
845 
performRealOnlyForwardTransform(float * inoutData,bool ignoreNegativeFreqs) const846     void performRealOnlyForwardTransform (float* inoutData, bool ignoreNegativeFreqs) const noexcept override
847     {
848         ippsFFTFwd_RToCCS_32f_I (inoutData, real.specPtr, real.workBuf.get());
849 
850         if (order == 0)
851             return;
852 
853         auto* out = reinterpret_cast<Complex<float>*> (inoutData);
854         const auto size = (1 << order);
855 
856         if (! ignoreNegativeFreqs)
857             for (auto i = size >> 1; i < size; ++i)
858                 out[i] = std::conj (out[size - i]);
859     }
860 
performRealOnlyInverseTransform(float * inoutData) const861     void performRealOnlyInverseTransform (float* inoutData) const noexcept override
862     {
863         ippsFFTInv_CCSToR_32f_I (inoutData, real.specPtr, real.workBuf.get());
864     }
865 
866 private:
867     static constexpr auto flag = IPP_FFT_DIV_INV_BY_N;
868     static constexpr auto hint = ippAlgHintFast;
869 
870     struct IppFree
871     {
872         template <typename Ptr>
operator ()juce::dsp::IntelPerformancePrimitivesFFT::IppFree873         void operator() (Ptr* ptr) const noexcept { ippsFree (ptr); }
874     };
875 
876     using IppPtr = std::unique_ptr<Ipp8u[], IppFree>;
877 
878     template <typename Traits>
879     struct Context
880     {
881         using SpecPtr = typename Traits::Spec*;
882 
createjuce::dsp::IntelPerformancePrimitivesFFT::Context883         static Context create (const int order)
884         {
885             int specSize = 0, initSize = 0, workSize = 0;
886 
887             if (Traits::getSize (order, flag, hint, &specSize, &initSize, &workSize) != ippStsNoErr)
888                 return {};
889 
890             const auto initBuf = IppPtr (ippsMalloc_8u (initSize));
891             auto specBuf       = IppPtr (ippsMalloc_8u (specSize));
892             SpecPtr specPtr = nullptr;
893 
894             if (Traits::init (&specPtr, order, flag, hint, specBuf.get(), initBuf.get()) != ippStsNoErr)
895                 return {};
896 
897             if (reinterpret_cast<const Ipp8u*> (specPtr) != specBuf.get())
898                 return {};
899 
900             return { std::move (specBuf), IppPtr (ippsMalloc_8u (workSize)), specPtr };
901         }
902 
903         Context() noexcept = default;
904 
Contextjuce::dsp::IntelPerformancePrimitivesFFT::Context905         Context (IppPtr&& spec, IppPtr&& work, typename Traits::Spec* ptr) noexcept
906             : specBuf (std::move (spec)), workBuf (std::move (work)), specPtr (ptr)
907         {}
908 
isValidjuce::dsp::IntelPerformancePrimitivesFFT::Context909         bool isValid() const noexcept { return specPtr != nullptr; }
910 
911         IppPtr specBuf, workBuf;
912         SpecPtr specPtr = nullptr;
913     };
914 
915     struct ComplexTraits
916     {
917         static constexpr auto getSize = ippsFFTGetSize_C_32fc;
918         static constexpr auto init = ippsFFTInit_C_32fc;
919         using Spec = IppsFFTSpec_C_32fc;
920     };
921 
922     struct RealTraits
923     {
924         static constexpr auto getSize = ippsFFTGetSize_R_32f;
925         static constexpr auto init = ippsFFTInit_R_32f;
926         using Spec = IppsFFTSpec_R_32f;
927     };
928 
IntelPerformancePrimitivesFFT(Context<ComplexTraits> && complexToUse,Context<RealTraits> && realToUse,const int orderToUse)929     IntelPerformancePrimitivesFFT (Context<ComplexTraits>&& complexToUse,
930                                    Context<RealTraits>&& realToUse,
931                                    const int orderToUse)
932         : cplx (std::move (complexToUse)),
933           real (std::move (realToUse)),
934           order (orderToUse)
935     {}
936 
937     Context<ComplexTraits> cplx;
938     Context<RealTraits> real;
939     int order = 0;
940 };
941 
942 FFT::EngineImpl<IntelPerformancePrimitivesFFT> intelPerformancePrimitivesFFT;
943 #endif
944 
945 //==============================================================================
946 //==============================================================================
FFT(int order)947 FFT::FFT (int order)
948     : engine (FFT::Engine::createBestEngineForPlatform (order)),
949       size (1 << order)
950 {
951 }
952 
953 FFT::FFT (FFT&&) noexcept = default;
954 
955 FFT& FFT::operator= (FFT&&) noexcept = default;
956 
957 FFT::~FFT() = default;
958 
perform(const Complex<float> * input,Complex<float> * output,bool inverse) const959 void FFT::perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept
960 {
961     if (engine != nullptr)
962         engine->perform (input, output, inverse);
963 }
964 
performRealOnlyForwardTransform(float * inputOutputData,bool ignoreNeagtiveFreqs) const965 void FFT::performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNeagtiveFreqs) const noexcept
966 {
967     if (engine != nullptr)
968         engine->performRealOnlyForwardTransform (inputOutputData, ignoreNeagtiveFreqs);
969 }
970 
performRealOnlyInverseTransform(float * inputOutputData) const971 void FFT::performRealOnlyInverseTransform (float* inputOutputData) const noexcept
972 {
973     if (engine != nullptr)
974         engine->performRealOnlyInverseTransform (inputOutputData);
975 }
976 
performFrequencyOnlyForwardTransform(float * inputOutputData) const977 void FFT::performFrequencyOnlyForwardTransform (float* inputOutputData) const noexcept
978 {
979     if (size == 1)
980         return;
981 
982     performRealOnlyForwardTransform (inputOutputData);
983     auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);
984 
985     for (int i = 0; i < size; ++i)
986         inputOutputData[i] = std::abs (out[i]);
987 
988     zeromem (&inputOutputData[size], static_cast<size_t> (size) * sizeof (float));
989 }
990 
991 } // namespace dsp
992 } // namespace juce
993