1 /*
2  * Copyright (C) 2010 Google Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1.  Redistributions of source code must retain the above copyright
9  *     notice, this list of conditions and the following disclaimer.
10  * 2.  Redistributions in binary form must reproduce the above copyright
11  *     notice, this list of conditions and the following disclaimer in the
12  *     documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
18  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
21  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 // FFTFrame implementation using Intel's Math Kernel Library (MKL),
27 // suitable for use on Windows and Linux.
28 
29 #include "config.h"
30 
31 #if ENABLE(WEB_AUDIO)
32 
33 #if !OS(DARWIN) && USE(WEBAUDIO_MKL)
34 
35 #include "FFTFrame.h"
36 
37 #include "mkl_vml.h"
38 #include <wtf/MathExtras.h>
39 
40 namespace {
41 
createDescriptorHandle(int fftSize)42 DFTI_DESCRIPTOR_HANDLE createDescriptorHandle(int fftSize)
43 {
44     DFTI_DESCRIPTOR_HANDLE handle = 0;
45 
46     // Create DFTI descriptor for 1D single precision transform.
47     MKL_LONG status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, fftSize);
48     ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
49 
50     // Set placement of result to DFTI_NOT_INPLACE.
51     status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
52     ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
53 
54     // Set packing format to PERM; this produces the layout which
55     // matches Accelerate.framework's on the Mac, though interleaved.
56     status = DftiSetValue(handle, DFTI_PACKED_FORMAT, DFTI_PERM_FORMAT);
57     ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
58 
59     // Set the forward scale factor to 2 to match Accelerate.framework's.
60     // FIXME: FFTFrameMac's scaling factor could be fixed to be 1.0,
61     // in which case this code would need to be changed as well.
62     status = DftiSetValue(handle, DFTI_FORWARD_SCALE, 2.0);
63     ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
64 
65     // Set the backward scale factor to 1 / 2n to match Accelerate.framework's.
66     // FIXME: if the above scaling factor is fixed then this needs to be as well.
67     double scale = 1.0 / (2.0 * fftSize);
68     status = DftiSetValue(handle, DFTI_BACKWARD_SCALE, scale);
69     ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
70 
71     // Use the default DFTI_CONJUGATE_EVEN_STORAGE = DFTI_COMPLEX_REAL.
72 
73     // Commit DFTI descriptor.
74     status = DftiCommitDescriptor(handle);
75     ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
76 
77     return handle;
78 }
79 
80 } // anonymous namespace
81 
82 namespace WebCore {
83 
84 const int kMaxFFTPow2Size = 24;
85 
86 DFTI_DESCRIPTOR_HANDLE* FFTFrame::descriptorHandles = 0;
87 
88 // Normal constructor: allocates for a given fftSize.
FFTFrame(unsigned fftSize)89 FFTFrame::FFTFrame(unsigned fftSize)
90     : m_FFTSize(fftSize)
91     , m_log2FFTSize(static_cast<unsigned>(log2(fftSize)))
92     , m_handle(0)
93     , m_complexData(fftSize)
94     , m_realData(fftSize / 2)
95     , m_imagData(fftSize / 2)
96 {
97     // We only allow power of two.
98     ASSERT(1UL << m_log2FFTSize == m_FFTSize);
99 
100     m_handle = descriptorHandleForSize(fftSize);
101 }
102 
103 // Creates a blank/empty frame (interpolate() must later be called).
FFTFrame()104 FFTFrame::FFTFrame()
105     : m_FFTSize(0)
106     , m_log2FFTSize(0)
107     , m_handle(0)
108 {
109 }
110 
111 // Copy constructor.
FFTFrame(const FFTFrame & frame)112 FFTFrame::FFTFrame(const FFTFrame& frame)
113     : m_FFTSize(frame.m_FFTSize)
114     , m_log2FFTSize(frame.m_log2FFTSize)
115     , m_handle(0)
116     , m_complexData(frame.m_FFTSize)
117     , m_realData(frame.m_FFTSize / 2)
118     , m_imagData(frame.m_FFTSize / 2)
119 {
120     m_handle = descriptorHandleForSize(m_FFTSize);
121 
122     // Copy/setup frame data.
123     unsigned nbytes = sizeof(float) * (m_FFTSize / 2);
124     memcpy(realData(), frame.realData(), nbytes);
125     memcpy(imagData(), frame.imagData(), nbytes);
126 }
127 
~FFTFrame()128 FFTFrame::~FFTFrame()
129 {
130 }
131 
multiply(const FFTFrame & frame)132 void FFTFrame::multiply(const FFTFrame& frame)
133 {
134     FFTFrame& frame1 = *this;
135     FFTFrame& frame2 = const_cast<FFTFrame&>(frame);
136 
137     float* realP1 = frame1.realData();
138     float* imagP1 = frame1.imagData();
139     const float* realP2 = frame2.realData();
140     const float* imagP2 = frame2.imagData();
141 
142     // Scale accounts for vecLib's peculiar scaling.
143     // This ensures the right scaling all the way back to inverse FFT.
144     // FIXME: this scaling factor will be 1.0f if the above 2.0 -> 1.0
145     // scaling factor is fixed.
146     float scale = 0.5f;
147 
148     // Multiply packed DC/nyquist component.
149     realP1[0] *= scale * realP2[0];
150     imagP1[0] *= scale * imagP2[0];
151 
152     // Multiply the rest, skipping packed DC/Nyquist components.
153     float* interleavedData1 = frame1.getUpToDateComplexData();
154     float* interleavedData2 = frame2.getUpToDateComplexData();
155 
156     unsigned halfSize = m_FFTSize / 2;
157 
158     // Complex multiply.
159     vcMul(halfSize - 1,
160           reinterpret_cast<MKL_Complex8*>(interleavedData1) + 1,
161           reinterpret_cast<MKL_Complex8*>(interleavedData2) + 1,
162           reinterpret_cast<MKL_Complex8*>(interleavedData1) + 1);
163 
164     // De-interleave and scale the rest of the data.
165     // FIXME: find an MKL routine to do at least the scaling more efficiently.
166     for (unsigned i = 1; i < halfSize; ++i) {
167         int baseComplexIndex = 2 * i;
168         realP1[i] = scale * interleavedData1[baseComplexIndex];
169         imagP1[i] = scale * interleavedData1[baseComplexIndex + 1];
170     }
171 }
172 
doFFT(float * data)173 void FFTFrame::doFFT(float* data)
174 {
175     // Compute Forward transform.
176     MKL_LONG status = DftiComputeForward(m_handle, data, m_complexData.data());
177     ASSERT_UNUSED(status, DftiErrorClass(status, DFTI_NO_ERROR));
178 
179     // De-interleave to separate real and complex arrays. FIXME:
180     // figure out if it's possible to get MKL to use split-complex
181     // form for 1D real-to-complex out-of-place FFTs.
182     int len = m_FFTSize / 2;
183     for (int i = 0; i < len; ++i) {
184         int baseComplexIndex = 2 * i;
185         // m_realData[0] is the DC component and m_imagData[0] the
186         // Nyquist component since the interleaved complex data is
187         // packed.
188         m_realData[i] = m_complexData[baseComplexIndex];
189         m_imagData[i] = m_complexData[baseComplexIndex + 1];
190     }
191 }
192 
doInverseFFT(float * data)193 void FFTFrame::doInverseFFT(float* data)
194 {
195     // Prepare interleaved data. FIXME: figure out if it's possible to
196     // get MKL to use split-complex form for 1D backward
197     // (complex-to-real) out-of-place FFTs.
198     float* interleavedData = getUpToDateComplexData();
199 
200     // Compute backward transform.
201     MKL_LONG status = DftiComputeBackward(m_handle, interleavedData, data);
202     ASSERT_UNUSED(status, DftiErrorClass(status, DFTI_NO_ERROR));
203 }
204 
initialize()205 void FFTFrame::initialize()
206 {
207 }
208 
cleanup()209 void FFTFrame::cleanup()
210 {
211     if (!descriptorHandles)
212         return;
213 
214     for (int i = 0; i < kMaxFFTPow2Size; ++i) {
215         if (descriptorHandles[i]) {
216             MKL_LONG status = DftiFreeDescriptor(&descriptorHandles[i]);
217             ASSERT_UNUSED(status, DftiErrorClass(status, DFTI_NO_ERROR));
218         }
219     }
220 
221     delete[] descriptorHandles;
222     descriptorHandles = 0;
223 }
224 
realData() const225 float* FFTFrame::realData() const
226 {
227     return const_cast<float*>(m_realData.data());
228 }
229 
imagData() const230 float* FFTFrame::imagData() const
231 {
232     return const_cast<float*>(m_imagData.data());
233 }
234 
getUpToDateComplexData()235 float* FFTFrame::getUpToDateComplexData()
236 {
237     // FIXME: if we can't completely get rid of this method, SSE
238     // optimization could be considered if it shows up hot on profiles.
239     int len = m_FFTSize / 2;
240     for (int i = 0; i < len; ++i) {
241         int baseComplexIndex = 2 * i;
242         m_complexData[baseComplexIndex] = m_realData[i];
243         m_complexData[baseComplexIndex + 1] = m_imagData[i];
244     }
245     return const_cast<float*>(m_complexData.data());
246 }
247 
descriptorHandleForSize(unsigned fftSize)248 DFTI_DESCRIPTOR_HANDLE FFTFrame::descriptorHandleForSize(unsigned fftSize)
249 {
250     if (!descriptorHandles) {
251         descriptorHandles = new DFTI_DESCRIPTOR_HANDLE[kMaxFFTPow2Size];
252         for (int i = 0; i < kMaxFFTPow2Size; ++i)
253             descriptorHandles[i] = 0;
254     }
255 
256     ASSERT(fftSize);
257     int pow2size = static_cast<int>(log2(fftSize));
258     ASSERT(pow2size < kMaxFFTPow2Size);
259     if (!descriptorHandles[pow2size])
260         descriptorHandles[pow2size] = createDescriptorHandle(fftSize);
261     return descriptorHandles[pow2size];
262 }
263 
264 } // namespace WebCore
265 
266 #endif // !OS(DARWIN) && USE(WEBAUDIO_MKL)
267 
268 #endif // ENABLE(WEB_AUDIO)
269