1 /*
2 * Copyright (C) 2010 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14 * its contributors may be used to endorse or promote products derived
15 * from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "ReverbConvolver.h"
30 #include "ReverbConvolverStage.h"
31
32 using namespace mozilla;
33
34 namespace WebCore {
35
36 const int InputBufferSize = 8 * 16384;
37
38 // We only process the leading portion of the impulse response in the real-time thread. We don't exceed this length.
39 // It turns out then, that the background thread has about 278msec of scheduling slop.
40 // Empirically, this has been found to be a good compromise between giving enough time for scheduling slop,
41 // while still minimizing the amount of processing done in the primary (high-priority) thread.
42 // This was found to be a good value on Mac OS X, and may work well on other platforms as well, assuming
43 // the very rough scheduling latencies are similar on these time-scales. Of course, this code may need to be
44 // tuned for individual platforms if this assumption is found to be incorrect.
45 const size_t RealtimeFrameLimit = 8192 + 4096 // ~278msec @ 44.1KHz
46 - WEBAUDIO_BLOCK_SIZE;
47 // First stage will have size MinFFTSize - successive stages will double in
48 // size each time until we hit the maximum size.
49 const size_t MinFFTSize = 256;
50 // If we are using background threads then don't exceed this FFT size for the
51 // stages which run in the real-time thread. This avoids having only one or
52 // two large stages (size 16384 or so) at the end which take a lot of time
53 // every several processing slices. This way we amortize the cost over more
54 // processing slices.
55 const size_t MaxRealtimeFFTSize = 4096;
56
ReverbConvolver(const float * impulseResponseData,size_t impulseResponseLength,size_t maxFFTSize,size_t convolverRenderPhase,bool useBackgroundThreads)57 ReverbConvolver::ReverbConvolver(const float* impulseResponseData,
58 size_t impulseResponseLength,
59 size_t maxFFTSize,
60 size_t convolverRenderPhase,
61 bool useBackgroundThreads)
62 : m_impulseResponseLength(impulseResponseLength)
63 , m_accumulationBuffer(impulseResponseLength + WEBAUDIO_BLOCK_SIZE)
64 , m_inputBuffer(InputBufferSize)
65 , m_backgroundThread("ConvolverWorker")
66 , m_backgroundThreadCondition(&m_backgroundThreadLock)
67 , m_useBackgroundThreads(useBackgroundThreads)
68 , m_wantsToExit(false)
69 , m_moreInputBuffered(false)
70 {
71 // For the moment, a good way to know if we have real-time constraint is to check if we're using background threads.
72 // Otherwise, assume we're being run from a command-line tool.
73 bool hasRealtimeConstraint = useBackgroundThreads;
74
75 const float* response = impulseResponseData;
76 size_t totalResponseLength = impulseResponseLength;
77
78 // The total latency is zero because the first FFT stage is small enough
79 // to return output in the first block.
80 size_t reverbTotalLatency = 0;
81
82 size_t stageOffset = 0;
83 size_t stagePhase = 0;
84 size_t fftSize = MinFFTSize;
85 while (stageOffset < totalResponseLength) {
86 size_t stageSize = fftSize / 2;
87
88 // For the last stage, it's possible that stageOffset is such that we're straddling the end
89 // of the impulse response buffer (if we use stageSize), so reduce the last stage's length...
90 if (stageSize + stageOffset > totalResponseLength) {
91 stageSize = totalResponseLength - stageOffset;
92 // Use smallest FFT that is large enough to cover the last stage.
93 fftSize = MinFFTSize;
94 while (stageSize * 2 > fftSize) {
95 fftSize *= 2;
96 }
97 }
98
99 // This "staggers" the time when each FFT happens so they don't all happen at the same time
100 int renderPhase = convolverRenderPhase + stagePhase;
101
102 nsAutoPtr<ReverbConvolverStage> stage
103 (new ReverbConvolverStage(response, totalResponseLength,
104 reverbTotalLatency, stageOffset, stageSize,
105 fftSize, renderPhase,
106 &m_accumulationBuffer));
107
108 bool isBackgroundStage = false;
109
110 if (this->useBackgroundThreads() && stageOffset > RealtimeFrameLimit) {
111 m_backgroundStages.AppendElement(stage.forget());
112 isBackgroundStage = true;
113 } else
114 m_stages.AppendElement(stage.forget());
115
116 // Figure out next FFT size
117 fftSize *= 2;
118
119 stageOffset += stageSize;
120
121 if (hasRealtimeConstraint && !isBackgroundStage
122 && fftSize > MaxRealtimeFFTSize) {
123 fftSize = MaxRealtimeFFTSize;
124 // Custom phase positions for all but the first of the realtime
125 // stages of largest size. These spread out the work of the
126 // larger realtime stages. None of the FFTs of size 1024, 2048 or
127 // 4096 are performed when processing the same block. The first
128 // MaxRealtimeFFTSize = 4096 stage, at the end of the doubling,
129 // performs its FFT at block 7. The FFTs of size 2048 are
130 // performed in blocks 3 + 8 * n and size 1024 at 1 + 4 * n.
131 const uint32_t phaseLookup[] = { 14, 0, 10, 4 };
132 stagePhase = WEBAUDIO_BLOCK_SIZE *
133 phaseLookup[m_stages.Length() % ArrayLength(phaseLookup)];
134 } else if (fftSize > maxFFTSize) {
135 fftSize = maxFFTSize;
136 // A prime offset spreads out FFTs in a way that all
137 // available phase positions will be used if there are sufficient
138 // stages.
139 stagePhase += 5 * WEBAUDIO_BLOCK_SIZE;
140 } else if (stageSize > WEBAUDIO_BLOCK_SIZE) {
141 // As the stages are doubling in size, the next FFT will occur
142 // mid-way between FFTs for this stage.
143 stagePhase = stageSize - WEBAUDIO_BLOCK_SIZE;
144 }
145 }
146
147 // Start up background thread
148 // FIXME: would be better to up the thread priority here. It doesn't need to be real-time, but higher than the default...
149 if (this->useBackgroundThreads() && m_backgroundStages.Length() > 0) {
150 if (!m_backgroundThread.Start()) {
151 NS_WARNING("Cannot start convolver thread.");
152 return;
153 }
154 m_backgroundThread.message_loop()->PostTask(NewNonOwningRunnableMethod(this,
155 &ReverbConvolver::backgroundThreadEntry));
156 }
157 }
158
~ReverbConvolver()159 ReverbConvolver::~ReverbConvolver()
160 {
161 // Wait for background thread to stop
162 if (useBackgroundThreads() && m_backgroundThread.IsRunning()) {
163 m_wantsToExit = true;
164
165 // Wake up thread so it can return
166 {
167 AutoLock locker(m_backgroundThreadLock);
168 m_moreInputBuffered = true;
169 m_backgroundThreadCondition.Signal();
170 }
171
172 m_backgroundThread.Stop();
173 }
174 }
175
sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const176 size_t ReverbConvolver::sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
177 {
178 size_t amount = aMallocSizeOf(this);
179 amount += m_stages.ShallowSizeOfExcludingThis(aMallocSizeOf);
180 for (size_t i = 0; i < m_stages.Length(); i++) {
181 if (m_stages[i]) {
182 amount += m_stages[i]->sizeOfIncludingThis(aMallocSizeOf);
183 }
184 }
185
186 amount += m_backgroundStages.ShallowSizeOfExcludingThis(aMallocSizeOf);
187 for (size_t i = 0; i < m_backgroundStages.Length(); i++) {
188 if (m_backgroundStages[i]) {
189 amount += m_backgroundStages[i]->sizeOfIncludingThis(aMallocSizeOf);
190 }
191 }
192
193 // NB: The buffer sizes are static, so even though they might be accessed
194 // in another thread it's safe to measure them.
195 amount += m_accumulationBuffer.sizeOfExcludingThis(aMallocSizeOf);
196 amount += m_inputBuffer.sizeOfExcludingThis(aMallocSizeOf);
197
198 // Possible future measurements:
199 // - m_backgroundThread
200 // - m_backgroundThreadLock
201 // - m_backgroundThreadCondition
202 return amount;
203 }
204
backgroundThreadEntry()205 void ReverbConvolver::backgroundThreadEntry()
206 {
207 while (!m_wantsToExit) {
208 // Wait for realtime thread to give us more input
209 m_moreInputBuffered = false;
210 {
211 AutoLock locker(m_backgroundThreadLock);
212 while (!m_moreInputBuffered && !m_wantsToExit)
213 m_backgroundThreadCondition.Wait();
214 }
215
216 // Process all of the stages until their read indices reach the input buffer's write index
217 int writeIndex = m_inputBuffer.writeIndex();
218
219 // Even though it doesn't seem like every stage needs to maintain its own version of readIndex
220 // we do this in case we want to run in more than one background thread.
221 int readIndex;
222
223 while ((readIndex = m_backgroundStages[0]->inputReadIndex()) != writeIndex) { // FIXME: do better to detect buffer overrun...
224 // Accumulate contributions from each stage
225 for (size_t i = 0; i < m_backgroundStages.Length(); ++i)
226 m_backgroundStages[i]->processInBackground(this);
227 }
228 }
229 }
230
process(const float * sourceChannelData,float * destinationChannelData)231 void ReverbConvolver::process(const float* sourceChannelData,
232 float* destinationChannelData)
233 {
234 const float* source = sourceChannelData;
235 float* destination = destinationChannelData;
236 bool isDataSafe = source && destination;
237 MOZ_ASSERT(isDataSafe);
238 if (!isDataSafe)
239 return;
240
241 // Feed input buffer (read by all threads)
242 m_inputBuffer.write(source, WEBAUDIO_BLOCK_SIZE);
243
244 // Accumulate contributions from each stage
245 for (size_t i = 0; i < m_stages.Length(); ++i)
246 m_stages[i]->process(source);
247
248 // Finally read from accumulation buffer
249 m_accumulationBuffer.readAndClear(destination, WEBAUDIO_BLOCK_SIZE);
250
251 // Now that we've buffered more input, wake up our background thread.
252
253 // Not using a MutexLocker looks strange, but we use a tryLock() instead because this is run on the real-time
254 // thread where it is a disaster for the lock to be contended (causes audio glitching). It's OK if we fail to
255 // signal from time to time, since we'll get to it the next time we're called. We're called repeatedly
256 // and frequently (around every 3ms). The background thread is processing well into the future and has a considerable amount of
257 // leeway here...
258 if (m_backgroundThreadLock.Try()) {
259 m_moreInputBuffered = true;
260 m_backgroundThreadCondition.Signal();
261 m_backgroundThreadLock.Release();
262 }
263 }
264
265 } // namespace WebCore
266