1 /*
2 * Copyright (C) 2010 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14 * its contributors may be used to endorse or promote products derived
15 * from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "ReverbConvolver.h"
30 #include "ReverbConvolverStage.h"
31
32 using namespace mozilla;
33
34 namespace WebCore {
35
36 const int InputBufferSize = 8 * 16384;
37
38 // We only process the leading portion of the impulse response in the real-time
39 // thread. We don't exceed this length. It turns out then, that the background
40 // thread has about 278msec of scheduling slop. Empirically, this has been found
41 // to be a good compromise between giving enough time for scheduling slop, while
42 // still minimizing the amount of processing done in the primary (high-priority)
43 // thread. This was found to be a good value on Mac OS X, and may work well on
44 // other platforms as well, assuming the very rough scheduling latencies are
45 // similar on these time-scales. Of course, this code may need to be tuned for
46 // individual platforms if this assumption is found to be incorrect.
47 const size_t RealtimeFrameLimit = 8192 + 4096 // ~278msec @ 44.1KHz
48 - WEBAUDIO_BLOCK_SIZE;
49 // First stage will have size MinFFTSize - successive stages will double in
50 // size each time until we hit the maximum size.
51 const size_t MinFFTSize = 256;
52 // If we are using background threads then don't exceed this FFT size for the
53 // stages which run in the real-time thread. This avoids having only one or
54 // two large stages (size 16384 or so) at the end which take a lot of time
55 // every several processing slices. This way we amortize the cost over more
56 // processing slices.
57 const size_t MaxRealtimeFFTSize = 4096;
58
ReverbConvolver(const float * impulseResponseData,size_t impulseResponseLength,size_t maxFFTSize,size_t convolverRenderPhase,bool useBackgroundThreads,bool * aAllocationFailure)59 ReverbConvolver::ReverbConvolver(const float* impulseResponseData,
60 size_t impulseResponseLength,
61 size_t maxFFTSize, size_t convolverRenderPhase,
62 bool useBackgroundThreads,
63 bool* aAllocationFailure)
64 : m_impulseResponseLength(impulseResponseLength),
65 m_accumulationBuffer(),
66 m_inputBuffer(InputBufferSize),
67 m_backgroundThread("ConvolverWorker"),
68 m_backgroundThreadMonitor("ConvolverMonitor"),
69 m_useBackgroundThreads(useBackgroundThreads),
70 m_wantsToExit(false),
71 m_moreInputBuffered(false) {
72 *aAllocationFailure = !m_accumulationBuffer.allocate(impulseResponseLength +
73 WEBAUDIO_BLOCK_SIZE);
74 if (*aAllocationFailure) {
75 return;
76 }
77 // For the moment, a good way to know if we have real-time constraint is to
78 // check if we're using background threads. Otherwise, assume we're being run
79 // from a command-line tool.
80 bool hasRealtimeConstraint = useBackgroundThreads;
81
82 const float* response = impulseResponseData;
83 size_t totalResponseLength = impulseResponseLength;
84
85 // The total latency is zero because the first FFT stage is small enough
86 // to return output in the first block.
87 size_t reverbTotalLatency = 0;
88
89 size_t stageOffset = 0;
90 size_t stagePhase = 0;
91 size_t fftSize = MinFFTSize;
92 while (stageOffset < totalResponseLength) {
93 size_t stageSize = fftSize / 2;
94
95 // For the last stage, it's possible that stageOffset is such that we're
96 // straddling the end of the impulse response buffer (if we use stageSize),
97 // so reduce the last stage's length...
98 if (stageSize + stageOffset > totalResponseLength) {
99 stageSize = totalResponseLength - stageOffset;
100 // Use smallest FFT that is large enough to cover the last stage.
101 fftSize = MinFFTSize;
102 while (stageSize * 2 > fftSize) {
103 fftSize *= 2;
104 }
105 }
106
107 // This "staggers" the time when each FFT happens so they don't all happen
108 // at the same time
109 int renderPhase = convolverRenderPhase + stagePhase;
110
111 UniquePtr<ReverbConvolverStage> stage(new ReverbConvolverStage(
112 response, totalResponseLength, reverbTotalLatency, stageOffset,
113 stageSize, fftSize, renderPhase, &m_accumulationBuffer));
114
115 bool isBackgroundStage = false;
116
117 if (this->useBackgroundThreads() && stageOffset > RealtimeFrameLimit) {
118 m_backgroundStages.AppendElement(std::move(stage));
119 isBackgroundStage = true;
120 } else
121 m_stages.AppendElement(std::move(stage));
122
123 // Figure out next FFT size
124 fftSize *= 2;
125
126 stageOffset += stageSize;
127
128 if (hasRealtimeConstraint && !isBackgroundStage &&
129 fftSize > MaxRealtimeFFTSize) {
130 fftSize = MaxRealtimeFFTSize;
131 // Custom phase positions for all but the first of the realtime
132 // stages of largest size. These spread out the work of the
133 // larger realtime stages. None of the FFTs of size 1024, 2048 or
134 // 4096 are performed when processing the same block. The first
135 // MaxRealtimeFFTSize = 4096 stage, at the end of the doubling,
136 // performs its FFT at block 7. The FFTs of size 2048 are
137 // performed in blocks 3 + 8 * n and size 1024 at 1 + 4 * n.
138 const uint32_t phaseLookup[] = {14, 0, 10, 4};
139 stagePhase = WEBAUDIO_BLOCK_SIZE *
140 phaseLookup[m_stages.Length() % ArrayLength(phaseLookup)];
141 } else if (fftSize > maxFFTSize) {
142 fftSize = maxFFTSize;
143 // A prime offset spreads out FFTs in a way that all
144 // available phase positions will be used if there are sufficient
145 // stages.
146 stagePhase += 5 * WEBAUDIO_BLOCK_SIZE;
147 } else if (stageSize > WEBAUDIO_BLOCK_SIZE) {
148 // As the stages are doubling in size, the next FFT will occur
149 // mid-way between FFTs for this stage.
150 stagePhase = stageSize - WEBAUDIO_BLOCK_SIZE;
151 }
152 }
153
154 // Start up background thread
155 // FIXME: would be better to up the thread priority here. It doesn't need to
156 // be real-time, but higher than the default...
157 if (this->useBackgroundThreads() && m_backgroundStages.Length() > 0) {
158 if (!m_backgroundThread.Start()) {
159 NS_WARNING("Cannot start convolver thread.");
160 return;
161 }
162 m_backgroundThread.message_loop()->PostTask(NewNonOwningRunnableMethod(
163 "WebCore::ReverbConvolver::backgroundThreadEntry", this,
164 &ReverbConvolver::backgroundThreadEntry));
165 }
166 }
167
~ReverbConvolver()168 ReverbConvolver::~ReverbConvolver() {
169 // Wait for background thread to stop
170 if (useBackgroundThreads() && m_backgroundThread.IsRunning()) {
171 m_wantsToExit = true;
172
173 // Wake up thread so it can return
174 {
175 MonitorAutoLock locker(m_backgroundThreadMonitor);
176 m_moreInputBuffered = true;
177 m_backgroundThreadMonitor.Notify();
178 }
179
180 m_backgroundThread.Stop();
181 }
182 }
183
sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const184 size_t ReverbConvolver::sizeOfIncludingThis(
185 mozilla::MallocSizeOf aMallocSizeOf) const {
186 size_t amount = aMallocSizeOf(this);
187 amount += m_stages.ShallowSizeOfExcludingThis(aMallocSizeOf);
188 for (size_t i = 0; i < m_stages.Length(); i++) {
189 if (m_stages[i]) {
190 amount += m_stages[i]->sizeOfIncludingThis(aMallocSizeOf);
191 }
192 }
193
194 amount += m_backgroundStages.ShallowSizeOfExcludingThis(aMallocSizeOf);
195 for (size_t i = 0; i < m_backgroundStages.Length(); i++) {
196 if (m_backgroundStages[i]) {
197 amount += m_backgroundStages[i]->sizeOfIncludingThis(aMallocSizeOf);
198 }
199 }
200
201 // NB: The buffer sizes are static, so even though they might be accessed
202 // in another thread it's safe to measure them.
203 amount += m_accumulationBuffer.sizeOfExcludingThis(aMallocSizeOf);
204 amount += m_inputBuffer.sizeOfExcludingThis(aMallocSizeOf);
205
206 // Possible future measurements:
207 // - m_backgroundThread
208 // - m_backgroundThreadMonitor
209 return amount;
210 }
211
backgroundThreadEntry()212 void ReverbConvolver::backgroundThreadEntry() {
213 while (!m_wantsToExit) {
214 // Wait for realtime thread to give us more input
215 m_moreInputBuffered = false;
216 {
217 MonitorAutoLock locker(m_backgroundThreadMonitor);
218 while (!m_moreInputBuffered && !m_wantsToExit)
219 m_backgroundThreadMonitor.Wait();
220 }
221
222 // Process all of the stages until their read indices reach the input
223 // buffer's write index
224 int writeIndex = m_inputBuffer.writeIndex();
225
226 // Even though it doesn't seem like every stage needs to maintain its own
227 // version of readIndex we do this in case we want to run in more than one
228 // background thread.
229 int readIndex;
230
231 while ((readIndex = m_backgroundStages[0]->inputReadIndex()) !=
232 writeIndex) { // FIXME: do better to detect buffer overrun...
233 // Accumulate contributions from each stage
234 for (size_t i = 0; i < m_backgroundStages.Length(); ++i)
235 m_backgroundStages[i]->processInBackground(this);
236 }
237 }
238 }
239
process(const float * sourceChannelData,float * destinationChannelData)240 void ReverbConvolver::process(const float* sourceChannelData,
241 float* destinationChannelData) {
242 const float* source = sourceChannelData;
243 float* destination = destinationChannelData;
244 bool isDataSafe = source && destination;
245 MOZ_ASSERT(isDataSafe);
246 if (!isDataSafe) return;
247
248 // Feed input buffer (read by all threads)
249 m_inputBuffer.write(source, WEBAUDIO_BLOCK_SIZE);
250
251 // Accumulate contributions from each stage
252 for (size_t i = 0; i < m_stages.Length(); ++i) m_stages[i]->process(source);
253
254 // Finally read from accumulation buffer
255 m_accumulationBuffer.readAndClear(destination, WEBAUDIO_BLOCK_SIZE);
256
257 // Now that we've buffered more input, wake up our background thread.
258
259 // Not using a MonitorAutoLock looks strange, but we use a TryLock() instead
260 // because this is run on the real-time thread where it is a disaster for the
261 // lock to be contended (causes audio glitching). It's OK if we fail to
262 // signal from time to time, since we'll get to it the next time we're called.
263 // We're called repeatedly and frequently (around every 3ms). The background
264 // thread is processing well into the future and has a considerable amount of
265 // leeway here...
266 if (m_backgroundThreadMonitor.TryLock()) {
267 m_moreInputBuffered = true;
268 m_backgroundThreadMonitor.Notify();
269 m_backgroundThreadMonitor.Unlock();
270 }
271 }
272
273 } // namespace WebCore
274