1 /*
2  * Copyright (C) 2010 Google Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1.  Redistributions of source code must retain the above copyright
9  *     notice, this list of conditions and the following disclaimer.
10  * 2.  Redistributions in binary form must reproduce the above copyright
11  *     notice, this list of conditions and the following disclaimer in the
12  *     documentation and/or other materials provided with the distribution.
13  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14  *     its contributors may be used to endorse or promote products derived
15  *     from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "ReverbConvolver.h"
30 #include "ReverbConvolverStage.h"
31 
32 using namespace mozilla;
33 
34 namespace WebCore {
35 
36 const int InputBufferSize = 8 * 16384;
37 
38 // We only process the leading portion of the impulse response in the real-time thread.  We don't exceed this length.
39 // It turns out then, that the background thread has about 278msec of scheduling slop.
40 // Empirically, this has been found to be a good compromise between giving enough time for scheduling slop,
41 // while still minimizing the amount of processing done in the primary (high-priority) thread.
42 // This was found to be a good value on Mac OS X, and may work well on other platforms as well, assuming
43 // the very rough scheduling latencies are similar on these time-scales.  Of course, this code may need to be
44 // tuned for individual platforms if this assumption is found to be incorrect.
45 const size_t RealtimeFrameLimit = 8192 + 4096 // ~278msec @ 44.1KHz
46                                   - WEBAUDIO_BLOCK_SIZE;
47 // First stage will have size MinFFTSize - successive stages will double in
48 // size each time until we hit the maximum size.
49 const size_t MinFFTSize = 256;
50 // If we are using background threads then don't exceed this FFT size for the
51 // stages which run in the real-time thread.  This avoids having only one or
52 // two large stages (size 16384 or so) at the end which take a lot of time
53 // every several processing slices.  This way we amortize the cost over more
54 // processing slices.
55 const size_t MaxRealtimeFFTSize = 4096;
56 
ReverbConvolver(const float * impulseResponseData,size_t impulseResponseLength,size_t maxFFTSize,size_t convolverRenderPhase,bool useBackgroundThreads)57 ReverbConvolver::ReverbConvolver(const float* impulseResponseData,
58                                  size_t impulseResponseLength,
59                                  size_t maxFFTSize,
60                                  size_t convolverRenderPhase,
61                                  bool useBackgroundThreads)
62     : m_impulseResponseLength(impulseResponseLength)
63     , m_accumulationBuffer(impulseResponseLength + WEBAUDIO_BLOCK_SIZE)
64     , m_inputBuffer(InputBufferSize)
65     , m_backgroundThread("ConvolverWorker")
66     , m_backgroundThreadCondition(&m_backgroundThreadLock)
67     , m_useBackgroundThreads(useBackgroundThreads)
68     , m_wantsToExit(false)
69     , m_moreInputBuffered(false)
70 {
71     // For the moment, a good way to know if we have real-time constraint is to check if we're using background threads.
72     // Otherwise, assume we're being run from a command-line tool.
73     bool hasRealtimeConstraint = useBackgroundThreads;
74 
75     const float* response = impulseResponseData;
76     size_t totalResponseLength = impulseResponseLength;
77 
78     // The total latency is zero because the first FFT stage is small enough
79     // to return output in the first block.
80     size_t reverbTotalLatency = 0;
81 
82     size_t stageOffset = 0;
83     size_t stagePhase = 0;
84     size_t fftSize = MinFFTSize;
85     while (stageOffset < totalResponseLength) {
86         size_t stageSize = fftSize / 2;
87 
88         // For the last stage, it's possible that stageOffset is such that we're straddling the end
89         // of the impulse response buffer (if we use stageSize), so reduce the last stage's length...
90         if (stageSize + stageOffset > totalResponseLength) {
91             stageSize = totalResponseLength - stageOffset;
92             // Use smallest FFT that is large enough to cover the last stage.
93             fftSize = MinFFTSize;
94             while (stageSize * 2 > fftSize) {
95               fftSize *= 2;
96             }
97         }
98 
99         // This "staggers" the time when each FFT happens so they don't all happen at the same time
100         int renderPhase = convolverRenderPhase + stagePhase;
101 
102         nsAutoPtr<ReverbConvolverStage> stage
103           (new ReverbConvolverStage(response, totalResponseLength,
104                                     reverbTotalLatency, stageOffset, stageSize,
105                                     fftSize, renderPhase,
106                                     &m_accumulationBuffer));
107 
108         bool isBackgroundStage = false;
109 
110         if (this->useBackgroundThreads() && stageOffset > RealtimeFrameLimit) {
111             m_backgroundStages.AppendElement(stage.forget());
112             isBackgroundStage = true;
113         } else
114             m_stages.AppendElement(stage.forget());
115 
116         // Figure out next FFT size
117         fftSize *= 2;
118 
119         stageOffset += stageSize;
120 
121         if (hasRealtimeConstraint && !isBackgroundStage
122             && fftSize > MaxRealtimeFFTSize) {
123             fftSize = MaxRealtimeFFTSize;
124             // Custom phase positions for all but the first of the realtime
125             // stages of largest size.  These spread out the work of the
126             // larger realtime stages.  None of the FFTs of size 1024, 2048 or
127             // 4096 are performed when processing the same block.  The first
128             // MaxRealtimeFFTSize = 4096 stage, at the end of the doubling,
129             // performs its FFT at block 7.  The FFTs of size 2048 are
130             // performed in blocks 3 + 8 * n and size 1024 at 1 + 4 * n.
131             const uint32_t phaseLookup[] = { 14, 0, 10, 4 };
132             stagePhase = WEBAUDIO_BLOCK_SIZE *
133                 phaseLookup[m_stages.Length() % ArrayLength(phaseLookup)];
134         } else if (fftSize > maxFFTSize) {
135             fftSize = maxFFTSize;
136             // A prime offset spreads out FFTs in a way that all
137             // available phase positions will be used if there are sufficient
138             // stages.
139             stagePhase += 5 * WEBAUDIO_BLOCK_SIZE;
140         } else if (stageSize > WEBAUDIO_BLOCK_SIZE) {
141             // As the stages are doubling in size, the next FFT will occur
142             // mid-way between FFTs for this stage.
143             stagePhase = stageSize - WEBAUDIO_BLOCK_SIZE;
144         }
145     }
146 
147     // Start up background thread
148     // FIXME: would be better to up the thread priority here.  It doesn't need to be real-time, but higher than the default...
149     if (this->useBackgroundThreads() && m_backgroundStages.Length() > 0) {
150         if (!m_backgroundThread.Start()) {
151           NS_WARNING("Cannot start convolver thread.");
152           return;
153         }
154         m_backgroundThread.message_loop()->PostTask(NewNonOwningRunnableMethod(this,
155 									       &ReverbConvolver::backgroundThreadEntry));
156     }
157 }
158 
~ReverbConvolver()159 ReverbConvolver::~ReverbConvolver()
160 {
161     // Wait for background thread to stop
162     if (useBackgroundThreads() && m_backgroundThread.IsRunning()) {
163         m_wantsToExit = true;
164 
165         // Wake up thread so it can return
166         {
167             AutoLock locker(m_backgroundThreadLock);
168             m_moreInputBuffered = true;
169             m_backgroundThreadCondition.Signal();
170         }
171 
172         m_backgroundThread.Stop();
173     }
174 }
175 
sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const176 size_t ReverbConvolver::sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
177 {
178     size_t amount = aMallocSizeOf(this);
179     amount += m_stages.ShallowSizeOfExcludingThis(aMallocSizeOf);
180     for (size_t i = 0; i < m_stages.Length(); i++) {
181         if (m_stages[i]) {
182             amount += m_stages[i]->sizeOfIncludingThis(aMallocSizeOf);
183         }
184     }
185 
186     amount += m_backgroundStages.ShallowSizeOfExcludingThis(aMallocSizeOf);
187     for (size_t i = 0; i < m_backgroundStages.Length(); i++) {
188         if (m_backgroundStages[i]) {
189             amount += m_backgroundStages[i]->sizeOfIncludingThis(aMallocSizeOf);
190         }
191     }
192 
193     // NB: The buffer sizes are static, so even though they might be accessed
194     //     in another thread it's safe to measure them.
195     amount += m_accumulationBuffer.sizeOfExcludingThis(aMallocSizeOf);
196     amount += m_inputBuffer.sizeOfExcludingThis(aMallocSizeOf);
197 
198     // Possible future measurements:
199     // - m_backgroundThread
200     // - m_backgroundThreadLock
201     // - m_backgroundThreadCondition
202     return amount;
203 }
204 
backgroundThreadEntry()205 void ReverbConvolver::backgroundThreadEntry()
206 {
207     while (!m_wantsToExit) {
208         // Wait for realtime thread to give us more input
209         m_moreInputBuffered = false;
210         {
211             AutoLock locker(m_backgroundThreadLock);
212             while (!m_moreInputBuffered && !m_wantsToExit)
213                 m_backgroundThreadCondition.Wait();
214         }
215 
216         // Process all of the stages until their read indices reach the input buffer's write index
217         int writeIndex = m_inputBuffer.writeIndex();
218 
219         // Even though it doesn't seem like every stage needs to maintain its own version of readIndex
220         // we do this in case we want to run in more than one background thread.
221         int readIndex;
222 
223         while ((readIndex = m_backgroundStages[0]->inputReadIndex()) != writeIndex) { // FIXME: do better to detect buffer overrun...
224             // Accumulate contributions from each stage
225             for (size_t i = 0; i < m_backgroundStages.Length(); ++i)
226                 m_backgroundStages[i]->processInBackground(this);
227         }
228     }
229 }
230 
process(const float * sourceChannelData,float * destinationChannelData)231 void ReverbConvolver::process(const float* sourceChannelData,
232                               float* destinationChannelData)
233 {
234     const float* source = sourceChannelData;
235     float* destination = destinationChannelData;
236     bool isDataSafe = source && destination;
237     MOZ_ASSERT(isDataSafe);
238     if (!isDataSafe)
239         return;
240 
241     // Feed input buffer (read by all threads)
242     m_inputBuffer.write(source, WEBAUDIO_BLOCK_SIZE);
243 
244     // Accumulate contributions from each stage
245     for (size_t i = 0; i < m_stages.Length(); ++i)
246         m_stages[i]->process(source);
247 
248     // Finally read from accumulation buffer
249     m_accumulationBuffer.readAndClear(destination, WEBAUDIO_BLOCK_SIZE);
250 
251     // Now that we've buffered more input, wake up our background thread.
252 
253     // Not using a MutexLocker looks strange, but we use a tryLock() instead because this is run on the real-time
254     // thread where it is a disaster for the lock to be contended (causes audio glitching).  It's OK if we fail to
255     // signal from time to time, since we'll get to it the next time we're called.  We're called repeatedly
256     // and frequently (around every 3ms).  The background thread is processing well into the future and has a considerable amount of
257     // leeway here...
258     if (m_backgroundThreadLock.Try()) {
259         m_moreInputBuffered = true;
260         m_backgroundThreadCondition.Signal();
261         m_backgroundThreadLock.Release();
262     }
263 }
264 
265 } // namespace WebCore
266