1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "ConvolverNode.h"
8 #include "mozilla/dom/ConvolverNodeBinding.h"
9 #include "AlignmentUtils.h"
10 #include "AudioNodeEngine.h"
11 #include "AudioNodeTrack.h"
12 #include "blink/Reverb.h"
13 #include "PlayingRefChangeHandler.h"
14 
15 namespace mozilla::dom {
16 
17 NS_IMPL_CYCLE_COLLECTION_INHERITED(ConvolverNode, AudioNode, mBuffer)
18 
19 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(ConvolverNode)
20 NS_INTERFACE_MAP_END_INHERITING(AudioNode)
21 
22 NS_IMPL_ADDREF_INHERITED(ConvolverNode, AudioNode)
23 NS_IMPL_RELEASE_INHERITED(ConvolverNode, AudioNode)
24 
// Graph-thread engine for ConvolverNode.  Owns the WebCore::Reverb and the
// state needed to track tail times and the right-output-channel strategy
// when the impulse response has a single channel.
class ConvolverNodeEngine final : public AudioNodeEngine {
  typedef PlayingRefChangeHandler PlayingRefChanged;

 public:
  // aNormalize is accepted for symmetry with the node but is not used by the
  // engine: normalization is applied on the main thread when the
  // WebCore::Reverb is constructed (see ConvolverNode::SetBuffer).
  ConvolverNodeEngine(AudioNode* aNode, bool aNormalize)
      : AudioNodeEngine(aNode) {}

  // Indicates how the right output channel is generated.
  enum class RightConvolverMode {
    // A right convolver is always used when there is more than one impulse
    // response channel.
    Always,
    // With a single response channel, the mode may be either Direct or
    // Difference.  The decision on which to use is made when stereo input is
    // received.  Once the right convolver is in use, convolver state is
    // suitable only for the selected mode, and so the mode cannot change
    // until the right convolver contains only silent history.
    //
    // With Direct mode, each convolver processes a corresponding channel.
    // This mode is selected when input is initially stereo or
    // channelInterpretation is "discrete" at the time or starting the right
    // convolver when input changes from non-silent mono to stereo.
    Direct,
    // Difference mode is selected if channelInterpretation is "speakers" at
    // the time starting the right convolver when the input changes from mono
    // to stereo.
    //
    // When non-silent input is initially mono, with a single response
    // channel, the right output channel is not produced until input becomes
    // stereo.  Only a single convolver is used for mono processing.  When
    // stereo input arrives after mono input, output must be as if the mono
    // signal remaining in the left convolver is up-mixed, but the right
    // convolver has not been initialized with the history of the mono input.
    // Copying the state of the left convolver into the right convolver is not
    // desirable, because there is considerable state to copy, and the
    // different convolvers are intended to process out of phase, which means
    // that state from one convolver would not directly map to state in
    // another convolver.
    //
    // Instead the distributive property of convolution is used to generate
    // the right output channel using information in the left output channel.
    // Using l and r to denote the left and right channel input signals, g the
    // impulse response, and * convolution, the convolution of the right
    // channel can be given by
    //
    //   r * g = (l + (r - l)) * g
    //         = l * g + (r - l) * g
    //
    // The left convolver continues to process the left channel l to produce
    // l * g.  The right convolver processes the difference of input channel
    // signals r - l to produce (r - l) * g.  The outputs of the two
    // convolvers are added to generate the right channel output r * g.
    //
    // The benefit of doing this is that the history of the r - l input for a
    // "speakers" up-mixed mono signal is zero, and so an empty convolver
    // already has exactly the right history for mixing the previous mono
    // signal with the new stereo signal.
    Difference
  };

  // Installs a new reverb (taking ownership of aReverb) and resets all
  // tail-tracking state.  aReverb may be null (no buffer, or a silent
  // buffer), in which case ProcessBlock() outputs silence.
  void SetReverb(WebCore::Reverb* aReverb,
                 uint32_t aImpulseChannelCount) override {
    mRemainingLeftOutput = INT32_MIN;
    mRemainingRightOutput = 0;
    mRemainingRightHistory = 0;

    // Assume for now that convolution of channel difference is not required.
    // Direct may change to Difference during processing.
    if (aReverb) {
      mRightConvolverMode = aImpulseChannelCount == 1
                                ? RightConvolverMode::Direct
                                : RightConvolverMode::Always;
    } else {
      mRightConvolverMode = RightConvolverMode::Always;
    }

    mReverb.reset(aReverb);
  }

  // Copies aInput into mReverbInput with aInput.mVolume pre-multiplied into
  // the samples, padding with silent channels up to aTotalChannelCount.
  // Used both to materialize scaled input and to up-mix for the second
  // convolver.
  void AllocateReverbInput(const AudioBlock& aInput,
                           uint32_t aTotalChannelCount) {
    uint32_t inputChannelCount = aInput.ChannelCount();
    MOZ_ASSERT(inputChannelCount <= aTotalChannelCount);
    mReverbInput.AllocateChannels(aTotalChannelCount);
    // Pre-multiply the input's volume
    for (uint32_t i = 0; i < inputChannelCount; ++i) {
      const float* src = static_cast<const float*>(aInput.mChannelData[i]);
      float* dest = mReverbInput.ChannelFloatsForWrite(i);
      AudioBlockCopyChannelWithScale(src, aInput.mVolume, dest);
    }
    // Fill remaining channels with silence
    for (uint32_t i = inputChannelCount; i < aTotalChannelCount; ++i) {
      float* dest = mReverbInput.ChannelFloatsForWrite(i);
      std::fill_n(dest, WEBAUDIO_BLOCK_SIZE, 0.0f);
    }
  }

  void ProcessBlock(AudioNodeTrack* aTrack, GraphTime aFrom,
                    const AudioBlock& aInput, AudioBlock* aOutput,
                    bool* aFinished) override;

  // Active while a tail remains to be output; INT32_MIN marks that the end
  // of any previous tail has already been handled (see mRemainingLeftOutput).
  bool IsActive() const override { return mRemainingLeftOutput != INT32_MIN; }

  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override {
    size_t amount = AudioNodeEngine::SizeOfExcludingThis(aMallocSizeOf);

    amount += mReverbInput.SizeOfExcludingThis(aMallocSizeOf, false);

    if (mReverb) {
      amount += mReverb->sizeOfIncludingThis(aMallocSizeOf);
    }

    return amount;
  }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {
    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  }

 private:
  // Keeping mReverbInput across process calls avoids unnecessary reallocation.
  AudioBlock mReverbInput;
  UniquePtr<WebCore::Reverb> mReverb;
  // Tracks samples of the tail remaining to be output.  INT32_MIN is a
  // special value to indicate that the end of any previous tail has been
  // handled.
  int32_t mRemainingLeftOutput = INT32_MIN;
  // mRemainingRightOutput and mRemainingRightHistory are only used when
  // mRightOutputMode != Always.  There is no special handling required at the
  // end of tail times and so INT32_MIN is not used.
  // mRemainingRightOutput tracks how much longer this node needs to continue
  // to produce a right output channel.
  int32_t mRemainingRightOutput = 0;
  // mRemainingRightHistory tracks how much silent input would be required to
  // drain the right convolver, which may sometimes be longer than the period
  // a right output channel is required.
  int32_t mRemainingRightHistory = 0;
  RightConvolverMode mRightConvolverMode = RightConvolverMode::Always;
};
164 
AddScaledLeftToRight(AudioBlock * aBlock,float aScale)165 static void AddScaledLeftToRight(AudioBlock* aBlock, float aScale) {
166   const float* left = static_cast<const float*>(aBlock->mChannelData[0]);
167   float* right = aBlock->ChannelFloatsForWrite(1);
168   AudioBlockAddChannelWithScale(left, aScale, right);
169 }
170 
// Graph-thread processing of one WEBAUDIO_BLOCK_SIZE block.  Feeds the input
// through the reverb, manages the tail-time playing reference on the track
// (ADDREF when non-silent input starts, RELEASE when the tail drains), and
// drives the single-response right-convolver state machine described on
// RightConvolverMode.
void ConvolverNodeEngine::ProcessBlock(AudioNodeTrack* aTrack, GraphTime aFrom,
                                       const AudioBlock& aInput,
                                       AudioBlock* aOutput, bool* aFinished) {
  // No impulse response installed (or a silent one): output is silence.
  if (!mReverb) {
    aOutput->SetNull(WEBAUDIO_BLOCK_SIZE);
    return;
  }

  uint32_t inputChannelCount = aInput.ChannelCount();
  if (aInput.IsNull()) {
    if (mRemainingLeftOutput > 0) {
      // Still draining the tail: keep feeding silence into the reverb.
      mRemainingLeftOutput -= WEBAUDIO_BLOCK_SIZE;
      AllocateReverbInput(aInput, 1);  // floats for silence
    } else {
      if (mRemainingLeftOutput != INT32_MIN) {
        // Tail just finished: mark it handled and drop the playing ref that
        // was added when non-silent input began.
        mRemainingLeftOutput = INT32_MIN;
        MOZ_ASSERT(mRemainingRightOutput <= 0);
        MOZ_ASSERT(mRemainingRightHistory <= 0);
        aTrack->ScheduleCheckForInactive();
        RefPtr<PlayingRefChanged> refchanged =
            new PlayingRefChanged(aTrack, PlayingRefChanged::RELEASE);
        aTrack->Graph()->DispatchToMainThreadStableState(refchanged.forget());
      }
      aOutput->SetNull(WEBAUDIO_BLOCK_SIZE);
      return;
    }
  } else {
    if (mRemainingLeftOutput <= 0) {
      // Transition from silent to non-silent input: hold a playing ref for
      // the duration of the signal plus its tail.
      RefPtr<PlayingRefChanged> refchanged =
          new PlayingRefChanged(aTrack, PlayingRefChanged::ADDREF);
      aTrack->Graph()->DispatchToMainThreadStableState(refchanged.forget());
    }

    // Use mVolume as a flag to detect whether AllocateReverbInput() gets
    // called.
    mReverbInput.mVolume = 0.0f;

    // Special handling of input channel count changes is used when there is
    // only a single impulse response channel.  See RightConvolverMode.
    if (mRightConvolverMode != RightConvolverMode::Always) {
      ChannelInterpretation channelInterpretation =
          aTrack->GetChannelInterpretation();
      if (inputChannelCount == 2) {
        if (mRemainingRightHistory <= 0) {
          // Will start the second convolver.  Choose to convolve the right
          // channel directly if there is no left tail to up-mix or up-mixing
          // is "discrete".
          mRightConvolverMode =
              (mRemainingLeftOutput <= 0 ||
               channelInterpretation == ChannelInterpretation::Discrete)
                  ? RightConvolverMode::Direct
                  : RightConvolverMode::Difference;
        }
        // The extra WEBAUDIO_BLOCK_SIZE is subtracted below.
        mRemainingRightOutput =
            mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE;
        mRemainingRightHistory = mRemainingRightOutput;
        if (mRightConvolverMode == RightConvolverMode::Difference) {
          AllocateReverbInput(aInput, 2);
          // Subtract left from right.
          AddScaledLeftToRight(&mReverbInput, -1.0f);
        }
      } else if (mRemainingRightHistory > 0) {
        // There is one channel of input, but a second convolver also
        // requires input.  Up-mix appropriately for the second convolver.
        if ((mRightConvolverMode == RightConvolverMode::Difference) ^
            (channelInterpretation == ChannelInterpretation::Discrete)) {
          MOZ_ASSERT(
              (mRightConvolverMode == RightConvolverMode::Difference &&
               channelInterpretation == ChannelInterpretation::Speakers) ||
              (mRightConvolverMode == RightConvolverMode::Direct &&
               channelInterpretation == ChannelInterpretation::Discrete));
          // The state is one of the following combinations:
          // 1) Difference and speakers.
          //    Up-mixing gives r = l.
          //    The input to the second convolver is r - l.
          // 2) Direct and discrete.
          //    Up-mixing gives r = 0.
          //    The input to the second convolver is r.
          //
          // In each case the input for the second convolver is silence, which
          // will drain the convolver.
          AllocateReverbInput(aInput, 2);
        } else {
          if (channelInterpretation == ChannelInterpretation::Discrete) {
            MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Difference);
            // channelInterpretation has changed since the second convolver
            // was added.  "discrete" up-mixing of input would produce a
            // silent right channel r = 0, but the second convolver needs
            // r - l for RightConvolverMode::Difference.
            AllocateReverbInput(aInput, 2);
            AddScaledLeftToRight(&mReverbInput, -1.0f);
          } else {
            MOZ_ASSERT(channelInterpretation ==
                       ChannelInterpretation::Speakers);
            MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Direct);
            // The Reverb will essentially up-mix the single input channel by
            // feeding it into both convolvers.
          }
          // The second convolver does not have silent input, and so it will
          // not drain.  It will need to continue processing up-mixed input
          // because the next input block may be stereo, which would be mixed
          // with the signal remaining in the convolvers.
          // The extra WEBAUDIO_BLOCK_SIZE is subtracted below.
          mRemainingRightHistory =
              mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE;
        }
      }
    }

    if (mReverbInput.mVolume == 0.0f) {  // not yet set
      if (aInput.mVolume != 1.0f) {
        AllocateReverbInput(aInput, inputChannelCount);  // pre-multiply
      } else {
        // Volume is exactly 1: the input block can be shared directly.
        mReverbInput = aInput;
      }
    }

    // Non-silent input restarts the full tail countdown.
    mRemainingLeftOutput = mReverb->impulseResponseLength();
    MOZ_ASSERT(mRemainingLeftOutput > 0);
  }

  // "The ConvolverNode produces a mono output only in the single case where
  // there is a single input channel and a single-channel buffer."
  uint32_t outputChannelCount = 2;
  uint32_t reverbOutputChannelCount = 2;
  if (mRightConvolverMode != RightConvolverMode::Always) {
    // When the input changes from stereo to mono, the output continues to be
    // stereo for the length of the tail time, during which the two channels
    // may differ.
    if (mRemainingRightOutput > 0) {
      MOZ_ASSERT(mRemainingRightHistory > 0);
      mRemainingRightOutput -= WEBAUDIO_BLOCK_SIZE;
    } else {
      outputChannelCount = 1;
    }
    // The second convolver keeps processing until it drains.
    if (mRemainingRightHistory > 0) {
      mRemainingRightHistory -= WEBAUDIO_BLOCK_SIZE;
    } else {
      reverbOutputChannelCount = 1;
    }
  }

  // If there are two convolvers, then they each need an output buffer, even
  // if the second convolver is only processing to keep history of up-mixed
  // input.
  aOutput->AllocateChannels(reverbOutputChannelCount);

  mReverb->process(&mReverbInput, aOutput);

  if (mRightConvolverMode == RightConvolverMode::Difference &&
      outputChannelCount == 2) {
    // Add left to right.
    AddScaledLeftToRight(aOutput, 1.0f);
  } else {
    // Trim if outputChannelCount < reverbOutputChannelCount
    aOutput->mChannelData.TruncateLength(outputChannelCount);
  }
}
331 
ConvolverNode(AudioContext * aContext)332 ConvolverNode::ConvolverNode(AudioContext* aContext)
333     : AudioNode(aContext, 2, ChannelCountMode::Clamped_max,
334                 ChannelInterpretation::Speakers),
335       mNormalize(true) {
336   ConvolverNodeEngine* engine = new ConvolverNodeEngine(this, mNormalize);
337   mTrack = AudioNodeTrack::Create(
338       aContext, engine, AudioNodeTrack::NO_TRACK_FLAGS, aContext->Graph());
339 }
340 
341 /* static */
Create(JSContext * aCx,AudioContext & aAudioContext,const ConvolverOptions & aOptions,ErrorResult & aRv)342 already_AddRefed<ConvolverNode> ConvolverNode::Create(
343     JSContext* aCx, AudioContext& aAudioContext,
344     const ConvolverOptions& aOptions, ErrorResult& aRv) {
345   RefPtr<ConvolverNode> audioNode = new ConvolverNode(&aAudioContext);
346 
347   audioNode->Initialize(aOptions, aRv);
348   if (NS_WARN_IF(aRv.Failed())) {
349     return nullptr;
350   }
351 
352   // This must be done before setting the buffer.
353   audioNode->SetNormalize(!aOptions.mDisableNormalization);
354 
355   if (aOptions.mBuffer.WasPassed()) {
356     MOZ_ASSERT(aCx);
357     audioNode->SetBuffer(aCx, aOptions.mBuffer.Value(), aRv);
358     if (NS_WARN_IF(aRv.Failed())) {
359       return nullptr;
360     }
361   }
362 
363   return audioNode.forget();
364 }
365 
SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const366 size_t ConvolverNode::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
367   size_t amount = AudioNode::SizeOfExcludingThis(aMallocSizeOf);
368   if (mBuffer) {
369     // NB: mBuffer might be shared with the associated engine, by convention
370     //     the AudioNode will report.
371     amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf);
372   }
373   return amount;
374 }
375 
// Memory reporting: this object itself plus everything it owns.
size_t ConvolverNode::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
  return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
}
379 
// Creates the JS reflector for this node via the generated WebIDL binding.
JSObject* ConvolverNode::WrapObject(JSContext* aCx,
                                    JS::Handle<JSObject*> aGivenProto) {
  return ConvolverNode_Binding::Wrap(aCx, this, aGivenProto);
}
384 
// Main-thread setter for the impulse-response buffer.
// Validates the buffer (1, 2, or 4 channels; sample rate matching the
// context's), converts int16 sample data to float, constructs a
// WebCore::Reverb, and installs it on the engine via the track.  A null
// aBuffer, or a silent one, clears the reverb.  Throws NotSupportedError for
// invalid buffers and NS_ERROR_OUT_OF_MEMORY on allocation failure; mBuffer
// is only updated on success.
void ConvolverNode::SetBuffer(JSContext* aCx, AudioBuffer* aBuffer,
                              ErrorResult& aRv) {
  if (aBuffer) {
    switch (aBuffer->NumberOfChannels()) {
      case 1:
      case 2:
      case 4:
        // Supported number of channels
        break;
      default:
        aRv.ThrowNotSupportedError(
            nsPrintfCString("%u is not a supported number of channels",
                            aBuffer->NumberOfChannels()));
        return;
    }
  }

  // The buffer must match the context rate; the reverb does not resample.
  if (aBuffer && (aBuffer->SampleRate() != Context()->SampleRate())) {
    aRv.ThrowNotSupportedError(nsPrintfCString(
        "Buffer sample rate (%g) does not match AudioContext sample rate (%g)",
        aBuffer->SampleRate(), Context()->SampleRate()));
    return;
  }

  // Send the buffer to the track
  AudioNodeTrack* ns = mTrack;
  MOZ_ASSERT(ns, "Why don't we have a track here?");
  if (aBuffer) {
    AudioChunk data = aBuffer->GetThreadSharedChannelsForRate(aCx);
    if (data.mBufferFormat == AUDIO_FORMAT_S16) {
      // Reverb expects data in float format.
      // Convert on the main thread so as to minimize allocations on the audio
      // thread.
      // Reverb will dispose of the buffer once initialized, so convert here
      // and leave the smaller arrays in the AudioBuffer.
      // There is currently no value in providing 16/32-byte aligned data
      // because PadAndMakeScaledDFT() will copy the data (without SIMD
      // instructions) to aligned arrays for the FFT.
      CheckedInt<size_t> bufferSize(sizeof(float));
      bufferSize *= data.mDuration;
      bufferSize *= data.ChannelCount();
      RefPtr<SharedBuffer> floatBuffer =
          SharedBuffer::Create(bufferSize, fallible);
      if (!floatBuffer) {
        aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
        return;
      }
      // All channels are stored contiguously in one allocation; each channel
      // pointer is repointed into the converted float data.
      auto floatData = static_cast<float*>(floatBuffer->Data());
      for (size_t i = 0; i < data.ChannelCount(); ++i) {
        ConvertAudioSamples(data.ChannelData<int16_t>()[i], floatData,
                            data.mDuration);
        data.mChannelData[i] = floatData;
        floatData += data.mDuration;
      }
      data.mBuffer = std::move(floatBuffer);
      data.mBufferFormat = AUDIO_FORMAT_FLOAT32;
    } else if (data.mBufferFormat == AUDIO_FORMAT_SILENCE) {
      // This is valid, but a signal convolved by a silent signal is silent, set
      // the reverb to nullptr and return.
      ns->SetReverb(nullptr, 0);
      mBuffer = aBuffer;
      return;
    }

    // Note about empirical tuning (this is copied from Blink)
    // The maximum FFT size affects reverb performance and accuracy.
    // If the reverb is single-threaded and processes entirely in the real-time
    // audio thread, it's important not to make this too high.  In this case
    // 8192 is a good value. But, the Reverb object is multi-threaded, so we
    // want this as high as possible without losing too much accuracy. Very
    // large FFTs will have worse phase errors. Given these constraints 32768 is
    // a good compromise.
    const size_t MaxFFTSize = 32768;

    bool allocationFailure = false;
    UniquePtr<WebCore::Reverb> reverb(new WebCore::Reverb(
        data, MaxFFTSize, !Context()->IsOffline(), mNormalize,
        aBuffer->SampleRate(), &allocationFailure));
    if (!allocationFailure) {
      // The engine takes ownership of the raw Reverb pointer.
      ns->SetReverb(reverb.release(), data.ChannelCount());
    } else {
      aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
      return;
    }
  } else {
    ns->SetReverb(nullptr, 0);
  }
  mBuffer = aBuffer;
}
474 
SetNormalize(bool aNormalize)475 void ConvolverNode::SetNormalize(bool aNormalize) { mNormalize = aNormalize; }
476 
477 }  // namespace mozilla::dom
478