1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "ConvolverNode.h"
8 #include "mozilla/dom/ConvolverNodeBinding.h"
9 #include "AlignmentUtils.h"
10 #include "AudioNodeEngine.h"
11 #include "AudioNodeTrack.h"
12 #include "blink/Reverb.h"
13 #include "PlayingRefChangeHandler.h"
14
15 namespace mozilla::dom {
16
// Cycle collection: traverse/unlink mBuffer (the impulse-response
// AudioBuffer) in addition to whatever the AudioNode base class handles.
NS_IMPL_CYCLE_COLLECTION_INHERITED(ConvolverNode, AudioNode, mBuffer)

NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(ConvolverNode)
NS_INTERFACE_MAP_END_INHERITING(AudioNode)

// Refcounting is forwarded to the AudioNode base class.
NS_IMPL_ADDREF_INHERITED(ConvolverNode, AudioNode)
NS_IMPL_RELEASE_INHERITED(ConvolverNode, AudioNode)
24
// Graph-thread engine for ConvolverNode.  Owns the WebCore::Reverb instance
// and tracks how much reverb "tail" remains after input goes silent, plus
// the mono<->stereo transition bookkeeping described by RightConvolverMode.
class ConvolverNodeEngine final : public AudioNodeEngine {
  typedef PlayingRefChangeHandler PlayingRefChanged;

 public:
  // aNormalize is accepted but not used here; normalization is presumably
  // applied when the Reverb is constructed on the main thread (see
  // ConvolverNode::SetBuffer, which passes mNormalize to Reverb) — confirm.
  ConvolverNodeEngine(AudioNode* aNode, bool aNormalize)
      : AudioNodeEngine(aNode) {}

  // Indicates how the right output channel is generated.
  enum class RightConvolverMode {
    // A right convolver is always used when there is more than one impulse
    // response channel.
    Always,
    // With a single response channel, the mode may be either Direct or
    // Difference.  The decision on which to use is made when stereo input is
    // received.  Once the right convolver is in use, convolver state is
    // suitable only for the selected mode, and so the mode cannot change
    // until the right convolver contains only silent history.
    //
    // With Direct mode, each convolver processes a corresponding channel.
    // This mode is selected when input is initially stereo or
    // channelInterpretation is "discrete" at the time or starting the right
    // convolver when input changes from non-silent mono to stereo.
    Direct,
    // Difference mode is selected if channelInterpretation is "speakers" at
    // the time starting the right convolver when the input changes from mono
    // to stereo.
    //
    // When non-silent input is initially mono, with a single response
    // channel, the right output channel is not produced until input becomes
    // stereo.  Only a single convolver is used for mono processing.  When
    // stereo input arrives after mono input, output must be as if the mono
    // signal remaining in the left convolver is up-mixed, but the right
    // convolver has not been initialized with the history of the mono input.
    // Copying the state of the left convolver into the right convolver is not
    // desirable, because there is considerable state to copy, and the
    // different convolvers are intended to process out of phase, which means
    // that state from one convolver would not directly map to state in
    // another convolver.
    //
    // Instead the distributive property of convolution is used to generate
    // the right output channel using information in the left output channel.
    // Using l and r to denote the left and right channel input signals, g the
    // impulse response, and * convolution, the convolution of the right
    // channel can be given by
    //
    //   r * g = (l + (r - l)) * g
    //         = l * g + (r - l) * g
    //
    // The left convolver continues to process the left channel l to produce
    // l * g.  The right convolver processes the difference of input channel
    // signals r - l to produce (r - l) * g.  The outputs of the two
    // convolvers are added to generate the right channel output r * g.
    //
    // The benefit of doing this is that the history of the r - l input for a
    // "speakers" up-mixed mono signal is zero, and so an empty convolver
    // already has exactly the right history for mixing the previous mono
    // signal with the new stereo signal.
    Difference
  };

  // Installs a new impulse response, taking ownership of aReverb (which may
  // be null to clear the convolver).  Resets all tail-tracking state and
  // picks the initial RightConvolverMode from the response channel count.
  void SetReverb(WebCore::Reverb* aReverb,
                 uint32_t aImpulseChannelCount) override {
    mRemainingLeftOutput = INT32_MIN;
    mRemainingRightOutput = 0;
    mRemainingRightHistory = 0;

    // Assume for now that convolution of channel difference is not required.
    // Direct may change to Difference during processing.
    if (aReverb) {
      mRightConvolverMode = aImpulseChannelCount == 1
                                ? RightConvolverMode::Direct
                                : RightConvolverMode::Always;
    } else {
      mRightConvolverMode = RightConvolverMode::Always;
    }

    mReverb.reset(aReverb);
  }

  // Copies aInput into mReverbInput, pre-multiplying aInput.mVolume into the
  // samples, and zero-fills any channels beyond those present in aInput so
  // that mReverbInput ends up with exactly aTotalChannelCount channels.
  void AllocateReverbInput(const AudioBlock& aInput,
                           uint32_t aTotalChannelCount) {
    uint32_t inputChannelCount = aInput.ChannelCount();
    MOZ_ASSERT(inputChannelCount <= aTotalChannelCount);
    mReverbInput.AllocateChannels(aTotalChannelCount);
    // Pre-multiply the input's volume
    for (uint32_t i = 0; i < inputChannelCount; ++i) {
      const float* src = static_cast<const float*>(aInput.mChannelData[i]);
      float* dest = mReverbInput.ChannelFloatsForWrite(i);
      AudioBlockCopyChannelWithScale(src, aInput.mVolume, dest);
    }
    // Fill remaining channels with silence
    for (uint32_t i = inputChannelCount; i < aTotalChannelCount; ++i) {
      float* dest = mReverbInput.ChannelFloatsForWrite(i);
      std::fill_n(dest, WEBAUDIO_BLOCK_SIZE, 0.0f);
    }
  }

  void ProcessBlock(AudioNodeTrack* aTrack, GraphTime aFrom,
                    const AudioBlock& aInput, AudioBlock* aOutput,
                    bool* aFinished) override;

  // Active while a left-channel tail remains; INT32_MIN is the sentinel for
  // "tail fully drained" (see mRemainingLeftOutput below).
  bool IsActive() const override { return mRemainingLeftOutput != INT32_MIN; }

  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override {
    size_t amount = AudioNodeEngine::SizeOfExcludingThis(aMallocSizeOf);

    amount += mReverbInput.SizeOfExcludingThis(aMallocSizeOf, false);

    if (mReverb) {
      amount += mReverb->sizeOfIncludingThis(aMallocSizeOf);
    }

    return amount;
  }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {
    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  }

 private:
  // Keeping mReverbInput across process calls avoids unnecessary reallocation.
  AudioBlock mReverbInput;
  UniquePtr<WebCore::Reverb> mReverb;
  // Tracks samples of the tail remaining to be output.  INT32_MIN is a
  // special value to indicate that the end of any previous tail has been
  // handled.
  int32_t mRemainingLeftOutput = INT32_MIN;
  // mRemainingRightOutput and mRemainingRightHistory are only used when
  // mRightOutputMode != Always.  There is no special handling required at the
  // end of tail times and so INT32_MIN is not used.
  // mRemainingRightOutput tracks how much longer this node needs to continue
  // to produce a right output channel.
  int32_t mRemainingRightOutput = 0;
  // mRemainingRightHistory tracks how much silent input would be required to
  // drain the right convolver, which may sometimes be longer than the period
  // a right output channel is required.
  int32_t mRemainingRightHistory = 0;
  RightConvolverMode mRightConvolverMode = RightConvolverMode::Always;
};
164
AddScaledLeftToRight(AudioBlock * aBlock,float aScale)165 static void AddScaledLeftToRight(AudioBlock* aBlock, float aScale) {
166 const float* left = static_cast<const float*>(aBlock->mChannelData[0]);
167 float* right = aBlock->ChannelFloatsForWrite(1);
168 AudioBlockAddChannelWithScale(left, aScale, right);
169 }
170
// Produces one block of convolved output on the graph thread.  Handles
// tail-time accounting (keeping the node "playing" while the reverb tail
// rings out after input goes silent) and the mono<->stereo transitions
// described by RightConvolverMode.  aFrom and aFinished are unused here.
void ConvolverNodeEngine::ProcessBlock(AudioNodeTrack* aTrack, GraphTime aFrom,
                                       const AudioBlock& aInput,
                                       AudioBlock* aOutput, bool* aFinished) {
  // No impulse response installed (or a silent one): output silence.
  if (!mReverb) {
    aOutput->SetNull(WEBAUDIO_BLOCK_SIZE);
    return;
  }

  uint32_t inputChannelCount = aInput.ChannelCount();
  if (aInput.IsNull()) {
    // Silent input: either keep draining the tail, or — once drained —
    // release the self-reference that kept the node alive and go inactive.
    if (mRemainingLeftOutput > 0) {
      mRemainingLeftOutput -= WEBAUDIO_BLOCK_SIZE;
      AllocateReverbInput(aInput, 1);  // floats for silence
    } else {
      if (mRemainingLeftOutput != INT32_MIN) {
        // First silent block after the tail finished: mark handled
        // (INT32_MIN) and release the playing reference on the main thread.
        mRemainingLeftOutput = INT32_MIN;
        MOZ_ASSERT(mRemainingRightOutput <= 0);
        MOZ_ASSERT(mRemainingRightHistory <= 0);
        aTrack->ScheduleCheckForInactive();
        RefPtr<PlayingRefChanged> refchanged =
            new PlayingRefChanged(aTrack, PlayingRefChanged::RELEASE);
        aTrack->Graph()->DispatchToMainThreadStableState(refchanged.forget());
      }
      aOutput->SetNull(WEBAUDIO_BLOCK_SIZE);
      return;
    }
  } else {
    // Transition from silent (or never-started) to non-silent input: take a
    // playing reference so the node survives until its tail drains.
    if (mRemainingLeftOutput <= 0) {
      RefPtr<PlayingRefChanged> refchanged =
          new PlayingRefChanged(aTrack, PlayingRefChanged::ADDREF);
      aTrack->Graph()->DispatchToMainThreadStableState(refchanged.forget());
    }

    // Use mVolume as a flag to detect whether AllocateReverbInput() gets
    // called.
    mReverbInput.mVolume = 0.0f;

    // Special handling of input channel count changes is used when there is
    // only a single impulse response channel.  See RightConvolverMode.
    if (mRightConvolverMode != RightConvolverMode::Always) {
      ChannelInterpretation channelInterpretation =
          aTrack->GetChannelInterpretation();
      if (inputChannelCount == 2) {
        if (mRemainingRightHistory <= 0) {
          // Will start the second convolver.  Choose to convolve the right
          // channel directly if there is no left tail to up-mix or up-mixing
          // is "discrete".
          mRightConvolverMode =
              (mRemainingLeftOutput <= 0 ||
               channelInterpretation == ChannelInterpretation::Discrete)
                  ? RightConvolverMode::Direct
                  : RightConvolverMode::Difference;
        }
        // The extra WEBAUDIO_BLOCK_SIZE is subtracted below.
        mRemainingRightOutput =
            mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE;
        mRemainingRightHistory = mRemainingRightOutput;
        if (mRightConvolverMode == RightConvolverMode::Difference) {
          AllocateReverbInput(aInput, 2);
          // Subtract left from right.
          AddScaledLeftToRight(&mReverbInput, -1.0f);
        }
      } else if (mRemainingRightHistory > 0) {
        // There is one channel of input, but a second convolver also
        // requires input.  Up-mix appropriately for the second convolver.
        if ((mRightConvolverMode == RightConvolverMode::Difference) ^
            (channelInterpretation == ChannelInterpretation::Discrete)) {
          MOZ_ASSERT(
              (mRightConvolverMode == RightConvolverMode::Difference &&
               channelInterpretation == ChannelInterpretation::Speakers) ||
              (mRightConvolverMode == RightConvolverMode::Direct &&
               channelInterpretation == ChannelInterpretation::Discrete));
          // The state is one of the following combinations:
          // 1) Difference and speakers.
          //    Up-mixing gives r = l.
          //    The input to the second convolver is r - l.
          // 2) Direct and discrete.
          //    Up-mixing gives r = 0.
          //    The input to the second convolver is r.
          //
          // In each case the input for the second convolver is silence, which
          // will drain the convolver.
          AllocateReverbInput(aInput, 2);
        } else {
          if (channelInterpretation == ChannelInterpretation::Discrete) {
            MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Difference);
            // channelInterpretation has changed since the second convolver
            // was added.  "discrete" up-mixing of input would produce a
            // silent right channel r = 0, but the second convolver needs
            // r - l for RightConvolverMode::Difference.
            AllocateReverbInput(aInput, 2);
            AddScaledLeftToRight(&mReverbInput, -1.0f);
          } else {
            MOZ_ASSERT(channelInterpretation ==
                       ChannelInterpretation::Speakers);
            MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Direct);
            // The Reverb will essentially up-mix the single input channel by
            // feeding it into both convolvers.
          }
          // The second convolver does not have silent input, and so it will
          // not drain.  It will need to continue processing up-mixed input
          // because the next input block may be stereo, which would be mixed
          // with the signal remaining in the convolvers.
          // The extra WEBAUDIO_BLOCK_SIZE is subtracted below.
          mRemainingRightHistory =
              mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE;
        }
      }
    }

    // If no special handling above copied the input, feed it to the reverb
    // directly (pre-multiplying volume only when it isn't unity).
    if (mReverbInput.mVolume == 0.0f) {  // not yet set
      if (aInput.mVolume != 1.0f) {
        AllocateReverbInput(aInput, inputChannelCount);  // pre-multiply
      } else {
        mReverbInput = aInput;
      }
    }

    // Non-silent input restarts the full left tail countdown.
    mRemainingLeftOutput = mReverb->impulseResponseLength();
    MOZ_ASSERT(mRemainingLeftOutput > 0);
  }

  // "The ConvolverNode produces a mono output only in the single case where
  // there is a single input channel and a single-channel buffer."
  uint32_t outputChannelCount = 2;
  uint32_t reverbOutputChannelCount = 2;
  if (mRightConvolverMode != RightConvolverMode::Always) {
    // When the input changes from stereo to mono, the output continues to be
    // stereo for the length of the tail time, during which the two channels
    // may differ.
    if (mRemainingRightOutput > 0) {
      MOZ_ASSERT(mRemainingRightHistory > 0);
      mRemainingRightOutput -= WEBAUDIO_BLOCK_SIZE;
    } else {
      outputChannelCount = 1;
    }
    // The second convolver keeps processing until it drains.
    if (mRemainingRightHistory > 0) {
      mRemainingRightHistory -= WEBAUDIO_BLOCK_SIZE;
    } else {
      reverbOutputChannelCount = 1;
    }
  }

  // If there are two convolvers, then they each need an output buffer, even
  // if the second convolver is only processing to keep history of up-mixed
  // input.
  aOutput->AllocateChannels(reverbOutputChannelCount);

  mReverb->process(&mReverbInput, aOutput);

  if (mRightConvolverMode == RightConvolverMode::Difference &&
      outputChannelCount == 2) {
    // Add left to right.
    AddScaledLeftToRight(aOutput, 1.0f);
  } else {
    // Trim if outputChannelCount < reverbOutputChannelCount
    aOutput->mChannelData.TruncateLength(outputChannelCount);
  }
}
331
ConvolverNode(AudioContext * aContext)332 ConvolverNode::ConvolverNode(AudioContext* aContext)
333 : AudioNode(aContext, 2, ChannelCountMode::Clamped_max,
334 ChannelInterpretation::Speakers),
335 mNormalize(true) {
336 ConvolverNodeEngine* engine = new ConvolverNodeEngine(this, mNormalize);
337 mTrack = AudioNodeTrack::Create(
338 aContext, engine, AudioNodeTrack::NO_TRACK_FLAGS, aContext->Graph());
339 }
340
341 /* static */
Create(JSContext * aCx,AudioContext & aAudioContext,const ConvolverOptions & aOptions,ErrorResult & aRv)342 already_AddRefed<ConvolverNode> ConvolverNode::Create(
343 JSContext* aCx, AudioContext& aAudioContext,
344 const ConvolverOptions& aOptions, ErrorResult& aRv) {
345 RefPtr<ConvolverNode> audioNode = new ConvolverNode(&aAudioContext);
346
347 audioNode->Initialize(aOptions, aRv);
348 if (NS_WARN_IF(aRv.Failed())) {
349 return nullptr;
350 }
351
352 // This must be done before setting the buffer.
353 audioNode->SetNormalize(!aOptions.mDisableNormalization);
354
355 if (aOptions.mBuffer.WasPassed()) {
356 MOZ_ASSERT(aCx);
357 audioNode->SetBuffer(aCx, aOptions.mBuffer.Value(), aRv);
358 if (NS_WARN_IF(aRv.Failed())) {
359 return nullptr;
360 }
361 }
362
363 return audioNode.forget();
364 }
365
SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const366 size_t ConvolverNode::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
367 size_t amount = AudioNode::SizeOfExcludingThis(aMallocSizeOf);
368 if (mBuffer) {
369 // NB: mBuffer might be shared with the associated engine, by convention
370 // the AudioNode will report.
371 amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf);
372 }
373 return amount;
374 }
375
SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const376 size_t ConvolverNode::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
377 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
378 }
379
WrapObject(JSContext * aCx,JS::Handle<JSObject * > aGivenProto)380 JSObject* ConvolverNode::WrapObject(JSContext* aCx,
381 JS::Handle<JSObject*> aGivenProto) {
382 return ConvolverNode_Binding::Wrap(aCx, this, aGivenProto);
383 }
384
// Sets the impulse response buffer (main thread).  Validates the channel
// count (1, 2, or 4) and that the buffer's sample rate matches the context,
// converts int16 sample data to float, builds a WebCore::Reverb, and ships
// it to the engine via the track.  A null aBuffer, or one whose data is
// silent, clears the reverb instead.  On failure aRv carries a
// NotSupportedError or NS_ERROR_OUT_OF_MEMORY and mBuffer is left unchanged.
void ConvolverNode::SetBuffer(JSContext* aCx, AudioBuffer* aBuffer,
                              ErrorResult& aRv) {
  if (aBuffer) {
    switch (aBuffer->NumberOfChannels()) {
      case 1:
      case 2:
      case 4:
        // Supported number of channels
        break;
      default:
        aRv.ThrowNotSupportedError(
            nsPrintfCString("%u is not a supported number of channels",
                            aBuffer->NumberOfChannels()));
        return;
    }
  }

  if (aBuffer && (aBuffer->SampleRate() != Context()->SampleRate())) {
    aRv.ThrowNotSupportedError(nsPrintfCString(
        "Buffer sample rate (%g) does not match AudioContext sample rate (%g)",
        aBuffer->SampleRate(), Context()->SampleRate()));
    return;
  }

  // Send the buffer to the track
  AudioNodeTrack* ns = mTrack;
  MOZ_ASSERT(ns, "Why don't we have a track here?");
  if (aBuffer) {
    AudioChunk data = aBuffer->GetThreadSharedChannelsForRate(aCx);
    if (data.mBufferFormat == AUDIO_FORMAT_S16) {
      // Reverb expects data in float format.
      // Convert on the main thread so as to minimize allocations on the audio
      // thread.
      // Reverb will dispose of the buffer once initialized, so convert here
      // and leave the smaller arrays in the AudioBuffer.
      // There is currently no value in providing 16/32-byte aligned data
      // because PadAndMakeScaledDFT() will copy the data (without SIMD
      // instructions) to aligned arrays for the FFT.
      // CheckedInt guards against overflow in duration * channels * 4 bytes.
      CheckedInt<size_t> bufferSize(sizeof(float));
      bufferSize *= data.mDuration;
      bufferSize *= data.ChannelCount();
      RefPtr<SharedBuffer> floatBuffer =
          SharedBuffer::Create(bufferSize, fallible);
      if (!floatBuffer) {
        aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
        return;
      }
      // Convert each int16 channel in turn; the chunk's channel pointers are
      // repointed into the single contiguous float buffer.
      auto floatData = static_cast<float*>(floatBuffer->Data());
      for (size_t i = 0; i < data.ChannelCount(); ++i) {
        ConvertAudioSamples(data.ChannelData<int16_t>()[i], floatData,
                            data.mDuration);
        data.mChannelData[i] = floatData;
        floatData += data.mDuration;
      }
      data.mBuffer = std::move(floatBuffer);
      data.mBufferFormat = AUDIO_FORMAT_FLOAT32;
    } else if (data.mBufferFormat == AUDIO_FORMAT_SILENCE) {
      // This is valid, but a signal convolved by a silent signal is silent, set
      // the reverb to nullptr and return.
      ns->SetReverb(nullptr, 0);
      mBuffer = aBuffer;
      return;
    }

    // Note about empirical tuning (this is copied from Blink)
    // The maximum FFT size affects reverb performance and accuracy.
    // If the reverb is single-threaded and processes entirely in the real-time
    // audio thread, it's important not to make this too high.  In this case
    // 8192 is a good value.  But, the Reverb object is multi-threaded, so we
    // want this as high as possible without losing too much accuracy.  Very
    // large FFTs will have worse phase errors.  Given these constraints 32768
    // is a good compromise.
    const size_t MaxFFTSize = 32768;

    bool allocationFailure = false;
    UniquePtr<WebCore::Reverb> reverb(new WebCore::Reverb(
        data, MaxFFTSize, !Context()->IsOffline(), mNormalize,
        aBuffer->SampleRate(), &allocationFailure));
    if (!allocationFailure) {
      // Ownership of the Reverb passes to the engine on the graph thread.
      ns->SetReverb(reverb.release(), data.ChannelCount());
    } else {
      aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
      return;
    }
  } else {
    ns->SetReverb(nullptr, 0);
  }
  mBuffer = aBuffer;
}
474
SetNormalize(bool aNormalize)475 void ConvolverNode::SetNormalize(bool aNormalize) { mNormalize = aNormalize; }
476
477 } // namespace mozilla::dom
478