1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #define _USE_MATH_DEFINES
12
13 #include "modules/audio_processing/beamformer/nonlinear_beamformer.h"
14
15 #include <algorithm>
16 #include <cmath>
17 #include <numeric>
18 #include <vector>
19
20 #include "common_audio/window_generator.h"
21 #include "modules/audio_processing/beamformer/covariance_matrix_generator.h"
22 #include "rtc_base/arraysize.h"
23
24 namespace webrtc {
25 namespace {
26
27 // Alpha for the Kaiser Bessel Derived window.
28 const float kKbdAlpha = 1.5f;
29
30 const float kSpeedOfSoundMeterSeconds = 343;
31
32 // The minimum separation in radians between the target direction and an
33 // interferer scenario.
34 const float kMinAwayRadians = 0.2f;
35
36 // The separation between the target direction and the closest interferer
37 // scenario is proportional to this constant.
38 const float kAwaySlope = 0.008f;
39
40 // When calculating the interference covariance matrix, this is the weight for
41 // the weighted average between the uniform covariance matrix and the angled
42 // covariance matrix.
43 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
44 const float kBalance = 0.95f;
45
46 // Alpha coefficients for mask smoothing.
47 const float kMaskTimeSmoothAlpha = 0.2f;
48 const float kMaskFrequencySmoothAlpha = 0.6f;
49
50 // The average mask is computed from masks in this mid-frequency range. If these
51 // ranges are changed |kMaskQuantile| might need to be adjusted.
52 const int kLowMeanStartHz = 200;
53 const int kLowMeanEndHz = 400;
54
55 // Range limiter for subtractive terms in the nominator and denominator of the
56 // postfilter expression. It handles the scenario mismatch between the true and
57 // model sources (target and interference).
58 const float kCutOffConstant = 0.9999f;
59
60 // Quantile of mask values which is used to estimate target presence.
61 const float kMaskQuantile = 0.7f;
62 // Mask threshold over which the data is considered signal and not interference.
63 // It has to be updated every time the postfilter calculation is changed
64 // significantly.
65 // TODO(aluebs): Write a tool to tune the target threshold automatically based
66 // on files annotated with target and interference ground truth.
67 const float kMaskTargetThreshold = 0.01f;
68 // Time in seconds after which the data is considered interference if the mask
69 // does not pass |kMaskTargetThreshold|.
70 const float kHoldTargetSeconds = 0.25f;
71
72 // To compensate for the attenuation this algorithm introduces to the target
73 // signal. It was estimated empirically from a low-noise low-reverberation
74 // recording from broadside.
75 const float kCompensationGain = 2.f;
76
77 // Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is
78 // used; to accomplish this, we compute both multiplications in the same loop.
79 // The returned norm is clamped to be non-negative.
Norm(const ComplexMatrix<float> & mat,const ComplexMatrix<float> & norm_mat)80 float Norm(const ComplexMatrix<float>& mat,
81 const ComplexMatrix<float>& norm_mat) {
82 RTC_CHECK_EQ(1, norm_mat.num_rows());
83 RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_rows());
84 RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_columns());
85
86 complex<float> first_product = complex<float>(0.f, 0.f);
87 complex<float> second_product = complex<float>(0.f, 0.f);
88
89 const complex<float>* const* mat_els = mat.elements();
90 const complex<float>* const* norm_mat_els = norm_mat.elements();
91
92 for (size_t i = 0; i < norm_mat.num_columns(); ++i) {
93 for (size_t j = 0; j < norm_mat.num_columns(); ++j) {
94 first_product += conj(norm_mat_els[0][j]) * mat_els[j][i];
95 }
96 second_product += first_product * norm_mat_els[0][i];
97 first_product = 0.f;
98 }
99 return std::max(second_product.real(), 0.f);
100 }
101
102 // Does conjugate(|lhs|) * |rhs| for row vectors |lhs| and |rhs|.
ConjugateDotProduct(const ComplexMatrix<float> & lhs,const ComplexMatrix<float> & rhs)103 complex<float> ConjugateDotProduct(const ComplexMatrix<float>& lhs,
104 const ComplexMatrix<float>& rhs) {
105 RTC_CHECK_EQ(1, lhs.num_rows());
106 RTC_CHECK_EQ(1, rhs.num_rows());
107 RTC_CHECK_EQ(lhs.num_columns(), rhs.num_columns());
108
109 const complex<float>* const* lhs_elements = lhs.elements();
110 const complex<float>* const* rhs_elements = rhs.elements();
111
112 complex<float> result = complex<float>(0.f, 0.f);
113 for (size_t i = 0; i < lhs.num_columns(); ++i) {
114 result += conj(lhs_elements[0][i]) * rhs_elements[0][i];
115 }
116
117 return result;
118 }
119
120 // Works for positive numbers only.
Round(float x)121 size_t Round(float x) {
122 return static_cast<size_t>(std::floor(x + 0.5f));
123 }
124
125 // Calculates the sum of squares of a complex matrix.
SumSquares(const ComplexMatrix<float> & mat)126 float SumSquares(const ComplexMatrix<float>& mat) {
127 float sum_squares = 0.f;
128 const complex<float>* const* mat_els = mat.elements();
129 for (size_t i = 0; i < mat.num_rows(); ++i) {
130 for (size_t j = 0; j < mat.num_columns(); ++j) {
131 float abs_value = std::abs(mat_els[i][j]);
132 sum_squares += abs_value * abs_value;
133 }
134 }
135 return sum_squares;
136 }
137
138 // Does |out| = |in|.' * conj(|in|) for row vector |in|.
TransposedConjugatedProduct(const ComplexMatrix<float> & in,ComplexMatrix<float> * out)139 void TransposedConjugatedProduct(const ComplexMatrix<float>& in,
140 ComplexMatrix<float>* out) {
141 RTC_CHECK_EQ(1, in.num_rows());
142 RTC_CHECK_EQ(out->num_rows(), in.num_columns());
143 RTC_CHECK_EQ(out->num_columns(), in.num_columns());
144 const complex<float>* in_elements = in.elements()[0];
145 complex<float>* const* out_elements = out->elements();
146 for (size_t i = 0; i < out->num_rows(); ++i) {
147 for (size_t j = 0; j < out->num_columns(); ++j) {
148 out_elements[i][j] = in_elements[i] * conj(in_elements[j]);
149 }
150 }
151 }
152
GetCenteredArray(std::vector<Point> array_geometry)153 std::vector<Point> GetCenteredArray(std::vector<Point> array_geometry) {
154 for (size_t dim = 0; dim < 3; ++dim) {
155 float center = 0.f;
156 for (size_t i = 0; i < array_geometry.size(); ++i) {
157 center += array_geometry[i].c[dim];
158 }
159 center /= array_geometry.size();
160 for (size_t i = 0; i < array_geometry.size(); ++i) {
161 array_geometry[i].c[dim] -= center;
162 }
163 }
164 return array_geometry;
165 }
166
167 } // namespace
168
169 const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f);
170
171 // static
172 const size_t NonlinearBeamformer::kNumFreqBins;
173
PostFilterTransform(size_t num_channels,size_t chunk_length,float * window,size_t fft_size)174 PostFilterTransform::PostFilterTransform(size_t num_channels,
175 size_t chunk_length,
176 float* window,
177 size_t fft_size)
178 : transform_(num_channels,
179 num_channels,
180 chunk_length,
181 window,
182 fft_size,
183 fft_size / 2,
184 this),
185 num_freq_bins_(fft_size / 2 + 1) {}
186
ProcessChunk(float * const * data,float * final_mask)187 void PostFilterTransform::ProcessChunk(float* const* data, float* final_mask) {
188 final_mask_ = final_mask;
189 transform_.ProcessChunk(data, data);
190 }
191
ProcessAudioBlock(const complex<float> * const * input,size_t num_input_channels,size_t num_freq_bins,size_t num_output_channels,complex<float> * const * output)192 void PostFilterTransform::ProcessAudioBlock(const complex<float>* const* input,
193 size_t num_input_channels,
194 size_t num_freq_bins,
195 size_t num_output_channels,
196 complex<float>* const* output) {
197 RTC_DCHECK_EQ(num_freq_bins_, num_freq_bins);
198 RTC_DCHECK_EQ(num_input_channels, num_output_channels);
199
200 for (size_t ch = 0; ch < num_input_channels; ++ch) {
201 for (size_t f_ix = 0; f_ix < num_freq_bins_; ++f_ix) {
202 output[ch][f_ix] =
203 kCompensationGain * final_mask_[f_ix] * input[ch][f_ix];
204 }
205 }
206 }
207
NonlinearBeamformer(const std::vector<Point> & array_geometry,size_t num_postfilter_channels,SphericalPointf target_direction)208 NonlinearBeamformer::NonlinearBeamformer(
209 const std::vector<Point>& array_geometry,
210 size_t num_postfilter_channels,
211 SphericalPointf target_direction)
212 : num_input_channels_(array_geometry.size()),
213 num_postfilter_channels_(num_postfilter_channels),
214 array_geometry_(GetCenteredArray(array_geometry)),
215 array_normal_(GetArrayNormalIfExists(array_geometry)),
216 min_mic_spacing_(GetMinimumSpacing(array_geometry)),
217 target_angle_radians_(target_direction.azimuth()),
218 away_radians_(std::min(
219 static_cast<float>(M_PI),
220 std::max(kMinAwayRadians,
221 kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) {
222 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
223 }
224
225 NonlinearBeamformer::~NonlinearBeamformer() = default;
226
Initialize(int chunk_size_ms,int sample_rate_hz)227 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
228 chunk_length_ =
229 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));
230 sample_rate_hz_ = sample_rate_hz;
231
232 high_pass_postfilter_mask_ = 1.f;
233 is_target_present_ = false;
234 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
235 interference_blocks_count_ = hold_target_blocks_;
236
237 process_transform_.reset(new LappedTransform(num_input_channels_,
238 0u,
239 chunk_length_,
240 window_,
241 kFftSize,
242 kFftSize / 2,
243 this));
244 postfilter_transform_.reset(new PostFilterTransform(
245 num_postfilter_channels_, chunk_length_, window_, kFftSize));
246 const float wave_number_step =
247 (2.f * M_PI * sample_rate_hz_) / (kFftSize * kSpeedOfSoundMeterSeconds);
248 for (size_t i = 0; i < kNumFreqBins; ++i) {
249 time_smooth_mask_[i] = 1.f;
250 final_mask_[i] = 1.f;
251 wave_numbers_[i] = i * wave_number_step;
252 }
253
254 InitLowFrequencyCorrectionRanges();
255 InitDiffuseCovMats();
256 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f));
257 }
258
259 // These bin indexes determine the regions over which a mean is taken. This is
260 // applied as a constant value over the adjacent end "frequency correction"
261 // regions.
262 //
263 // low_mean_start_bin_ high_mean_start_bin_
264 // v v constant
265 // |----------------|--------|----------------|-------|----------------|
266 // constant ^ ^
267 // low_mean_end_bin_ high_mean_end_bin_
268 //
InitLowFrequencyCorrectionRanges()269 void NonlinearBeamformer::InitLowFrequencyCorrectionRanges() {
270 low_mean_start_bin_ = Round(static_cast<float>(kLowMeanStartHz) *
271 kFftSize / sample_rate_hz_);
272 low_mean_end_bin_ = Round(static_cast<float>(kLowMeanEndHz) *
273 kFftSize / sample_rate_hz_);
274
275 RTC_DCHECK_GT(low_mean_start_bin_, 0U);
276 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
277 }
278
InitHighFrequencyCorrectionRanges()279 void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() {
280 const float kAliasingFreqHz =
281 kSpeedOfSoundMeterSeconds /
282 (min_mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_))));
283 const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz,
284 sample_rate_hz_ / 2.f);
285 const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz,
286 sample_rate_hz_ / 2.f);
287 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
288 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
289
290 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
291 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
292 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);
293 }
294
InitInterfAngles()295 void NonlinearBeamformer::InitInterfAngles() {
296 interf_angles_radians_.clear();
297 const Point target_direction = AzimuthToPoint(target_angle_radians_);
298 const Point clockwise_interf_direction =
299 AzimuthToPoint(target_angle_radians_ - away_radians_);
300 if (!array_normal_ ||
301 DotProduct(*array_normal_, target_direction) *
302 DotProduct(*array_normal_, clockwise_interf_direction) >=
303 0.f) {
304 // The target and clockwise interferer are in the same half-plane defined
305 // by the array.
306 interf_angles_radians_.push_back(target_angle_radians_ - away_radians_);
307 } else {
308 // Otherwise, the interferer will begin reflecting back at the target.
309 // Instead rotate it away 180 degrees.
310 interf_angles_radians_.push_back(target_angle_radians_ - away_radians_ +
311 M_PI);
312 }
313 const Point counterclock_interf_direction =
314 AzimuthToPoint(target_angle_radians_ + away_radians_);
315 if (!array_normal_ ||
316 DotProduct(*array_normal_, target_direction) *
317 DotProduct(*array_normal_, counterclock_interf_direction) >=
318 0.f) {
319 // The target and counter-clockwise interferer are in the same half-plane
320 // defined by the array.
321 interf_angles_radians_.push_back(target_angle_radians_ + away_radians_);
322 } else {
323 // Otherwise, the interferer will begin reflecting back at the target.
324 // Instead rotate it away 180 degrees.
325 interf_angles_radians_.push_back(target_angle_radians_ + away_radians_ -
326 M_PI);
327 }
328 }
329
InitDelaySumMasks()330 void NonlinearBeamformer::InitDelaySumMasks() {
331 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
332 delay_sum_masks_[f_ix].Resize(1, num_input_channels_);
333 CovarianceMatrixGenerator::PhaseAlignmentMasks(
334 f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds,
335 array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]);
336
337 complex_f norm_factor = sqrt(
338 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
339 delay_sum_masks_[f_ix].Scale(1.f / norm_factor);
340 }
341 }
342
InitTargetCovMats()343 void NonlinearBeamformer::InitTargetCovMats() {
344 for (size_t i = 0; i < kNumFreqBins; ++i) {
345 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
346 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);
347 }
348 }
349
InitDiffuseCovMats()350 void NonlinearBeamformer::InitDiffuseCovMats() {
351 for (size_t i = 0; i < kNumFreqBins; ++i) {
352 uniform_cov_mat_[i].Resize(num_input_channels_, num_input_channels_);
353 CovarianceMatrixGenerator::UniformCovarianceMatrix(
354 wave_numbers_[i], array_geometry_, &uniform_cov_mat_[i]);
355 complex_f normalization_factor = uniform_cov_mat_[i].elements()[0][0];
356 uniform_cov_mat_[i].Scale(1.f / normalization_factor);
357 uniform_cov_mat_[i].Scale(1 - kBalance);
358 }
359 }
360
InitInterfCovMats()361 void NonlinearBeamformer::InitInterfCovMats() {
362 for (size_t i = 0; i < kNumFreqBins; ++i) {
363 interf_cov_mats_[i].clear();
364 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
365 interf_cov_mats_[i].push_back(std::unique_ptr<ComplexMatrixF>(
366 new ComplexMatrixF(num_input_channels_, num_input_channels_)));
367 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);
368 CovarianceMatrixGenerator::AngledCovarianceMatrix(
369 kSpeedOfSoundMeterSeconds,
370 interf_angles_radians_[j],
371 i,
372 kFftSize,
373 kNumFreqBins,
374 sample_rate_hz_,
375 array_geometry_,
376 &angled_cov_mat);
377 // Normalize matrices before averaging them.
378 complex_f normalization_factor = angled_cov_mat.elements()[0][0];
379 angled_cov_mat.Scale(1.f / normalization_factor);
380 // Weighted average of matrices.
381 angled_cov_mat.Scale(kBalance);
382 interf_cov_mats_[i][j]->Add(uniform_cov_mat_[i], angled_cov_mat);
383 }
384 }
385 }
386
NormalizeCovMats()387 void NonlinearBeamformer::NormalizeCovMats() {
388 for (size_t i = 0; i < kNumFreqBins; ++i) {
389 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
390 rpsiws_[i].clear();
391 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
392 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));
393 }
394 }
395 }
396
AnalyzeChunk(const ChannelBuffer<float> & data)397 void NonlinearBeamformer::AnalyzeChunk(const ChannelBuffer<float>& data) {
398 RTC_DCHECK_EQ(data.num_channels(), num_input_channels_);
399 RTC_DCHECK_EQ(data.num_frames_per_band(), chunk_length_);
400
401 old_high_pass_mask_ = high_pass_postfilter_mask_;
402 process_transform_->ProcessChunk(data.channels(0), nullptr);
403 }
404
PostFilter(ChannelBuffer<float> * data)405 void NonlinearBeamformer::PostFilter(ChannelBuffer<float>* data) {
406 RTC_DCHECK_EQ(data->num_frames_per_band(), chunk_length_);
407 // TODO(aluebs): Change to RTC_CHECK_EQ once the ChannelBuffer is updated.
408 RTC_DCHECK_GE(data->num_channels(), num_postfilter_channels_);
409
410 postfilter_transform_->ProcessChunk(data->channels(0), final_mask_);
411
412 // Ramp up/down for smoothing is needed in order to avoid discontinuities in
413 // the transitions between 10 ms frames.
414 const float ramp_increment =
415 (high_pass_postfilter_mask_ - old_high_pass_mask_) /
416 data->num_frames_per_band();
417 for (size_t i = 1; i < data->num_bands(); ++i) {
418 float smoothed_mask = old_high_pass_mask_;
419 for (size_t j = 0; j < data->num_frames_per_band(); ++j) {
420 smoothed_mask += ramp_increment;
421 for (size_t k = 0; k < num_postfilter_channels_; ++k) {
422 data->channels(i)[k][j] *= smoothed_mask;
423 }
424 }
425 }
426 }
427
AimAt(const SphericalPointf & target_direction)428 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) {
429 target_angle_radians_ = target_direction.azimuth();
430 InitHighFrequencyCorrectionRanges();
431 InitInterfAngles();
432 InitDelaySumMasks();
433 InitTargetCovMats();
434 InitInterfCovMats();
435 NormalizeCovMats();
436 }
437
IsInBeam(const SphericalPointf & spherical_point)438 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {
439 // If more than half-beamwidth degrees away from the beam's center,
440 // you are out of the beam.
441 return fabs(spherical_point.azimuth() - target_angle_radians_) <
442 kHalfBeamWidthRadians;
443 }
444
is_target_present()445 bool NonlinearBeamformer::is_target_present() { return is_target_present_; }
446
ProcessAudioBlock(const complex_f * const * input,size_t num_input_channels,size_t num_freq_bins,size_t num_output_channels,complex_f * const * output)447 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
448 size_t num_input_channels,
449 size_t num_freq_bins,
450 size_t num_output_channels,
451 complex_f* const* output) {
452 RTC_CHECK_EQ(kNumFreqBins, num_freq_bins);
453 RTC_CHECK_EQ(num_input_channels_, num_input_channels);
454 RTC_CHECK_EQ(0, num_output_channels);
455
456 // Calculating the post-filter masks. Note that we need two for each
457 // frequency bin to account for the positive and negative interferer
458 // angle.
459 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
460 eig_m_.CopyFromColumn(input, i, num_input_channels_);
461 float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_));
462 if (eig_m_norm_factor != 0.f) {
463 eig_m_.Scale(1.f / eig_m_norm_factor);
464 }
465
466 float rxim = Norm(target_cov_mats_[i], eig_m_);
467 float ratio_rxiw_rxim = 0.f;
468 if (rxim > 0.f) {
469 ratio_rxiw_rxim = rxiws_[i] / rxim;
470 }
471
472 complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_));
473 rmw *= rmw;
474 float rmw_r = rmw.real();
475
476 new_mask_[i] = CalculatePostfilterMask(*interf_cov_mats_[i][0],
477 rpsiws_[i][0],
478 ratio_rxiw_rxim,
479 rmw_r);
480 for (size_t j = 1; j < interf_angles_radians_.size(); ++j) {
481 float tmp_mask = CalculatePostfilterMask(*interf_cov_mats_[i][j],
482 rpsiws_[i][j],
483 ratio_rxiw_rxim,
484 rmw_r);
485 if (tmp_mask < new_mask_[i]) {
486 new_mask_[i] = tmp_mask;
487 }
488 }
489 }
490
491 ApplyMaskTimeSmoothing();
492 EstimateTargetPresence();
493 ApplyLowFrequencyCorrection();
494 ApplyHighFrequencyCorrection();
495 ApplyMaskFrequencySmoothing();
496 }
497
CalculatePostfilterMask(const ComplexMatrixF & interf_cov_mat,float rpsiw,float ratio_rxiw_rxim,float rmw_r)498 float NonlinearBeamformer::CalculatePostfilterMask(
499 const ComplexMatrixF& interf_cov_mat,
500 float rpsiw,
501 float ratio_rxiw_rxim,
502 float rmw_r) {
503 float rpsim = Norm(interf_cov_mat, eig_m_);
504
505 float ratio = 0.f;
506 if (rpsim > 0.f) {
507 ratio = rpsiw / rpsim;
508 }
509
510 float numerator = 1.f - kCutOffConstant;
511 if (rmw_r > 0.f) {
512 numerator = 1.f - std::min(kCutOffConstant, ratio / rmw_r);
513 }
514
515 float denominator = 1.f - kCutOffConstant;
516 if (ratio_rxiw_rxim > 0.f) {
517 denominator = 1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim);
518 }
519
520 return numerator / denominator;
521 }
522
523 // Smooth new_mask_ into time_smooth_mask_.
ApplyMaskTimeSmoothing()524 void NonlinearBeamformer::ApplyMaskTimeSmoothing() {
525 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
526 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] +
527 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i];
528 }
529 }
530
531 // Copy time_smooth_mask_ to final_mask_ and smooth over frequency.
ApplyMaskFrequencySmoothing()532 void NonlinearBeamformer::ApplyMaskFrequencySmoothing() {
533 // Smooth over frequency in both directions. The "frequency correction"
534 // regions have constant value, but we enter them to smooth over the jump
535 // that exists at the boundary. However, this does mean when smoothing "away"
536 // from the region that we only need to use the last element.
537 //
538 // Upward smoothing:
539 // low_mean_start_bin_
540 // v
541 // |------|------------|------|
542 // ^------------------>^
543 //
544 // Downward smoothing:
545 // high_mean_end_bin_
546 // v
547 // |------|------------|------|
548 // ^<------------------^
549 std::copy(time_smooth_mask_, time_smooth_mask_ + kNumFreqBins, final_mask_);
550 for (size_t i = low_mean_start_bin_; i < kNumFreqBins; ++i) {
551 final_mask_[i] = kMaskFrequencySmoothAlpha * final_mask_[i] +
552 (1 - kMaskFrequencySmoothAlpha) * final_mask_[i - 1];
553 }
554 for (size_t i = high_mean_end_bin_ + 1; i > 0; --i) {
555 final_mask_[i - 1] = kMaskFrequencySmoothAlpha * final_mask_[i - 1] +
556 (1 - kMaskFrequencySmoothAlpha) * final_mask_[i];
557 }
558 }
559
560 // Apply low frequency correction to time_smooth_mask_.
ApplyLowFrequencyCorrection()561 void NonlinearBeamformer::ApplyLowFrequencyCorrection() {
562 const float low_frequency_mask =
563 MaskRangeMean(low_mean_start_bin_, low_mean_end_bin_ + 1);
564 std::fill(time_smooth_mask_, time_smooth_mask_ + low_mean_start_bin_,
565 low_frequency_mask);
566 }
567
568 // Apply high frequency correction to time_smooth_mask_. Update
569 // high_pass_postfilter_mask_ to use for the high frequency time-domain bands.
ApplyHighFrequencyCorrection()570 void NonlinearBeamformer::ApplyHighFrequencyCorrection() {
571 high_pass_postfilter_mask_ =
572 MaskRangeMean(high_mean_start_bin_, high_mean_end_bin_ + 1);
573 std::fill(time_smooth_mask_ + high_mean_end_bin_ + 1,
574 time_smooth_mask_ + kNumFreqBins, high_pass_postfilter_mask_);
575 }
576
577 // Compute mean over the given range of time_smooth_mask_, [first, last).
MaskRangeMean(size_t first,size_t last)578 float NonlinearBeamformer::MaskRangeMean(size_t first, size_t last) {
579 RTC_DCHECK_GT(last, first);
580 const float sum = std::accumulate(time_smooth_mask_ + first,
581 time_smooth_mask_ + last, 0.f);
582 return sum / (last - first);
583 }
584
EstimateTargetPresence()585 void NonlinearBeamformer::EstimateTargetPresence() {
586 const size_t quantile = static_cast<size_t>(
587 (high_mean_end_bin_ - low_mean_start_bin_) * kMaskQuantile +
588 low_mean_start_bin_);
589 std::nth_element(new_mask_ + low_mean_start_bin_, new_mask_ + quantile,
590 new_mask_ + high_mean_end_bin_ + 1);
591 if (new_mask_[quantile] > kMaskTargetThreshold) {
592 is_target_present_ = true;
593 interference_blocks_count_ = 0;
594 } else {
595 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;
596 }
597 }
598
599 } // namespace webrtc
600