1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <math.h>
12 #include <string.h>
13 #include <stdlib.h>
14
15 #include "rtc_base/checks.h"
16 #include "common_audio/signal_processing/include/signal_processing_library.h"
17 #include "common_audio/third_party/fft4g/fft4g.h"
18 #include "modules/audio_processing/ns/noise_suppression.h"
19 #include "modules/audio_processing/ns/ns_core.h"
20 #include "modules/audio_processing/ns/windows_private.h"
21
22 // Set Feature Extraction Parameters.
set_feature_extraction_parameters(NoiseSuppressionC * self)23 static void set_feature_extraction_parameters(NoiseSuppressionC* self) {
24 // Bin size of histogram.
25 self->featureExtractionParams.binSizeLrt = 0.1f;
26 self->featureExtractionParams.binSizeSpecFlat = 0.05f;
27 self->featureExtractionParams.binSizeSpecDiff = 0.1f;
28
29 // Range of histogram over which LRT threshold is computed.
30 self->featureExtractionParams.rangeAvgHistLrt = 1.f;
31
32 // Scale parameters: multiply dominant peaks of the histograms by scale factor
33 // to obtain thresholds for prior model.
34 // For LRT and spectral difference.
35 self->featureExtractionParams.factor1ModelPars = 1.2f;
36 // For spectral_flatness: used when noise is flatter than speech.
37 self->featureExtractionParams.factor2ModelPars = 0.9f;
38
39 // Peak limit for spectral flatness (varies between 0 and 1).
40 self->featureExtractionParams.thresPosSpecFlat = 0.6f;
41
42 // Limit on spacing of two highest peaks in histogram: spacing determined by
43 // bin size.
44 self->featureExtractionParams.limitPeakSpacingSpecFlat =
45 2 * self->featureExtractionParams.binSizeSpecFlat;
46 self->featureExtractionParams.limitPeakSpacingSpecDiff =
47 2 * self->featureExtractionParams.binSizeSpecDiff;
48
49 // Limit on relevance of second peak.
50 self->featureExtractionParams.limitPeakWeightsSpecFlat = 0.5f;
51 self->featureExtractionParams.limitPeakWeightsSpecDiff = 0.5f;
52
53 // Fluctuation limit of LRT feature.
54 self->featureExtractionParams.thresFluctLrt = 0.05f;
55
56 // Limit on the max and min values for the feature thresholds.
57 self->featureExtractionParams.maxLrt = 1.f;
58 self->featureExtractionParams.minLrt = 0.2f;
59
60 self->featureExtractionParams.maxSpecFlat = 0.95f;
61 self->featureExtractionParams.minSpecFlat = 0.1f;
62
63 self->featureExtractionParams.maxSpecDiff = 1.f;
64 self->featureExtractionParams.minSpecDiff = 0.16f;
65
66 // Criteria of weight of histogram peak to accept/reject feature.
67 self->featureExtractionParams.thresWeightSpecFlat =
68 (int)(0.3 * (self->modelUpdatePars[1])); // For spectral flatness.
69 self->featureExtractionParams.thresWeightSpecDiff =
70 (int)(0.3 * (self->modelUpdatePars[1])); // For spectral difference.
71 }
72
73 // Initialize state.
WebRtcNs_InitCore(NoiseSuppressionC * self,uint32_t fs)74 int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs) {
75 int i;
76 // Check for valid pointer.
77 if (self == NULL) {
78 return -1;
79 }
80
81 // Initialization of struct.
82 if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) {
83 self->fs = fs;
84 } else {
85 return -1;
86 }
87 self->windShift = 0;
88 // We only support 10ms frames.
89 if (fs == 8000) {
90 self->blockLen = 80;
91 self->anaLen = 128;
92 self->window = kBlocks80w128;
93 } else {
94 self->blockLen = 160;
95 self->anaLen = 256;
96 self->window = kBlocks160w256;
97 }
98 self->magnLen = self->anaLen / 2 + 1; // Number of frequency bins.
99
100 // Initialize FFT work arrays.
101 self->ip[0] = 0; // Setting this triggers initialization.
102 memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
103 WebRtc_rdft(self->anaLen, 1, self->dataBuf, self->ip, self->wfft);
104
105 memset(self->analyzeBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
106 memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
107 memset(self->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
108
109 // For HB processing.
110 memset(self->dataBufHB,
111 0,
112 sizeof(float) * NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);
113
114 // For quantile noise estimation.
115 memset(self->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL);
116 for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) {
117 self->lquantile[i] = 8.f;
118 self->density[i] = 0.3f;
119 }
120
121 for (i = 0; i < SIMULT; i++) {
122 self->counter[i] =
123 (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT);
124 }
125
126 self->updates = 0;
127
128 // Wiener filter initialization.
129 for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
130 self->smooth[i] = 1.f;
131 }
132
133 // Set the aggressiveness: default.
134 self->aggrMode = 0;
135
136 // Initialize variables for new method.
137 self->priorSpeechProb = 0.5f; // Prior prob for speech/noise.
138 // Previous analyze mag spectrum.
139 memset(self->magnPrevAnalyze, 0, sizeof(float) * HALF_ANAL_BLOCKL);
140 // Previous process mag spectrum.
141 memset(self->magnPrevProcess, 0, sizeof(float) * HALF_ANAL_BLOCKL);
142 // Current noise-spectrum.
143 memset(self->noise, 0, sizeof(float) * HALF_ANAL_BLOCKL);
144 // Previous noise-spectrum.
145 memset(self->noisePrev, 0, sizeof(float) * HALF_ANAL_BLOCKL);
146 // Conservative noise spectrum estimate.
147 memset(self->magnAvgPause, 0, sizeof(float) * HALF_ANAL_BLOCKL);
148 // For estimation of HB in second pass.
149 memset(self->speechProb, 0, sizeof(float) * HALF_ANAL_BLOCKL);
150 // Initial average magnitude spectrum.
151 memset(self->initMagnEst, 0, sizeof(float) * HALF_ANAL_BLOCKL);
152 for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
153 // Smooth LR (same as threshold).
154 self->logLrtTimeAvg[i] = LRT_FEATURE_THR;
155 }
156
157 // Feature quantities.
158 // Spectral flatness (start on threshold).
159 self->featureData[0] = SF_FEATURE_THR;
160 self->featureData[1] = 0.f; // Spectral entropy: not used in this version.
161 self->featureData[2] = 0.f; // Spectral variance: not used in this version.
162 // Average LRT factor (start on threshold).
163 self->featureData[3] = LRT_FEATURE_THR;
164 // Spectral template diff (start on threshold).
165 self->featureData[4] = SF_FEATURE_THR;
166 self->featureData[5] = 0.f; // Normalization for spectral difference.
167 // Window time-average of input magnitude spectrum.
168 self->featureData[6] = 0.f;
169
170 memset(self->parametricNoise, 0, sizeof(float) * HALF_ANAL_BLOCKL);
171
172 // Histogram quantities: used to estimate/update thresholds for features.
173 memset(self->histLrt, 0, sizeof(int) * HIST_PAR_EST);
174 memset(self->histSpecFlat, 0, sizeof(int) * HIST_PAR_EST);
175 memset(self->histSpecDiff, 0, sizeof(int) * HIST_PAR_EST);
176
177
178 self->blockInd = -1; // Frame counter.
179 // Default threshold for LRT feature.
180 self->priorModelPars[0] = LRT_FEATURE_THR;
181 // Threshold for spectral flatness: determined on-line.
182 self->priorModelPars[1] = 0.5f;
183 // sgn_map par for spectral measure: 1 for flatness measure.
184 self->priorModelPars[2] = 1.f;
185 // Threshold for template-difference feature: determined on-line.
186 self->priorModelPars[3] = 0.5f;
187 // Default weighting parameter for LRT feature.
188 self->priorModelPars[4] = 1.f;
189 // Default weighting parameter for spectral flatness feature.
190 self->priorModelPars[5] = 0.f;
191 // Default weighting parameter for spectral difference feature.
192 self->priorModelPars[6] = 0.f;
193
194 // Update flag for parameters:
195 // 0 no update, 1 = update once, 2 = update every window.
196 self->modelUpdatePars[0] = 2;
197 self->modelUpdatePars[1] = 500; // Window for update.
198 // Counter for update of conservative noise spectrum.
199 self->modelUpdatePars[2] = 0;
200 // Counter if the feature thresholds are updated during the sequence.
201 self->modelUpdatePars[3] = self->modelUpdatePars[1];
202
203 self->signalEnergy = 0.0;
204 self->sumMagn = 0.0;
205 self->whiteNoiseLevel = 0.0;
206 self->pinkNoiseNumerator = 0.0;
207 self->pinkNoiseExp = 0.0;
208
209 set_feature_extraction_parameters(self);
210
211 // Default mode.
212 WebRtcNs_set_policy_core(self, 0);
213
214 self->initFlag = 1;
215 return 0;
216 }
217
218 // Estimate noise.
NoiseEstimation(NoiseSuppressionC * self,float * magn,float * noise)219 static void NoiseEstimation(NoiseSuppressionC* self,
220 float* magn,
221 float* noise) {
222 size_t i, s, offset;
223 float lmagn[HALF_ANAL_BLOCKL], delta;
224
225 if (self->updates < END_STARTUP_LONG) {
226 self->updates++;
227 }
228
229 for (i = 0; i < self->magnLen; i++) {
230 lmagn[i] = (float)log(magn[i]);
231 }
232
233 // Loop over simultaneous estimates.
234 for (s = 0; s < SIMULT; s++) {
235 offset = s * self->magnLen;
236
237 // newquantest(...)
238 for (i = 0; i < self->magnLen; i++) {
239 // Compute delta.
240 if (self->density[offset + i] > 1.0) {
241 delta = FACTOR * 1.f / self->density[offset + i];
242 } else {
243 delta = FACTOR;
244 }
245
246 // Update log quantile estimate.
247 if (lmagn[i] > self->lquantile[offset + i]) {
248 self->lquantile[offset + i] +=
249 QUANTILE * delta / (float)(self->counter[s] + 1);
250 } else {
251 self->lquantile[offset + i] -=
252 (1.f - QUANTILE) * delta / (float)(self->counter[s] + 1);
253 }
254
255 // Update density estimate.
256 if (fabs(lmagn[i] - self->lquantile[offset + i]) < WIDTH) {
257 self->density[offset + i] =
258 ((float)self->counter[s] * self->density[offset + i] +
259 1.f / (2.f * WIDTH)) /
260 (float)(self->counter[s] + 1);
261 }
262 } // End loop over magnitude spectrum.
263
264 if (self->counter[s] >= END_STARTUP_LONG) {
265 self->counter[s] = 0;
266 if (self->updates >= END_STARTUP_LONG) {
267 for (i = 0; i < self->magnLen; i++) {
268 self->quantile[i] = (float)exp(self->lquantile[offset + i]);
269 }
270 }
271 }
272
273 self->counter[s]++;
274 } // End loop over simultaneous estimates.
275
276 // Sequentially update the noise during startup.
277 if (self->updates < END_STARTUP_LONG) {
278 // Use the last "s" to get noise during startup that differ from zero.
279 for (i = 0; i < self->magnLen; i++) {
280 self->quantile[i] = (float)exp(self->lquantile[offset + i]);
281 }
282 }
283
284 for (i = 0; i < self->magnLen; i++) {
285 noise[i] = self->quantile[i];
286 }
287 }
288
289 // Extract thresholds for feature parameters.
290 // Histograms are computed over some window size (given by
291 // self->modelUpdatePars[1]).
292 // Thresholds and weights are extracted every window.
293 // |flag| = 0 updates histogram only, |flag| = 1 computes the threshold/weights.
294 // Threshold and weights are returned in: self->priorModelPars.
FeatureParameterExtraction(NoiseSuppressionC * self,int flag)295 static void FeatureParameterExtraction(NoiseSuppressionC* self, int flag) {
296 int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt;
297 int maxPeak1, maxPeak2;
298 int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff,
299 weightPeak2SpecDiff;
300
301 float binMid, featureSum;
302 float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff;
303 float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl;
304
305 // 3 features: LRT, flatness, difference.
306 // lrt_feature = self->featureData[3];
307 // flat_feature = self->featureData[0];
308 // diff_feature = self->featureData[4];
309
310 // Update histograms.
311 if (flag == 0) {
312 // LRT
313 if ((self->featureData[3] <
314 HIST_PAR_EST * self->featureExtractionParams.binSizeLrt) &&
315 (self->featureData[3] >= 0.0)) {
316 i = (int)(self->featureData[3] /
317 self->featureExtractionParams.binSizeLrt);
318 self->histLrt[i]++;
319 }
320 // Spectral flatness.
321 if ((self->featureData[0] <
322 HIST_PAR_EST * self->featureExtractionParams.binSizeSpecFlat) &&
323 (self->featureData[0] >= 0.0)) {
324 i = (int)(self->featureData[0] /
325 self->featureExtractionParams.binSizeSpecFlat);
326 self->histSpecFlat[i]++;
327 }
328 // Spectral difference.
329 if ((self->featureData[4] <
330 HIST_PAR_EST * self->featureExtractionParams.binSizeSpecDiff) &&
331 (self->featureData[4] >= 0.0)) {
332 i = (int)(self->featureData[4] /
333 self->featureExtractionParams.binSizeSpecDiff);
334 self->histSpecDiff[i]++;
335 }
336 }
337
338 // Extract parameters for speech/noise probability.
339 if (flag == 1) {
340 // LRT feature: compute the average over
341 // self->featureExtractionParams.rangeAvgHistLrt.
342 avgHistLrt = 0.0;
343 avgHistLrtCompl = 0.0;
344 avgSquareHistLrt = 0.0;
345 numHistLrt = 0;
346 for (i = 0; i < HIST_PAR_EST; i++) {
347 binMid = ((float)i + 0.5f) * self->featureExtractionParams.binSizeLrt;
348 if (binMid <= self->featureExtractionParams.rangeAvgHistLrt) {
349 avgHistLrt += self->histLrt[i] * binMid;
350 numHistLrt += self->histLrt[i];
351 }
352 avgSquareHistLrt += self->histLrt[i] * binMid * binMid;
353 avgHistLrtCompl += self->histLrt[i] * binMid;
354 }
355 if (numHistLrt > 0) {
356 avgHistLrt = avgHistLrt / ((float)numHistLrt);
357 }
358 avgHistLrtCompl = avgHistLrtCompl / ((float)self->modelUpdatePars[1]);
359 avgSquareHistLrt = avgSquareHistLrt / ((float)self->modelUpdatePars[1]);
360 fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl;
361 // Get threshold for LRT feature.
362 if (fluctLrt < self->featureExtractionParams.thresFluctLrt) {
363 // Very low fluctuation, so likely noise.
364 self->priorModelPars[0] = self->featureExtractionParams.maxLrt;
365 } else {
366 self->priorModelPars[0] =
367 self->featureExtractionParams.factor1ModelPars * avgHistLrt;
368 // Check if value is within min/max range.
369 if (self->priorModelPars[0] < self->featureExtractionParams.minLrt) {
370 self->priorModelPars[0] = self->featureExtractionParams.minLrt;
371 }
372 if (self->priorModelPars[0] > self->featureExtractionParams.maxLrt) {
373 self->priorModelPars[0] = self->featureExtractionParams.maxLrt;
374 }
375 }
376 // Done with LRT feature.
377
378 // For spectral flatness and spectral difference: compute the main peaks of
379 // histogram.
380 maxPeak1 = 0;
381 maxPeak2 = 0;
382 posPeak1SpecFlat = 0.0;
383 posPeak2SpecFlat = 0.0;
384 weightPeak1SpecFlat = 0;
385 weightPeak2SpecFlat = 0;
386
387 // Peaks for flatness.
388 for (i = 0; i < HIST_PAR_EST; i++) {
389 binMid =
390 (i + 0.5f) * self->featureExtractionParams.binSizeSpecFlat;
391 if (self->histSpecFlat[i] > maxPeak1) {
392 // Found new "first" peak.
393 maxPeak2 = maxPeak1;
394 weightPeak2SpecFlat = weightPeak1SpecFlat;
395 posPeak2SpecFlat = posPeak1SpecFlat;
396
397 maxPeak1 = self->histSpecFlat[i];
398 weightPeak1SpecFlat = self->histSpecFlat[i];
399 posPeak1SpecFlat = binMid;
400 } else if (self->histSpecFlat[i] > maxPeak2) {
401 // Found new "second" peak.
402 maxPeak2 = self->histSpecFlat[i];
403 weightPeak2SpecFlat = self->histSpecFlat[i];
404 posPeak2SpecFlat = binMid;
405 }
406 }
407
408 // Compute two peaks for spectral difference.
409 maxPeak1 = 0;
410 maxPeak2 = 0;
411 posPeak1SpecDiff = 0.0;
412 posPeak2SpecDiff = 0.0;
413 weightPeak1SpecDiff = 0;
414 weightPeak2SpecDiff = 0;
415 // Peaks for spectral difference.
416 for (i = 0; i < HIST_PAR_EST; i++) {
417 binMid =
418 ((float)i + 0.5f) * self->featureExtractionParams.binSizeSpecDiff;
419 if (self->histSpecDiff[i] > maxPeak1) {
420 // Found new "first" peak.
421 maxPeak2 = maxPeak1;
422 weightPeak2SpecDiff = weightPeak1SpecDiff;
423 posPeak2SpecDiff = posPeak1SpecDiff;
424
425 maxPeak1 = self->histSpecDiff[i];
426 weightPeak1SpecDiff = self->histSpecDiff[i];
427 posPeak1SpecDiff = binMid;
428 } else if (self->histSpecDiff[i] > maxPeak2) {
429 // Found new "second" peak.
430 maxPeak2 = self->histSpecDiff[i];
431 weightPeak2SpecDiff = self->histSpecDiff[i];
432 posPeak2SpecDiff = binMid;
433 }
434 }
435
436 // For spectrum flatness feature.
437 useFeatureSpecFlat = 1;
438 // Merge the two peaks if they are close.
439 if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) <
440 self->featureExtractionParams.limitPeakSpacingSpecFlat) &&
441 (weightPeak2SpecFlat >
442 self->featureExtractionParams.limitPeakWeightsSpecFlat *
443 weightPeak1SpecFlat)) {
444 weightPeak1SpecFlat += weightPeak2SpecFlat;
445 posPeak1SpecFlat = 0.5f * (posPeak1SpecFlat + posPeak2SpecFlat);
446 }
447 // Reject if weight of peaks is not large enough, or peak value too small.
448 if (weightPeak1SpecFlat <
449 self->featureExtractionParams.thresWeightSpecFlat ||
450 posPeak1SpecFlat < self->featureExtractionParams.thresPosSpecFlat) {
451 useFeatureSpecFlat = 0;
452 }
453 // If selected, get the threshold.
454 if (useFeatureSpecFlat == 1) {
455 // Compute the threshold.
456 self->priorModelPars[1] =
457 self->featureExtractionParams.factor2ModelPars * posPeak1SpecFlat;
458 // Check if value is within min/max range.
459 if (self->priorModelPars[1] < self->featureExtractionParams.minSpecFlat) {
460 self->priorModelPars[1] = self->featureExtractionParams.minSpecFlat;
461 }
462 if (self->priorModelPars[1] > self->featureExtractionParams.maxSpecFlat) {
463 self->priorModelPars[1] = self->featureExtractionParams.maxSpecFlat;
464 }
465 }
466 // Done with flatness feature.
467
468 // For template feature.
469 useFeatureSpecDiff = 1;
470 // Merge the two peaks if they are close.
471 if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) <
472 self->featureExtractionParams.limitPeakSpacingSpecDiff) &&
473 (weightPeak2SpecDiff >
474 self->featureExtractionParams.limitPeakWeightsSpecDiff *
475 weightPeak1SpecDiff)) {
476 weightPeak1SpecDiff += weightPeak2SpecDiff;
477 posPeak1SpecDiff = 0.5f * (posPeak1SpecDiff + posPeak2SpecDiff);
478 }
479 // Get the threshold value.
480 self->priorModelPars[3] =
481 self->featureExtractionParams.factor1ModelPars * posPeak1SpecDiff;
482 // Reject if weight of peaks is not large enough.
483 if (weightPeak1SpecDiff <
484 self->featureExtractionParams.thresWeightSpecDiff) {
485 useFeatureSpecDiff = 0;
486 }
487 // Check if value is within min/max range.
488 if (self->priorModelPars[3] < self->featureExtractionParams.minSpecDiff) {
489 self->priorModelPars[3] = self->featureExtractionParams.minSpecDiff;
490 }
491 if (self->priorModelPars[3] > self->featureExtractionParams.maxSpecDiff) {
492 self->priorModelPars[3] = self->featureExtractionParams.maxSpecDiff;
493 }
494 // Done with spectral difference feature.
495
496 // Don't use template feature if fluctuation of LRT feature is very low:
497 // most likely just noise state.
498 if (fluctLrt < self->featureExtractionParams.thresFluctLrt) {
499 useFeatureSpecDiff = 0;
500 }
501
502 // Select the weights between the features.
503 // self->priorModelPars[4] is weight for LRT: always selected.
504 // self->priorModelPars[5] is weight for spectral flatness.
505 // self->priorModelPars[6] is weight for spectral difference.
506 featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff);
507 self->priorModelPars[4] = 1.f / featureSum;
508 self->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum;
509 self->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum;
510
511 // Set hists to zero for next update.
512 if (self->modelUpdatePars[0] >= 1) {
513 for (i = 0; i < HIST_PAR_EST; i++) {
514 self->histLrt[i] = 0;
515 self->histSpecFlat[i] = 0;
516 self->histSpecDiff[i] = 0;
517 }
518 }
519 } // End of flag == 1.
520 }
521
522 // Compute spectral flatness on input spectrum.
523 // |magnIn| is the magnitude spectrum.
524 // Spectral flatness is returned in self->featureData[0].
ComputeSpectralFlatness(NoiseSuppressionC * self,const float * magnIn)525 static void ComputeSpectralFlatness(NoiseSuppressionC* self,
526 const float* magnIn) {
527 size_t i;
528 size_t shiftLP = 1; // Option to remove first bin(s) from spectral measures.
529 float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp;
530
531 // Compute spectral measures.
532 // For flatness.
533 avgSpectralFlatnessNum = 0.0;
534 avgSpectralFlatnessDen = self->sumMagn;
535 for (i = 0; i < shiftLP; i++) {
536 avgSpectralFlatnessDen -= magnIn[i];
537 }
538 // Compute log of ratio of the geometric to arithmetic mean: check for log(0)
539 // case.
540 for (i = shiftLP; i < self->magnLen; i++) {
541 if (magnIn[i] > 0.0) {
542 avgSpectralFlatnessNum += (float)log(magnIn[i]);
543 } else {
544 self->featureData[0] -= SPECT_FL_TAVG * self->featureData[0];
545 return;
546 }
547 }
548 // Normalize.
549 avgSpectralFlatnessDen = avgSpectralFlatnessDen / self->magnLen;
550 avgSpectralFlatnessNum = avgSpectralFlatnessNum / self->magnLen;
551
552 // Ratio and inverse log: check for case of log(0).
553 spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen;
554
555 // Time-avg update of spectral flatness feature.
556 self->featureData[0] += SPECT_FL_TAVG * (spectralTmp - self->featureData[0]);
557 // Done with flatness feature.
558 }
559
560 // Compute prior and post SNR based on quantile noise estimation.
561 // Compute DD estimate of prior SNR.
562 // Inputs:
563 // * |magn| is the signal magnitude spectrum estimate.
564 // * |noise| is the magnitude noise spectrum estimate.
565 // Outputs:
566 // * |snrLocPrior| is the computed prior SNR.
567 // * |snrLocPost| is the computed post SNR.
ComputeSnr(const NoiseSuppressionC * self,const float * magn,const float * noise,float * snrLocPrior,float * snrLocPost)568 static void ComputeSnr(const NoiseSuppressionC* self,
569 const float* magn,
570 const float* noise,
571 float* snrLocPrior,
572 float* snrLocPost) {
573 size_t i;
574
575 for (i = 0; i < self->magnLen; i++) {
576 // Previous post SNR.
577 // Previous estimate: based on previous frame with gain filter.
578 float previousEstimateStsa = self->magnPrevAnalyze[i] /
579 (self->noisePrev[i] + 0.0001f) * self->smooth[i];
580 // Post SNR.
581 snrLocPost[i] = 0.f;
582 if (magn[i] > noise[i]) {
583 snrLocPost[i] = magn[i] / (noise[i] + 0.0001f) - 1.f;
584 }
585 // DD estimate is sum of two terms: current estimate and previous estimate.
586 // Directed decision update of snrPrior.
587 snrLocPrior[i] =
588 DD_PR_SNR * previousEstimateStsa + (1.f - DD_PR_SNR) * snrLocPost[i];
589 } // End of loop over frequencies.
590 }
591
592 // Compute the difference measure between input spectrum and a template/learned
593 // noise spectrum.
594 // |magnIn| is the input spectrum.
595 // The reference/template spectrum is self->magnAvgPause[i].
596 // Returns (normalized) spectral difference in self->featureData[4].
ComputeSpectralDifference(NoiseSuppressionC * self,const float * magnIn)597 static void ComputeSpectralDifference(NoiseSuppressionC* self,
598 const float* magnIn) {
599 // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 /
600 // var(magnAvgPause)
601 size_t i;
602 float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn;
603
604 avgPause = 0.0;
605 avgMagn = self->sumMagn;
606 // Compute average quantities.
607 for (i = 0; i < self->magnLen; i++) {
608 // Conservative smooth noise spectrum from pause frames.
609 avgPause += self->magnAvgPause[i];
610 }
611 avgPause /= self->magnLen;
612 avgMagn /= self->magnLen;
613
614 covMagnPause = 0.0;
615 varPause = 0.0;
616 varMagn = 0.0;
617 // Compute variance and covariance quantities.
618 for (i = 0; i < self->magnLen; i++) {
619 covMagnPause += (magnIn[i] - avgMagn) * (self->magnAvgPause[i] - avgPause);
620 varPause +=
621 (self->magnAvgPause[i] - avgPause) * (self->magnAvgPause[i] - avgPause);
622 varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn);
623 }
624 covMagnPause /= self->magnLen;
625 varPause /= self->magnLen;
626 varMagn /= self->magnLen;
627 // Update of average magnitude spectrum.
628 self->featureData[6] += self->signalEnergy;
629
630 avgDiffNormMagn =
631 varMagn - (covMagnPause * covMagnPause) / (varPause + 0.0001f);
632 // Normalize and compute time-avg update of difference feature.
633 avgDiffNormMagn = (float)(avgDiffNormMagn / (self->featureData[5] + 0.0001f));
634 self->featureData[4] +=
635 SPECT_DIFF_TAVG * (avgDiffNormMagn - self->featureData[4]);
636 }
637
638 // Compute speech/noise probability.
639 // Speech/noise probability is returned in |probSpeechFinal|.
640 // |magn| is the input magnitude spectrum.
641 // |noise| is the noise spectrum.
642 // |snrLocPrior| is the prior SNR for each frequency.
643 // |snrLocPost| is the post SNR for each frequency.
SpeechNoiseProb(NoiseSuppressionC * self,float * probSpeechFinal,const float * snrLocPrior,const float * snrLocPost)644 static void SpeechNoiseProb(NoiseSuppressionC* self,
645 float* probSpeechFinal,
646 const float* snrLocPrior,
647 const float* snrLocPost) {
648 size_t i;
649 int sgnMap;
650 float invLrt, gainPrior, indPrior;
651 float logLrtTimeAvgKsum, besselTmp;
652 float indicator0, indicator1, indicator2;
653 float tmpFloat1, tmpFloat2;
654 float weightIndPrior0, weightIndPrior1, weightIndPrior2;
655 float threshPrior0, threshPrior1, threshPrior2;
656 float widthPrior, widthPrior0, widthPrior1, widthPrior2;
657
658 widthPrior0 = WIDTH_PR_MAP;
659 // Width for pause region: lower range, so increase width in tanh map.
660 widthPrior1 = 2.f * WIDTH_PR_MAP;
661 widthPrior2 = 2.f * WIDTH_PR_MAP; // For spectral-difference measure.
662
663 // Threshold parameters for features.
664 threshPrior0 = self->priorModelPars[0];
665 threshPrior1 = self->priorModelPars[1];
666 threshPrior2 = self->priorModelPars[3];
667
668 // Sign for flatness feature.
669 sgnMap = (int)(self->priorModelPars[2]);
670
671 // Weight parameters for features.
672 weightIndPrior0 = self->priorModelPars[4];
673 weightIndPrior1 = self->priorModelPars[5];
674 weightIndPrior2 = self->priorModelPars[6];
675
676 // Compute feature based on average LR factor.
677 // This is the average over all frequencies of the smooth log LRT.
678 logLrtTimeAvgKsum = 0.0;
679 for (i = 0; i < self->magnLen; i++) {
680 tmpFloat1 = 1.f + 2.f * snrLocPrior[i];
681 tmpFloat2 = 2.f * snrLocPrior[i] / (tmpFloat1 + 0.0001f);
682 besselTmp = (snrLocPost[i] + 1.f) * tmpFloat2;
683 self->logLrtTimeAvg[i] +=
684 LRT_TAVG * (besselTmp - (float)log(tmpFloat1) - self->logLrtTimeAvg[i]);
685 logLrtTimeAvgKsum += self->logLrtTimeAvg[i];
686 }
687 logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (self->magnLen);
688 self->featureData[3] = logLrtTimeAvgKsum;
689 // Done with computation of LR factor.
690
691 // Compute the indicator functions.
692 // Average LRT feature.
693 widthPrior = widthPrior0;
694 // Use larger width in tanh map for pause regions.
695 if (logLrtTimeAvgKsum < threshPrior0) {
696 widthPrior = widthPrior1;
697 }
698 // Compute indicator function: sigmoid map.
699 indicator0 =
700 0.5f *
701 ((float)tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 1.f);
702
703 // Spectral flatness feature.
704 tmpFloat1 = self->featureData[0];
705 widthPrior = widthPrior0;
706 // Use larger width in tanh map for pause regions.
707 if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) {
708 widthPrior = widthPrior1;
709 }
710 if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) {
711 widthPrior = widthPrior1;
712 }
713 // Compute indicator function: sigmoid map.
714 indicator1 =
715 0.5f *
716 ((float)tanh((float)sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) +
717 1.f);
718
719 // For template spectrum-difference.
720 tmpFloat1 = self->featureData[4];
721 widthPrior = widthPrior0;
722 // Use larger width in tanh map for pause regions.
723 if (tmpFloat1 < threshPrior2) {
724 widthPrior = widthPrior2;
725 }
726 // Compute indicator function: sigmoid map.
727 indicator2 =
728 0.5f * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.f);
729
730 // Combine the indicator function with the feature weights.
731 indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 +
732 weightIndPrior2 * indicator2;
733 // Done with computing indicator function.
734
735 // Compute the prior probability.
736 self->priorSpeechProb += PRIOR_UPDATE * (indPrior - self->priorSpeechProb);
737 // Make sure probabilities are within range: keep floor to 0.01.
738 if (self->priorSpeechProb > 1.f) {
739 self->priorSpeechProb = 1.f;
740 }
741 if (self->priorSpeechProb < 0.01f) {
742 self->priorSpeechProb = 0.01f;
743 }
744
745 // Final speech probability: combine prior model with LR factor:.
746 gainPrior = (1.f - self->priorSpeechProb) / (self->priorSpeechProb + 0.0001f);
747 for (i = 0; i < self->magnLen; i++) {
748 invLrt = (float)exp(-self->logLrtTimeAvg[i]);
749 invLrt = (float)gainPrior * invLrt;
750 probSpeechFinal[i] = 1.f / (1.f + invLrt);
751 }
752 }
753
754 // Update the noise features.
755 // Inputs:
756 // * |magn| is the signal magnitude spectrum estimate.
757 // * |updateParsFlag| is an update flag for parameters.
FeatureUpdate(NoiseSuppressionC * self,const float * magn,int updateParsFlag)758 static void FeatureUpdate(NoiseSuppressionC* self,
759 const float* magn,
760 int updateParsFlag) {
761 // Compute spectral flatness on input spectrum.
762 ComputeSpectralFlatness(self, magn);
763 // Compute difference of input spectrum with learned/estimated noise spectrum.
764 ComputeSpectralDifference(self, magn);
765 // Compute histograms for parameter decisions (thresholds and weights for
766 // features).
767 // Parameters are extracted once every window time.
768 // (=self->modelUpdatePars[1])
769 if (updateParsFlag >= 1) {
770 // Counter update.
771 self->modelUpdatePars[3]--;
772 // Update histogram.
773 if (self->modelUpdatePars[3] > 0) {
774 FeatureParameterExtraction(self, 0);
775 }
776 // Compute model parameters.
777 if (self->modelUpdatePars[3] == 0) {
778 FeatureParameterExtraction(self, 1);
779 self->modelUpdatePars[3] = self->modelUpdatePars[1];
780 // If wish to update only once, set flag to zero.
781 if (updateParsFlag == 1) {
782 self->modelUpdatePars[0] = 0;
783 } else {
784 // Update every window:
785 // Get normalization for spectral difference for next window estimate.
786 self->featureData[6] =
787 self->featureData[6] / ((float)self->modelUpdatePars[1]);
788 self->featureData[5] =
789 0.5f * (self->featureData[6] + self->featureData[5]);
790 self->featureData[6] = 0.f;
791 }
792 }
793 }
794 }
795
796 // Update the noise estimate.
797 // Inputs:
798 // * |magn| is the signal magnitude spectrum estimate.
799 // * |snrLocPrior| is the prior SNR.
800 // * |snrLocPost| is the post SNR.
801 // Output:
802 // * |noise| is the updated noise magnitude spectrum estimate.
UpdateNoiseEstimate(NoiseSuppressionC * self,const float * magn,const float * snrLocPrior,const float * snrLocPost,float * noise)803 static void UpdateNoiseEstimate(NoiseSuppressionC* self,
804 const float* magn,
805 const float* snrLocPrior,
806 const float* snrLocPost,
807 float* noise) {
808 size_t i;
809 float probSpeech, probNonSpeech;
810 // Time-avg parameter for noise update.
811 float gammaNoiseTmp = NOISE_UPDATE;
812 float gammaNoiseOld;
813 float noiseUpdateTmp;
814
815 for (i = 0; i < self->magnLen; i++) {
816 probSpeech = self->speechProb[i];
817 probNonSpeech = 1.f - probSpeech;
818 // Temporary noise update:
819 // Use it for speech frames if update value is less than previous.
820 noiseUpdateTmp = gammaNoiseTmp * self->noisePrev[i] +
821 (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] +
822 probSpeech * self->noisePrev[i]);
823 // Time-constant based on speech/noise state.
824 gammaNoiseOld = gammaNoiseTmp;
825 gammaNoiseTmp = NOISE_UPDATE;
826 // Increase gamma (i.e., less noise update) for frame likely to be speech.
827 if (probSpeech > PROB_RANGE) {
828 gammaNoiseTmp = SPEECH_UPDATE;
829 }
830 // Conservative noise update.
831 if (probSpeech < PROB_RANGE) {
832 self->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - self->magnAvgPause[i]);
833 }
834 // Noise update.
835 if (gammaNoiseTmp == gammaNoiseOld) {
836 noise[i] = noiseUpdateTmp;
837 } else {
838 noise[i] = gammaNoiseTmp * self->noisePrev[i] +
839 (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] +
840 probSpeech * self->noisePrev[i]);
841 // Allow for noise update downwards:
842 // If noise update decreases the noise, it is safe, so allow it to
843 // happen.
844 if (noiseUpdateTmp < noise[i]) {
845 noise[i] = noiseUpdateTmp;
846 }
847 }
848 } // End of freq loop.
849 }
850
851 // Updates |buffer| with a new |frame|.
852 // Inputs:
853 // * |frame| is a new speech frame or NULL for setting to zero.
854 // * |frame_length| is the length of the new frame.
855 // * |buffer_length| is the length of the buffer.
856 // Output:
857 // * |buffer| is the updated buffer.
UpdateBuffer(const float * frame,size_t frame_length,size_t buffer_length,float * buffer)858 static void UpdateBuffer(const float* frame,
859 size_t frame_length,
860 size_t buffer_length,
861 float* buffer) {
862 RTC_DCHECK_LT(buffer_length, 2 * frame_length);
863
864 memcpy(buffer,
865 buffer + frame_length,
866 sizeof(*buffer) * (buffer_length - frame_length));
867 if (frame) {
868 memcpy(buffer + buffer_length - frame_length,
869 frame,
870 sizeof(*buffer) * frame_length);
871 } else {
872 memset(buffer + buffer_length - frame_length,
873 0,
874 sizeof(*buffer) * frame_length);
875 }
876 }
877
878 // Transforms the signal from time to frequency domain.
879 // Inputs:
880 // * |time_data| is the signal in the time domain.
881 // * |time_data_length| is the length of the analysis buffer.
882 // * |magnitude_length| is the length of the spectrum magnitude, which equals
883 // the length of both |real| and |imag| (time_data_length / 2 + 1).
884 // Outputs:
885 // * |time_data| is the signal in the frequency domain.
886 // * |real| is the real part of the frequency domain.
887 // * |imag| is the imaginary part of the frequency domain.
888 // * |magn| is the calculated signal magnitude in the frequency domain.
FFT(NoiseSuppressionC * self,float * time_data,size_t time_data_length,size_t magnitude_length,float * real,float * imag,float * magn)889 static void FFT(NoiseSuppressionC* self,
890 float* time_data,
891 size_t time_data_length,
892 size_t magnitude_length,
893 float* real,
894 float* imag,
895 float* magn) {
896 size_t i;
897
898 RTC_DCHECK_EQ(magnitude_length, time_data_length / 2 + 1);
899
900 WebRtc_rdft(time_data_length, 1, time_data, self->ip, self->wfft);
901
902 imag[0] = 0;
903 real[0] = time_data[0];
904 magn[0] = fabsf(real[0]) + 1.f;
905 imag[magnitude_length - 1] = 0;
906 real[magnitude_length - 1] = time_data[1];
907 magn[magnitude_length - 1] = fabsf(real[magnitude_length - 1]) + 1.f;
908 for (i = 1; i < magnitude_length - 1; ++i) {
909 real[i] = time_data[2 * i];
910 imag[i] = time_data[2 * i + 1];
911 // Magnitude spectrum.
912 magn[i] = sqrtf(real[i] * real[i] + imag[i] * imag[i]) + 1.f;
913 }
914 }
915
916 // Transforms the signal from frequency to time domain.
917 // Inputs:
918 // * |real| is the real part of the frequency domain.
919 // * |imag| is the imaginary part of the frequency domain.
920 // * |magnitude_length| is the length of the spectrum magnitude, which equals
921 // the length of both |real| and |imag|.
922 // * |time_data_length| is the length of the analysis buffer
923 // (2 * (magnitude_length - 1)).
924 // Output:
925 // * |time_data| is the signal in the time domain.
IFFT(NoiseSuppressionC * self,const float * real,const float * imag,size_t magnitude_length,size_t time_data_length,float * time_data)926 static void IFFT(NoiseSuppressionC* self,
927 const float* real,
928 const float* imag,
929 size_t magnitude_length,
930 size_t time_data_length,
931 float* time_data) {
932 size_t i;
933
934 RTC_DCHECK_EQ(time_data_length, 2 * (magnitude_length - 1));
935
936 time_data[0] = real[0];
937 time_data[1] = real[magnitude_length - 1];
938 for (i = 1; i < magnitude_length - 1; ++i) {
939 time_data[2 * i] = real[i];
940 time_data[2 * i + 1] = imag[i];
941 }
942 WebRtc_rdft(time_data_length, -1, time_data, self->ip, self->wfft);
943
944 for (i = 0; i < time_data_length; ++i) {
945 time_data[i] *= 2.f / time_data_length; // FFT scaling.
946 }
947 }
948
949 // Calculates the energy of a buffer.
950 // Inputs:
951 // * |buffer| is the buffer over which the energy is calculated.
952 // * |length| is the length of the buffer.
953 // Returns the calculated energy.
Energy(const float * buffer,size_t length)954 static float Energy(const float* buffer, size_t length) {
955 size_t i;
956 float energy = 0.f;
957
958 for (i = 0; i < length; ++i) {
959 energy += buffer[i] * buffer[i];
960 }
961
962 return energy;
963 }
964
965 // Windows a buffer.
966 // Inputs:
967 // * |window| is the window by which to multiply.
968 // * |data| is the data without windowing.
969 // * |length| is the length of the window and data.
970 // Output:
971 // * |data_windowed| is the windowed data.
Windowing(const float * window,const float * data,size_t length,float * data_windowed)972 static void Windowing(const float* window,
973 const float* data,
974 size_t length,
975 float* data_windowed) {
976 size_t i;
977
978 for (i = 0; i < length; ++i) {
979 data_windowed[i] = window[i] * data[i];
980 }
981 }
982
983 // Estimate prior SNR decision-directed and compute DD based Wiener Filter.
984 // Input:
985 // * |magn| is the signal magnitude spectrum estimate.
986 // Output:
987 // * |theFilter| is the frequency response of the computed Wiener filter.
ComputeDdBasedWienerFilter(const NoiseSuppressionC * self,const float * magn,float * theFilter)988 static void ComputeDdBasedWienerFilter(const NoiseSuppressionC* self,
989 const float* magn,
990 float* theFilter) {
991 size_t i;
992 float snrPrior, previousEstimateStsa, currentEstimateStsa;
993
994 for (i = 0; i < self->magnLen; i++) {
995 // Previous estimate: based on previous frame with gain filter.
996 previousEstimateStsa = self->magnPrevProcess[i] /
997 (self->noisePrev[i] + 0.0001f) * self->smooth[i];
998 // Post and prior SNR.
999 currentEstimateStsa = 0.f;
1000 if (magn[i] > self->noise[i]) {
1001 currentEstimateStsa = magn[i] / (self->noise[i] + 0.0001f) - 1.f;
1002 }
1003 // DD estimate is sum of two terms: current estimate and previous estimate.
1004 // Directed decision update of |snrPrior|.
1005 snrPrior = DD_PR_SNR * previousEstimateStsa +
1006 (1.f - DD_PR_SNR) * currentEstimateStsa;
1007 // Gain filter.
1008 theFilter[i] = snrPrior / (self->overdrive + snrPrior);
1009 } // End of loop over frequencies.
1010 }
1011
1012 // Changes the aggressiveness of the noise suppression method.
1013 // |mode| = 0 is mild (6dB), |mode| = 1 is medium (10dB) and |mode| = 2 is
1014 // aggressive (15dB).
1015 // Returns 0 on success and -1 otherwise.
WebRtcNs_set_policy_core(NoiseSuppressionC * self,int mode)1016 int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode) {
1017 // Allow for modes: 0, 1, 2, 3.
1018 if (mode < 0 || mode > 3) {
1019 return (-1);
1020 }
1021
1022 self->aggrMode = mode;
1023 if (mode == 0) {
1024 self->overdrive = 1.f;
1025 self->denoiseBound = 0.5f;
1026 self->gainmap = 0;
1027 } else if (mode == 1) {
1028 // self->overdrive = 1.25f;
1029 self->overdrive = 1.f;
1030 self->denoiseBound = 0.25f;
1031 self->gainmap = 1;
1032 } else if (mode == 2) {
1033 // self->overdrive = 1.25f;
1034 self->overdrive = 1.1f;
1035 self->denoiseBound = 0.125f;
1036 self->gainmap = 1;
1037 } else if (mode == 3) {
1038 // self->overdrive = 1.3f;
1039 self->overdrive = 1.25f;
1040 self->denoiseBound = 0.09f;
1041 self->gainmap = 1;
1042 }
1043 return 0;
1044 }
1045
WebRtcNs_AnalyzeCore(NoiseSuppressionC * self,const float * speechFrame)1046 void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame) {
1047 size_t i;
1048 const size_t kStartBand = 5; // Skip first frequency bins during estimation.
1049 int updateParsFlag;
1050 float energy;
1051 float signalEnergy = 0.f;
1052 float sumMagn = 0.f;
1053 float tmpFloat1, tmpFloat2, tmpFloat3;
1054 float winData[ANAL_BLOCKL_MAX];
1055 float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL];
1056 float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL];
1057 float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
1058 // Variables during startup.
1059 float sum_log_i = 0.0;
1060 float sum_log_i_square = 0.0;
1061 float sum_log_magn = 0.0;
1062 float sum_log_i_log_magn = 0.0;
1063 float parametric_exp = 0.0;
1064 float parametric_num = 0.0;
1065
1066 // Check that initiation has been done.
1067 RTC_DCHECK_EQ(1, self->initFlag);
1068 updateParsFlag = self->modelUpdatePars[0];
1069
1070 // Update analysis buffer for L band.
1071 UpdateBuffer(speechFrame, self->blockLen, self->anaLen, self->analyzeBuf);
1072
1073 Windowing(self->window, self->analyzeBuf, self->anaLen, winData);
1074 energy = Energy(winData, self->anaLen);
1075 if (energy == 0.0) {
1076 // We want to avoid updating statistics in this case:
1077 // Updating feature statistics when we have zeros only will cause
1078 // thresholds to move towards zero signal situations. This in turn has the
1079 // effect that once the signal is "turned on" (non-zero values) everything
1080 // will be treated as speech and there is no noise suppression effect.
1081 // Depending on the duration of the inactive signal it takes a
1082 // considerable amount of time for the system to learn what is noise and
1083 // what is speech.
1084 self->signalEnergy = 0;
1085 return;
1086 }
1087
1088 self->blockInd++; // Update the block index only when we process a block.
1089
1090 FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn);
1091
1092 for (i = 0; i < self->magnLen; i++) {
1093 signalEnergy += real[i] * real[i] + imag[i] * imag[i];
1094 sumMagn += magn[i];
1095 if (self->blockInd < END_STARTUP_SHORT) {
1096 if (i >= kStartBand) {
1097 tmpFloat2 = logf((float)i);
1098 sum_log_i += tmpFloat2;
1099 sum_log_i_square += tmpFloat2 * tmpFloat2;
1100 tmpFloat1 = logf(magn[i]);
1101 sum_log_magn += tmpFloat1;
1102 sum_log_i_log_magn += tmpFloat2 * tmpFloat1;
1103 }
1104 }
1105 }
1106 signalEnergy /= self->magnLen;
1107 self->signalEnergy = signalEnergy;
1108 self->sumMagn = sumMagn;
1109
1110 // Quantile noise estimate.
1111 NoiseEstimation(self, magn, noise);
1112 // Compute simplified noise model during startup.
1113 if (self->blockInd < END_STARTUP_SHORT) {
1114 // Estimate White noise.
1115 self->whiteNoiseLevel += sumMagn / self->magnLen * self->overdrive;
1116 // Estimate Pink noise parameters.
1117 tmpFloat1 = sum_log_i_square * (self->magnLen - kStartBand);
1118 tmpFloat1 -= (sum_log_i * sum_log_i);
1119 tmpFloat2 =
1120 (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn);
1121 tmpFloat3 = tmpFloat2 / tmpFloat1;
1122 // Constrain the estimated spectrum to be positive.
1123 if (tmpFloat3 < 0.f) {
1124 tmpFloat3 = 0.f;
1125 }
1126 self->pinkNoiseNumerator += tmpFloat3;
1127 tmpFloat2 = (sum_log_i * sum_log_magn);
1128 tmpFloat2 -= (self->magnLen - kStartBand) * sum_log_i_log_magn;
1129 tmpFloat3 = tmpFloat2 / tmpFloat1;
1130 // Constrain the pink noise power to be in the interval [0, 1].
1131 if (tmpFloat3 < 0.f) {
1132 tmpFloat3 = 0.f;
1133 }
1134 if (tmpFloat3 > 1.f) {
1135 tmpFloat3 = 1.f;
1136 }
1137 self->pinkNoiseExp += tmpFloat3;
1138
1139 // Calculate frequency independent parts of parametric noise estimate.
1140 if (self->pinkNoiseExp > 0.f) {
1141 // Use pink noise estimate.
1142 parametric_num =
1143 expf(self->pinkNoiseNumerator / (float)(self->blockInd + 1));
1144 parametric_num *= (float)(self->blockInd + 1);
1145 parametric_exp = self->pinkNoiseExp / (float)(self->blockInd + 1);
1146 }
1147 for (i = 0; i < self->magnLen; i++) {
1148 // Estimate the background noise using the white and pink noise
1149 // parameters.
1150 if (self->pinkNoiseExp == 0.f) {
1151 // Use white noise estimate.
1152 self->parametricNoise[i] = self->whiteNoiseLevel;
1153 } else {
1154 // Use pink noise estimate.
1155 float use_band = (float)(i < kStartBand ? kStartBand : i);
1156 self->parametricNoise[i] =
1157 parametric_num / powf(use_band, parametric_exp);
1158 }
1159 // Weight quantile noise with modeled noise.
1160 noise[i] *= (self->blockInd);
1161 tmpFloat2 =
1162 self->parametricNoise[i] * (END_STARTUP_SHORT - self->blockInd);
1163 noise[i] += (tmpFloat2 / (float)(self->blockInd + 1));
1164 noise[i] /= END_STARTUP_SHORT;
1165 }
1166 }
1167 // Compute average signal during END_STARTUP_LONG time:
1168 // used to normalize spectral difference measure.
1169 if (self->blockInd < END_STARTUP_LONG) {
1170 self->featureData[5] *= self->blockInd;
1171 self->featureData[5] += signalEnergy;
1172 self->featureData[5] /= (self->blockInd + 1);
1173 }
1174
1175 // Post and prior SNR needed for SpeechNoiseProb.
1176 ComputeSnr(self, magn, noise, snrLocPrior, snrLocPost);
1177
1178 FeatureUpdate(self, magn, updateParsFlag);
1179 SpeechNoiseProb(self, self->speechProb, snrLocPrior, snrLocPost);
1180 UpdateNoiseEstimate(self, magn, snrLocPrior, snrLocPost, noise);
1181
1182 // Keep track of noise spectrum for next frame.
1183 memcpy(self->noise, noise, sizeof(*noise) * self->magnLen);
1184 memcpy(self->magnPrevAnalyze, magn, sizeof(*magn) * self->magnLen);
1185 }
1186
WebRtcNs_ProcessCore(NoiseSuppressionC * self,const float * const * speechFrame,size_t num_bands,float * const * outFrame)1187 void WebRtcNs_ProcessCore(NoiseSuppressionC* self,
1188 const float* const* speechFrame,
1189 size_t num_bands,
1190 float* const* outFrame) {
1191 // Main routine for noise reduction.
1192 int flagHB = 0;
1193 size_t i, j;
1194
1195 float energy1, energy2, gain, factor, factor1, factor2;
1196 float fout[BLOCKL_MAX];
1197 float winData[ANAL_BLOCKL_MAX];
1198 float magn[HALF_ANAL_BLOCKL];
1199 float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL];
1200 float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
1201
1202 // SWB variables.
1203 int deltaBweHB = 1;
1204 int deltaGainHB = 1;
1205 float decayBweHB = 1.0;
1206 float gainMapParHB = 1.0;
1207 float gainTimeDomainHB = 1.0;
1208 float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB;
1209 float sumMagnAnalyze, sumMagnProcess;
1210
1211 // Check that initiation has been done.
1212 RTC_DCHECK_EQ(1, self->initFlag);
1213 RTC_DCHECK_LE(num_bands - 1, NUM_HIGH_BANDS_MAX);
1214
1215 const float* const* speechFrameHB = NULL;
1216 float* const* outFrameHB = NULL;
1217 size_t num_high_bands = 0;
1218 if (num_bands > 1) {
1219 speechFrameHB = &speechFrame[1];
1220 outFrameHB = &outFrame[1];
1221 num_high_bands = num_bands - 1;
1222 flagHB = 1;
1223 // Range for averaging low band quantities for H band gain.
1224 deltaBweHB = (int)self->magnLen / 4;
1225 deltaGainHB = deltaBweHB;
1226 }
1227
1228 // Update analysis buffer for L band.
1229 UpdateBuffer(speechFrame[0], self->blockLen, self->anaLen, self->dataBuf);
1230
1231 if (flagHB == 1) {
1232 // Update analysis buffer for H bands.
1233 for (i = 0; i < num_high_bands; ++i) {
1234 UpdateBuffer(speechFrameHB[i],
1235 self->blockLen,
1236 self->anaLen,
1237 self->dataBufHB[i]);
1238 }
1239 }
1240
1241 Windowing(self->window, self->dataBuf, self->anaLen, winData);
1242 energy1 = Energy(winData, self->anaLen);
1243 if (energy1 == 0.0 || self->signalEnergy == 0) {
1244 // Synthesize the special case of zero input.
1245 // Read out fully processed segment.
1246 for (i = self->windShift; i < self->blockLen + self->windShift; i++) {
1247 fout[i - self->windShift] = self->syntBuf[i];
1248 }
1249 // Update synthesis buffer.
1250 UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);
1251
1252 for (i = 0; i < self->blockLen; ++i)
1253 outFrame[0][i] =
1254 WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
1255
1256 // For time-domain gain of HB.
1257 if (flagHB == 1) {
1258 for (i = 0; i < num_high_bands; ++i) {
1259 for (j = 0; j < self->blockLen; ++j) {
1260 outFrameHB[i][j] = WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
1261 self->dataBufHB[i][j],
1262 WEBRTC_SPL_WORD16_MIN);
1263 }
1264 }
1265 }
1266
1267 return;
1268 }
1269
1270 FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn);
1271
1272 if (self->blockInd < END_STARTUP_SHORT) {
1273 for (i = 0; i < self->magnLen; i++) {
1274 self->initMagnEst[i] += magn[i];
1275 }
1276 }
1277
1278 ComputeDdBasedWienerFilter(self, magn, theFilter);
1279
1280 for (i = 0; i < self->magnLen; i++) {
1281 // Flooring bottom.
1282 if (theFilter[i] < self->denoiseBound) {
1283 theFilter[i] = self->denoiseBound;
1284 }
1285 // Flooring top.
1286 if (theFilter[i] > 1.f) {
1287 theFilter[i] = 1.f;
1288 }
1289 if (self->blockInd < END_STARTUP_SHORT) {
1290 theFilterTmp[i] =
1291 (self->initMagnEst[i] - self->overdrive * self->parametricNoise[i]);
1292 theFilterTmp[i] /= (self->initMagnEst[i] + 0.0001f);
1293 // Flooring bottom.
1294 if (theFilterTmp[i] < self->denoiseBound) {
1295 theFilterTmp[i] = self->denoiseBound;
1296 }
1297 // Flooring top.
1298 if (theFilterTmp[i] > 1.f) {
1299 theFilterTmp[i] = 1.f;
1300 }
1301 // Weight the two suppression filters.
1302 theFilter[i] *= (self->blockInd);
1303 theFilterTmp[i] *= (END_STARTUP_SHORT - self->blockInd);
1304 theFilter[i] += theFilterTmp[i];
1305 theFilter[i] /= (END_STARTUP_SHORT);
1306 }
1307
1308 self->smooth[i] = theFilter[i];
1309 real[i] *= self->smooth[i];
1310 imag[i] *= self->smooth[i];
1311 }
1312 // Keep track of |magn| spectrum for next frame.
1313 memcpy(self->magnPrevProcess, magn, sizeof(*magn) * self->magnLen);
1314 memcpy(self->noisePrev, self->noise, sizeof(self->noise[0]) * self->magnLen);
1315 // Back to time domain.
1316 IFFT(self, real, imag, self->magnLen, self->anaLen, winData);
1317
1318 // Scale factor: only do it after END_STARTUP_LONG time.
1319 factor = 1.f;
1320 if (self->gainmap == 1 && self->blockInd > END_STARTUP_LONG) {
1321 factor1 = 1.f;
1322 factor2 = 1.f;
1323
1324 energy2 = Energy(winData, self->anaLen);
1325 gain = (float)sqrt(energy2 / (energy1 + 1.f));
1326
1327 // Scaling for new version.
1328 if (gain > B_LIM) {
1329 factor1 = 1.f + 1.3f * (gain - B_LIM);
1330 if (gain * factor1 > 1.f) {
1331 factor1 = 1.f / gain;
1332 }
1333 }
1334 if (gain < B_LIM) {
1335 // Don't reduce scale too much for pause regions:
1336 // attenuation here should be controlled by flooring.
1337 if (gain <= self->denoiseBound) {
1338 gain = self->denoiseBound;
1339 }
1340 factor2 = 1.f - 0.3f * (B_LIM - gain);
1341 }
1342 // Combine both scales with speech/noise prob:
1343 // note prior (priorSpeechProb) is not frequency dependent.
1344 factor = self->priorSpeechProb * factor1 +
1345 (1.f - self->priorSpeechProb) * factor2;
1346 } // Out of self->gainmap == 1.
1347
1348 Windowing(self->window, winData, self->anaLen, winData);
1349
1350 // Synthesis.
1351 for (i = 0; i < self->anaLen; i++) {
1352 self->syntBuf[i] += factor * winData[i];
1353 }
1354 // Read out fully processed segment.
1355 for (i = self->windShift; i < self->blockLen + self->windShift; i++) {
1356 fout[i - self->windShift] = self->syntBuf[i];
1357 }
1358 // Update synthesis buffer.
1359 UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);
1360
1361 for (i = 0; i < self->blockLen; ++i)
1362 outFrame[0][i] =
1363 WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
1364
1365 // For time-domain gain of HB.
1366 if (flagHB == 1) {
1367 // Average speech prob from low band.
1368 // Average over second half (i.e., 4->8kHz) of frequencies spectrum.
1369 avgProbSpeechHB = 0.0;
1370 for (i = self->magnLen - deltaBweHB - 1; i < self->magnLen - 1; i++) {
1371 avgProbSpeechHB += self->speechProb[i];
1372 }
1373 avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB);
1374 // If the speech was suppressed by a component between Analyze and
1375 // Process, for example the AEC, then it should not be considered speech
1376 // for high band suppression purposes.
1377 sumMagnAnalyze = 0;
1378 sumMagnProcess = 0;
1379 for (i = 0; i < self->magnLen; ++i) {
1380 sumMagnAnalyze += self->magnPrevAnalyze[i];
1381 sumMagnProcess += self->magnPrevProcess[i];
1382 }
1383 RTC_DCHECK_GT(sumMagnAnalyze, 0);
1384 avgProbSpeechHB *= sumMagnProcess / sumMagnAnalyze;
1385 // Average filter gain from low band.
1386 // Average over second half (i.e., 4->8kHz) of frequencies spectrum.
1387 avgFilterGainHB = 0.0;
1388 for (i = self->magnLen - deltaGainHB - 1; i < self->magnLen - 1; i++) {
1389 avgFilterGainHB += self->smooth[i];
1390 }
1391 avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB));
1392 avgProbSpeechHBTmp = 2.f * avgProbSpeechHB - 1.f;
1393 // Gain based on speech probability.
1394 gainModHB = 0.5f * (1.f + (float)tanh(gainMapParHB * avgProbSpeechHBTmp));
1395 // Combine gain with low band gain.
1396 gainTimeDomainHB = 0.5f * gainModHB + 0.5f * avgFilterGainHB;
1397 if (avgProbSpeechHB >= 0.5f) {
1398 gainTimeDomainHB = 0.25f * gainModHB + 0.75f * avgFilterGainHB;
1399 }
1400 gainTimeDomainHB = gainTimeDomainHB * decayBweHB;
1401 // Make sure gain is within flooring range.
1402 // Flooring bottom.
1403 if (gainTimeDomainHB < self->denoiseBound) {
1404 gainTimeDomainHB = self->denoiseBound;
1405 }
1406 // Flooring top.
1407 if (gainTimeDomainHB > 1.f) {
1408 gainTimeDomainHB = 1.f;
1409 }
1410 // Apply gain.
1411 for (i = 0; i < num_high_bands; ++i) {
1412 for (j = 0; j < self->blockLen; j++) {
1413 outFrameHB[i][j] =
1414 WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
1415 gainTimeDomainHB * self->dataBufHB[i][j],
1416 WEBRTC_SPL_WORD16_MIN);
1417 }
1418 }
1419 } // End of H band gain computation.
1420 }
1421