1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 #include <string.h>
13 #include <stdlib.h>
14 
15 #include "rtc_base/checks.h"
16 #include "common_audio/fft4g.h"
17 #include "common_audio/signal_processing/include/signal_processing_library.h"
18 #include "modules/audio_processing/ns/noise_suppression.h"
19 #include "modules/audio_processing/ns/ns_core.h"
20 #include "modules/audio_processing/ns/windows_private.h"
21 
22 // Set Feature Extraction Parameters.
set_feature_extraction_parameters(NoiseSuppressionC * self)23 static void set_feature_extraction_parameters(NoiseSuppressionC* self) {
24   // Bin size of histogram.
25   self->featureExtractionParams.binSizeLrt = 0.1f;
26   self->featureExtractionParams.binSizeSpecFlat = 0.05f;
27   self->featureExtractionParams.binSizeSpecDiff = 0.1f;
28 
29   // Range of histogram over which LRT threshold is computed.
30   self->featureExtractionParams.rangeAvgHistLrt = 1.f;
31 
32   // Scale parameters: multiply dominant peaks of the histograms by scale factor
33   // to obtain thresholds for prior model.
34   // For LRT and spectral difference.
35   self->featureExtractionParams.factor1ModelPars = 1.2f;
36   // For spectral_flatness: used when noise is flatter than speech.
37   self->featureExtractionParams.factor2ModelPars = 0.9f;
38 
39   // Peak limit for spectral flatness (varies between 0 and 1).
40   self->featureExtractionParams.thresPosSpecFlat = 0.6f;
41 
42   // Limit on spacing of two highest peaks in histogram: spacing determined by
43   // bin size.
44   self->featureExtractionParams.limitPeakSpacingSpecFlat =
45       2 * self->featureExtractionParams.binSizeSpecFlat;
46   self->featureExtractionParams.limitPeakSpacingSpecDiff =
47       2 * self->featureExtractionParams.binSizeSpecDiff;
48 
49   // Limit on relevance of second peak.
50   self->featureExtractionParams.limitPeakWeightsSpecFlat = 0.5f;
51   self->featureExtractionParams.limitPeakWeightsSpecDiff = 0.5f;
52 
53   // Fluctuation limit of LRT feature.
54   self->featureExtractionParams.thresFluctLrt = 0.05f;
55 
56   // Limit on the max and min values for the feature thresholds.
57   self->featureExtractionParams.maxLrt = 1.f;
58   self->featureExtractionParams.minLrt = 0.2f;
59 
60   self->featureExtractionParams.maxSpecFlat = 0.95f;
61   self->featureExtractionParams.minSpecFlat = 0.1f;
62 
63   self->featureExtractionParams.maxSpecDiff = 1.f;
64   self->featureExtractionParams.minSpecDiff = 0.16f;
65 
66   // Criteria of weight of histogram peak to accept/reject feature.
67   self->featureExtractionParams.thresWeightSpecFlat =
68       (int)(0.3 * (self->modelUpdatePars[1]));  // For spectral flatness.
69   self->featureExtractionParams.thresWeightSpecDiff =
70       (int)(0.3 * (self->modelUpdatePars[1]));  // For spectral difference.
71 }
72 
73 // Initialize state.
WebRtcNs_InitCore(NoiseSuppressionC * self,uint32_t fs)74 int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs) {
75   int i;
76   // Check for valid pointer.
77   if (self == NULL) {
78     return -1;
79   }
80 
81   // Initialization of struct.
82   if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) {
83     self->fs = fs;
84   } else {
85     return -1;
86   }
87   self->windShift = 0;
88   // We only support 10ms frames.
89   if (fs == 8000) {
90     self->blockLen = 80;
91     self->anaLen = 128;
92     self->window = kBlocks80w128;
93   } else {
94     self->blockLen = 160;
95     self->anaLen = 256;
96     self->window = kBlocks160w256;
97   }
98   self->magnLen = self->anaLen / 2 + 1;  // Number of frequency bins.
99 
100   // Initialize FFT work arrays.
101   self->ip[0] = 0;  // Setting this triggers initialization.
102   memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
103   WebRtc_rdft(self->anaLen, 1, self->dataBuf, self->ip, self->wfft);
104 
105   memset(self->analyzeBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
106   memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
107   memset(self->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
108 
109   // For HB processing.
110   memset(self->dataBufHB,
111          0,
112          sizeof(float) * NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);
113 
114   // For quantile noise estimation.
115   memset(self->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL);
116   for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) {
117     self->lquantile[i] = 8.f;
118     self->density[i] = 0.3f;
119   }
120 
121   for (i = 0; i < SIMULT; i++) {
122     self->counter[i] =
123         (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT);
124   }
125 
126   self->updates = 0;
127 
128   // Wiener filter initialization.
129   for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
130     self->smooth[i] = 1.f;
131   }
132 
133   // Set the aggressiveness: default.
134   self->aggrMode = 0;
135 
136   // Initialize variables for new method.
137   self->priorSpeechProb = 0.5f;  // Prior prob for speech/noise.
138   // Previous analyze mag spectrum.
139   memset(self->magnPrevAnalyze, 0, sizeof(float) * HALF_ANAL_BLOCKL);
140   // Previous process mag spectrum.
141   memset(self->magnPrevProcess, 0, sizeof(float) * HALF_ANAL_BLOCKL);
142   // Current noise-spectrum.
143   memset(self->noise, 0, sizeof(float) * HALF_ANAL_BLOCKL);
144   // Previous noise-spectrum.
145   memset(self->noisePrev, 0, sizeof(float) * HALF_ANAL_BLOCKL);
146   // Conservative noise spectrum estimate.
147   memset(self->magnAvgPause, 0, sizeof(float) * HALF_ANAL_BLOCKL);
148   // For estimation of HB in second pass.
149   memset(self->speechProb, 0, sizeof(float) * HALF_ANAL_BLOCKL);
150   // Initial average magnitude spectrum.
151   memset(self->initMagnEst, 0, sizeof(float) * HALF_ANAL_BLOCKL);
152   for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
153     // Smooth LR (same as threshold).
154     self->logLrtTimeAvg[i] = LRT_FEATURE_THR;
155   }
156 
157   // Feature quantities.
158   // Spectral flatness (start on threshold).
159   self->featureData[0] = SF_FEATURE_THR;
160   self->featureData[1] = 0.f;  // Spectral entropy: not used in this version.
161   self->featureData[2] = 0.f;  // Spectral variance: not used in this version.
162   // Average LRT factor (start on threshold).
163   self->featureData[3] = LRT_FEATURE_THR;
164   // Spectral template diff (start on threshold).
165   self->featureData[4] = SF_FEATURE_THR;
166   self->featureData[5] = 0.f;  // Normalization for spectral difference.
167   // Window time-average of input magnitude spectrum.
168   self->featureData[6] = 0.f;
169 
170   memset(self->parametricNoise, 0, sizeof(float) * HALF_ANAL_BLOCKL);
171 
172   // Histogram quantities: used to estimate/update thresholds for features.
173   memset(self->histLrt, 0, sizeof(int) * HIST_PAR_EST);
174   memset(self->histSpecFlat, 0, sizeof(int) * HIST_PAR_EST);
175   memset(self->histSpecDiff, 0, sizeof(int) * HIST_PAR_EST);
176 
177 
178   self->blockInd = -1;  // Frame counter.
179   // Default threshold for LRT feature.
180   self->priorModelPars[0] = LRT_FEATURE_THR;
181   // Threshold for spectral flatness: determined on-line.
182   self->priorModelPars[1] = 0.5f;
183   // sgn_map par for spectral measure: 1 for flatness measure.
184   self->priorModelPars[2] = 1.f;
185   // Threshold for template-difference feature: determined on-line.
186   self->priorModelPars[3] = 0.5f;
187   // Default weighting parameter for LRT feature.
188   self->priorModelPars[4] = 1.f;
189   // Default weighting parameter for spectral flatness feature.
190   self->priorModelPars[5] = 0.f;
191   // Default weighting parameter for spectral difference feature.
192   self->priorModelPars[6] = 0.f;
193 
194   // Update flag for parameters:
195   // 0 no update, 1 = update once, 2 = update every window.
196   self->modelUpdatePars[0] = 2;
197   self->modelUpdatePars[1] = 500;  // Window for update.
198   // Counter for update of conservative noise spectrum.
199   self->modelUpdatePars[2] = 0;
200   // Counter if the feature thresholds are updated during the sequence.
201   self->modelUpdatePars[3] = self->modelUpdatePars[1];
202 
203   self->signalEnergy = 0.0;
204   self->sumMagn = 0.0;
205   self->whiteNoiseLevel = 0.0;
206   self->pinkNoiseNumerator = 0.0;
207   self->pinkNoiseExp = 0.0;
208 
209   set_feature_extraction_parameters(self);
210 
211   // Default mode.
212   WebRtcNs_set_policy_core(self, 0);
213 
214   self->initFlag = 1;
215   return 0;
216 }
217 
218 // Estimate noise.
NoiseEstimation(NoiseSuppressionC * self,float * magn,float * noise)219 static void NoiseEstimation(NoiseSuppressionC* self,
220                             float* magn,
221                             float* noise) {
222   size_t i, s, offset;
223   float lmagn[HALF_ANAL_BLOCKL], delta;
224 
225   if (self->updates < END_STARTUP_LONG) {
226     self->updates++;
227   }
228 
229   for (i = 0; i < self->magnLen; i++) {
230     lmagn[i] = (float)log(magn[i]);
231   }
232 
233   // Loop over simultaneous estimates.
234   for (s = 0; s < SIMULT; s++) {
235     offset = s * self->magnLen;
236 
237     // newquantest(...)
238     for (i = 0; i < self->magnLen; i++) {
239       // Compute delta.
240       if (self->density[offset + i] > 1.0) {
241         delta = FACTOR * 1.f / self->density[offset + i];
242       } else {
243         delta = FACTOR;
244       }
245 
246       // Update log quantile estimate.
247       if (lmagn[i] > self->lquantile[offset + i]) {
248         self->lquantile[offset + i] +=
249             QUANTILE * delta / (float)(self->counter[s] + 1);
250       } else {
251         self->lquantile[offset + i] -=
252             (1.f - QUANTILE) * delta / (float)(self->counter[s] + 1);
253       }
254 
255       // Update density estimate.
256       if (fabs(lmagn[i] - self->lquantile[offset + i]) < WIDTH) {
257         self->density[offset + i] =
258             ((float)self->counter[s] * self->density[offset + i] +
259              1.f / (2.f * WIDTH)) /
260             (float)(self->counter[s] + 1);
261       }
262     }  // End loop over magnitude spectrum.
263 
264     if (self->counter[s] >= END_STARTUP_LONG) {
265       self->counter[s] = 0;
266       if (self->updates >= END_STARTUP_LONG) {
267         for (i = 0; i < self->magnLen; i++) {
268           self->quantile[i] = (float)exp(self->lquantile[offset + i]);
269         }
270       }
271     }
272 
273     self->counter[s]++;
274   }  // End loop over simultaneous estimates.
275 
276   // Sequentially update the noise during startup.
277   if (self->updates < END_STARTUP_LONG) {
278     // Use the last "s" to get noise during startup that differ from zero.
279     for (i = 0; i < self->magnLen; i++) {
280       self->quantile[i] = (float)exp(self->lquantile[offset + i]);
281     }
282   }
283 
284   for (i = 0; i < self->magnLen; i++) {
285     noise[i] = self->quantile[i];
286   }
287 }
288 
289 // Extract thresholds for feature parameters.
290 // Histograms are computed over some window size (given by
291 // self->modelUpdatePars[1]).
292 // Thresholds and weights are extracted every window.
293 // |flag| = 0 updates histogram only, |flag| = 1 computes the threshold/weights.
294 // Threshold and weights are returned in: self->priorModelPars.
FeatureParameterExtraction(NoiseSuppressionC * self,int flag)295 static void FeatureParameterExtraction(NoiseSuppressionC* self, int flag) {
296   int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt;
297   int maxPeak1, maxPeak2;
298   int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff,
299       weightPeak2SpecDiff;
300 
301   float binMid, featureSum;
302   float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff;
303   float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl;
304 
305   // 3 features: LRT, flatness, difference.
306   // lrt_feature = self->featureData[3];
307   // flat_feature = self->featureData[0];
308   // diff_feature = self->featureData[4];
309 
310   // Update histograms.
311   if (flag == 0) {
312     // LRT
313     if ((self->featureData[3] <
314          HIST_PAR_EST * self->featureExtractionParams.binSizeLrt) &&
315         (self->featureData[3] >= 0.0)) {
316       i = (int)(self->featureData[3] /
317                 self->featureExtractionParams.binSizeLrt);
318       self->histLrt[i]++;
319     }
320     // Spectral flatness.
321     if ((self->featureData[0] <
322          HIST_PAR_EST * self->featureExtractionParams.binSizeSpecFlat) &&
323         (self->featureData[0] >= 0.0)) {
324       i = (int)(self->featureData[0] /
325                 self->featureExtractionParams.binSizeSpecFlat);
326       self->histSpecFlat[i]++;
327     }
328     // Spectral difference.
329     if ((self->featureData[4] <
330          HIST_PAR_EST * self->featureExtractionParams.binSizeSpecDiff) &&
331         (self->featureData[4] >= 0.0)) {
332       i = (int)(self->featureData[4] /
333                 self->featureExtractionParams.binSizeSpecDiff);
334       self->histSpecDiff[i]++;
335     }
336   }
337 
338   // Extract parameters for speech/noise probability.
339   if (flag == 1) {
340     // LRT feature: compute the average over
341     // self->featureExtractionParams.rangeAvgHistLrt.
342     avgHistLrt = 0.0;
343     avgHistLrtCompl = 0.0;
344     avgSquareHistLrt = 0.0;
345     numHistLrt = 0;
346     for (i = 0; i < HIST_PAR_EST; i++) {
347       binMid = ((float)i + 0.5f) * self->featureExtractionParams.binSizeLrt;
348       if (binMid <= self->featureExtractionParams.rangeAvgHistLrt) {
349         avgHistLrt += self->histLrt[i] * binMid;
350         numHistLrt += self->histLrt[i];
351       }
352       avgSquareHistLrt += self->histLrt[i] * binMid * binMid;
353       avgHistLrtCompl += self->histLrt[i] * binMid;
354     }
355     if (numHistLrt > 0) {
356       avgHistLrt = avgHistLrt / ((float)numHistLrt);
357     }
358     avgHistLrtCompl = avgHistLrtCompl / ((float)self->modelUpdatePars[1]);
359     avgSquareHistLrt = avgSquareHistLrt / ((float)self->modelUpdatePars[1]);
360     fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl;
361     // Get threshold for LRT feature.
362     if (fluctLrt < self->featureExtractionParams.thresFluctLrt) {
363       // Very low fluctuation, so likely noise.
364       self->priorModelPars[0] = self->featureExtractionParams.maxLrt;
365     } else {
366       self->priorModelPars[0] =
367           self->featureExtractionParams.factor1ModelPars * avgHistLrt;
368       // Check if value is within min/max range.
369       if (self->priorModelPars[0] < self->featureExtractionParams.minLrt) {
370         self->priorModelPars[0] = self->featureExtractionParams.minLrt;
371       }
372       if (self->priorModelPars[0] > self->featureExtractionParams.maxLrt) {
373         self->priorModelPars[0] = self->featureExtractionParams.maxLrt;
374       }
375     }
376     // Done with LRT feature.
377 
378     // For spectral flatness and spectral difference: compute the main peaks of
379     // histogram.
380     maxPeak1 = 0;
381     maxPeak2 = 0;
382     posPeak1SpecFlat = 0.0;
383     posPeak2SpecFlat = 0.0;
384     weightPeak1SpecFlat = 0;
385     weightPeak2SpecFlat = 0;
386 
387     // Peaks for flatness.
388     for (i = 0; i < HIST_PAR_EST; i++) {
389       binMid =
390           (i + 0.5f) * self->featureExtractionParams.binSizeSpecFlat;
391       if (self->histSpecFlat[i] > maxPeak1) {
392         // Found new "first" peak.
393         maxPeak2 = maxPeak1;
394         weightPeak2SpecFlat = weightPeak1SpecFlat;
395         posPeak2SpecFlat = posPeak1SpecFlat;
396 
397         maxPeak1 = self->histSpecFlat[i];
398         weightPeak1SpecFlat = self->histSpecFlat[i];
399         posPeak1SpecFlat = binMid;
400       } else if (self->histSpecFlat[i] > maxPeak2) {
401         // Found new "second" peak.
402         maxPeak2 = self->histSpecFlat[i];
403         weightPeak2SpecFlat = self->histSpecFlat[i];
404         posPeak2SpecFlat = binMid;
405       }
406     }
407 
408     // Compute two peaks for spectral difference.
409     maxPeak1 = 0;
410     maxPeak2 = 0;
411     posPeak1SpecDiff = 0.0;
412     posPeak2SpecDiff = 0.0;
413     weightPeak1SpecDiff = 0;
414     weightPeak2SpecDiff = 0;
415     // Peaks for spectral difference.
416     for (i = 0; i < HIST_PAR_EST; i++) {
417       binMid =
418           ((float)i + 0.5f) * self->featureExtractionParams.binSizeSpecDiff;
419       if (self->histSpecDiff[i] > maxPeak1) {
420         // Found new "first" peak.
421         maxPeak2 = maxPeak1;
422         weightPeak2SpecDiff = weightPeak1SpecDiff;
423         posPeak2SpecDiff = posPeak1SpecDiff;
424 
425         maxPeak1 = self->histSpecDiff[i];
426         weightPeak1SpecDiff = self->histSpecDiff[i];
427         posPeak1SpecDiff = binMid;
428       } else if (self->histSpecDiff[i] > maxPeak2) {
429         // Found new "second" peak.
430         maxPeak2 = self->histSpecDiff[i];
431         weightPeak2SpecDiff = self->histSpecDiff[i];
432         posPeak2SpecDiff = binMid;
433       }
434     }
435 
436     // For spectrum flatness feature.
437     useFeatureSpecFlat = 1;
438     // Merge the two peaks if they are close.
439     if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) <
440          self->featureExtractionParams.limitPeakSpacingSpecFlat) &&
441         (weightPeak2SpecFlat >
442          self->featureExtractionParams.limitPeakWeightsSpecFlat *
443              weightPeak1SpecFlat)) {
444       weightPeak1SpecFlat += weightPeak2SpecFlat;
445       posPeak1SpecFlat = 0.5f * (posPeak1SpecFlat + posPeak2SpecFlat);
446     }
447     // Reject if weight of peaks is not large enough, or peak value too small.
448     if (weightPeak1SpecFlat <
449             self->featureExtractionParams.thresWeightSpecFlat ||
450         posPeak1SpecFlat < self->featureExtractionParams.thresPosSpecFlat) {
451       useFeatureSpecFlat = 0;
452     }
453     // If selected, get the threshold.
454     if (useFeatureSpecFlat == 1) {
455       // Compute the threshold.
456       self->priorModelPars[1] =
457           self->featureExtractionParams.factor2ModelPars * posPeak1SpecFlat;
458       // Check if value is within min/max range.
459       if (self->priorModelPars[1] < self->featureExtractionParams.minSpecFlat) {
460         self->priorModelPars[1] = self->featureExtractionParams.minSpecFlat;
461       }
462       if (self->priorModelPars[1] > self->featureExtractionParams.maxSpecFlat) {
463         self->priorModelPars[1] = self->featureExtractionParams.maxSpecFlat;
464       }
465     }
466     // Done with flatness feature.
467 
468     // For template feature.
469     useFeatureSpecDiff = 1;
470     // Merge the two peaks if they are close.
471     if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) <
472          self->featureExtractionParams.limitPeakSpacingSpecDiff) &&
473         (weightPeak2SpecDiff >
474          self->featureExtractionParams.limitPeakWeightsSpecDiff *
475              weightPeak1SpecDiff)) {
476       weightPeak1SpecDiff += weightPeak2SpecDiff;
477       posPeak1SpecDiff = 0.5f * (posPeak1SpecDiff + posPeak2SpecDiff);
478     }
479     // Get the threshold value.
480     self->priorModelPars[3] =
481         self->featureExtractionParams.factor1ModelPars * posPeak1SpecDiff;
482     // Reject if weight of peaks is not large enough.
483     if (weightPeak1SpecDiff <
484         self->featureExtractionParams.thresWeightSpecDiff) {
485       useFeatureSpecDiff = 0;
486     }
487     // Check if value is within min/max range.
488     if (self->priorModelPars[3] < self->featureExtractionParams.minSpecDiff) {
489       self->priorModelPars[3] = self->featureExtractionParams.minSpecDiff;
490     }
491     if (self->priorModelPars[3] > self->featureExtractionParams.maxSpecDiff) {
492       self->priorModelPars[3] = self->featureExtractionParams.maxSpecDiff;
493     }
494     // Done with spectral difference feature.
495 
496     // Don't use template feature if fluctuation of LRT feature is very low:
497     // most likely just noise state.
498     if (fluctLrt < self->featureExtractionParams.thresFluctLrt) {
499       useFeatureSpecDiff = 0;
500     }
501 
502     // Select the weights between the features.
503     // self->priorModelPars[4] is weight for LRT: always selected.
504     // self->priorModelPars[5] is weight for spectral flatness.
505     // self->priorModelPars[6] is weight for spectral difference.
506     featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff);
507     self->priorModelPars[4] = 1.f / featureSum;
508     self->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum;
509     self->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum;
510 
511     // Set hists to zero for next update.
512     if (self->modelUpdatePars[0] >= 1) {
513       for (i = 0; i < HIST_PAR_EST; i++) {
514         self->histLrt[i] = 0;
515         self->histSpecFlat[i] = 0;
516         self->histSpecDiff[i] = 0;
517       }
518     }
519   }  // End of flag == 1.
520 }
521 
522 // Compute spectral flatness on input spectrum.
523 // |magnIn| is the magnitude spectrum.
524 // Spectral flatness is returned in self->featureData[0].
ComputeSpectralFlatness(NoiseSuppressionC * self,const float * magnIn)525 static void ComputeSpectralFlatness(NoiseSuppressionC* self,
526                                     const float* magnIn) {
527   size_t i;
528   size_t shiftLP = 1;  // Option to remove first bin(s) from spectral measures.
529   float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp;
530 
531   // Compute spectral measures.
532   // For flatness.
533   avgSpectralFlatnessNum = 0.0;
534   avgSpectralFlatnessDen = self->sumMagn;
535   for (i = 0; i < shiftLP; i++) {
536     avgSpectralFlatnessDen -= magnIn[i];
537   }
538   // Compute log of ratio of the geometric to arithmetic mean: check for log(0)
539   // case.
540   for (i = shiftLP; i < self->magnLen; i++) {
541     if (magnIn[i] > 0.0) {
542       avgSpectralFlatnessNum += (float)log(magnIn[i]);
543     } else {
544       self->featureData[0] -= SPECT_FL_TAVG * self->featureData[0];
545       return;
546     }
547   }
548   // Normalize.
549   avgSpectralFlatnessDen = avgSpectralFlatnessDen / self->magnLen;
550   avgSpectralFlatnessNum = avgSpectralFlatnessNum / self->magnLen;
551 
552   // Ratio and inverse log: check for case of log(0).
553   spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen;
554 
555   // Time-avg update of spectral flatness feature.
556   self->featureData[0] += SPECT_FL_TAVG * (spectralTmp - self->featureData[0]);
557   // Done with flatness feature.
558 }
559 
560 // Compute prior and post SNR based on quantile noise estimation.
561 // Compute DD estimate of prior SNR.
562 // Inputs:
563 //   * |magn| is the signal magnitude spectrum estimate.
564 //   * |noise| is the magnitude noise spectrum estimate.
565 // Outputs:
566 //   * |snrLocPrior| is the computed prior SNR.
567 //   * |snrLocPost| is the computed post SNR.
ComputeSnr(const NoiseSuppressionC * self,const float * magn,const float * noise,float * snrLocPrior,float * snrLocPost)568 static void ComputeSnr(const NoiseSuppressionC* self,
569                        const float* magn,
570                        const float* noise,
571                        float* snrLocPrior,
572                        float* snrLocPost) {
573   size_t i;
574 
575   for (i = 0; i < self->magnLen; i++) {
576     // Previous post SNR.
577     // Previous estimate: based on previous frame with gain filter.
578     float previousEstimateStsa = self->magnPrevAnalyze[i] /
579         (self->noisePrev[i] + 0.0001f) * self->smooth[i];
580     // Post SNR.
581     snrLocPost[i] = 0.f;
582     if (magn[i] > noise[i]) {
583       snrLocPost[i] = magn[i] / (noise[i] + 0.0001f) - 1.f;
584     }
585     // DD estimate is sum of two terms: current estimate and previous estimate.
586     // Directed decision update of snrPrior.
587     snrLocPrior[i] =
588         DD_PR_SNR * previousEstimateStsa + (1.f - DD_PR_SNR) * snrLocPost[i];
589   }  // End of loop over frequencies.
590 }
591 
592 // Compute the difference measure between input spectrum and a template/learned
593 // noise spectrum.
594 // |magnIn| is the input spectrum.
595 // The reference/template spectrum is self->magnAvgPause[i].
596 // Returns (normalized) spectral difference in self->featureData[4].
ComputeSpectralDifference(NoiseSuppressionC * self,const float * magnIn)597 static void ComputeSpectralDifference(NoiseSuppressionC* self,
598                                       const float* magnIn) {
599   // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 /
600   // var(magnAvgPause)
601   size_t i;
602   float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn;
603 
604   avgPause = 0.0;
605   avgMagn = self->sumMagn;
606   // Compute average quantities.
607   for (i = 0; i < self->magnLen; i++) {
608     // Conservative smooth noise spectrum from pause frames.
609     avgPause += self->magnAvgPause[i];
610   }
611   avgPause /= self->magnLen;
612   avgMagn /= self->magnLen;
613 
614   covMagnPause = 0.0;
615   varPause = 0.0;
616   varMagn = 0.0;
617   // Compute variance and covariance quantities.
618   for (i = 0; i < self->magnLen; i++) {
619     covMagnPause += (magnIn[i] - avgMagn) * (self->magnAvgPause[i] - avgPause);
620     varPause +=
621         (self->magnAvgPause[i] - avgPause) * (self->magnAvgPause[i] - avgPause);
622     varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn);
623   }
624   covMagnPause /= self->magnLen;
625   varPause /= self->magnLen;
626   varMagn /= self->magnLen;
627   // Update of average magnitude spectrum.
628   self->featureData[6] += self->signalEnergy;
629 
630   avgDiffNormMagn =
631       varMagn - (covMagnPause * covMagnPause) / (varPause + 0.0001f);
632   // Normalize and compute time-avg update of difference feature.
633   avgDiffNormMagn = (float)(avgDiffNormMagn / (self->featureData[5] + 0.0001f));
634   self->featureData[4] +=
635       SPECT_DIFF_TAVG * (avgDiffNormMagn - self->featureData[4]);
636 }
637 
638 // Compute speech/noise probability.
639 // Speech/noise probability is returned in |probSpeechFinal|.
640 // |magn| is the input magnitude spectrum.
641 // |noise| is the noise spectrum.
642 // |snrLocPrior| is the prior SNR for each frequency.
643 // |snrLocPost| is the post SNR for each frequency.
SpeechNoiseProb(NoiseSuppressionC * self,float * probSpeechFinal,const float * snrLocPrior,const float * snrLocPost)644 static void SpeechNoiseProb(NoiseSuppressionC* self,
645                             float* probSpeechFinal,
646                             const float* snrLocPrior,
647                             const float* snrLocPost) {
648   size_t i;
649   int sgnMap;
650   float invLrt, gainPrior, indPrior;
651   float logLrtTimeAvgKsum, besselTmp;
652   float indicator0, indicator1, indicator2;
653   float tmpFloat1, tmpFloat2;
654   float weightIndPrior0, weightIndPrior1, weightIndPrior2;
655   float threshPrior0, threshPrior1, threshPrior2;
656   float widthPrior, widthPrior0, widthPrior1, widthPrior2;
657 
658   widthPrior0 = WIDTH_PR_MAP;
659   // Width for pause region: lower range, so increase width in tanh map.
660   widthPrior1 = 2.f * WIDTH_PR_MAP;
661   widthPrior2 = 2.f * WIDTH_PR_MAP;  // For spectral-difference measure.
662 
663   // Threshold parameters for features.
664   threshPrior0 = self->priorModelPars[0];
665   threshPrior1 = self->priorModelPars[1];
666   threshPrior2 = self->priorModelPars[3];
667 
668   // Sign for flatness feature.
669   sgnMap = (int)(self->priorModelPars[2]);
670 
671   // Weight parameters for features.
672   weightIndPrior0 = self->priorModelPars[4];
673   weightIndPrior1 = self->priorModelPars[5];
674   weightIndPrior2 = self->priorModelPars[6];
675 
676   // Compute feature based on average LR factor.
677   // This is the average over all frequencies of the smooth log LRT.
678   logLrtTimeAvgKsum = 0.0;
679   for (i = 0; i < self->magnLen; i++) {
680     tmpFloat1 = 1.f + 2.f * snrLocPrior[i];
681     tmpFloat2 = 2.f * snrLocPrior[i] / (tmpFloat1 + 0.0001f);
682     besselTmp = (snrLocPost[i] + 1.f) * tmpFloat2;
683     self->logLrtTimeAvg[i] +=
684         LRT_TAVG * (besselTmp - (float)log(tmpFloat1) - self->logLrtTimeAvg[i]);
685     logLrtTimeAvgKsum += self->logLrtTimeAvg[i];
686   }
687   logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (self->magnLen);
688   self->featureData[3] = logLrtTimeAvgKsum;
689   // Done with computation of LR factor.
690 
691   // Compute the indicator functions.
692   // Average LRT feature.
693   widthPrior = widthPrior0;
694   // Use larger width in tanh map for pause regions.
695   if (logLrtTimeAvgKsum < threshPrior0) {
696     widthPrior = widthPrior1;
697   }
698   // Compute indicator function: sigmoid map.
699   indicator0 =
700       0.5f *
701       ((float)tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 1.f);
702 
703   // Spectral flatness feature.
704   tmpFloat1 = self->featureData[0];
705   widthPrior = widthPrior0;
706   // Use larger width in tanh map for pause regions.
707   if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) {
708     widthPrior = widthPrior1;
709   }
710   if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) {
711     widthPrior = widthPrior1;
712   }
713   // Compute indicator function: sigmoid map.
714   indicator1 =
715       0.5f *
716       ((float)tanh((float)sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) +
717        1.f);
718 
719   // For template spectrum-difference.
720   tmpFloat1 = self->featureData[4];
721   widthPrior = widthPrior0;
722   // Use larger width in tanh map for pause regions.
723   if (tmpFloat1 < threshPrior2) {
724     widthPrior = widthPrior2;
725   }
726   // Compute indicator function: sigmoid map.
727   indicator2 =
728       0.5f * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.f);
729 
730   // Combine the indicator function with the feature weights.
731   indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 +
732              weightIndPrior2 * indicator2;
733   // Done with computing indicator function.
734 
735   // Compute the prior probability.
736   self->priorSpeechProb += PRIOR_UPDATE * (indPrior - self->priorSpeechProb);
737   // Make sure probabilities are within range: keep floor to 0.01.
738   if (self->priorSpeechProb > 1.f) {
739     self->priorSpeechProb = 1.f;
740   }
741   if (self->priorSpeechProb < 0.01f) {
742     self->priorSpeechProb = 0.01f;
743   }
744 
745   // Final speech probability: combine prior model with LR factor:.
746   gainPrior = (1.f - self->priorSpeechProb) / (self->priorSpeechProb + 0.0001f);
747   for (i = 0; i < self->magnLen; i++) {
748     invLrt = (float)exp(-self->logLrtTimeAvg[i]);
749     invLrt = (float)gainPrior * invLrt;
750     probSpeechFinal[i] = 1.f / (1.f + invLrt);
751   }
752 }
753 
754 // Update the noise features.
755 // Inputs:
756 //   * |magn| is the signal magnitude spectrum estimate.
757 //   * |updateParsFlag| is an update flag for parameters.
FeatureUpdate(NoiseSuppressionC * self,const float * magn,int updateParsFlag)758 static void FeatureUpdate(NoiseSuppressionC* self,
759                           const float* magn,
760                           int updateParsFlag) {
761   // Compute spectral flatness on input spectrum.
762   ComputeSpectralFlatness(self, magn);
763   // Compute difference of input spectrum with learned/estimated noise spectrum.
764   ComputeSpectralDifference(self, magn);
765   // Compute histograms for parameter decisions (thresholds and weights for
766   // features).
767   // Parameters are extracted once every window time.
768   // (=self->modelUpdatePars[1])
769   if (updateParsFlag >= 1) {
770     // Counter update.
771     self->modelUpdatePars[3]--;
772     // Update histogram.
773     if (self->modelUpdatePars[3] > 0) {
774       FeatureParameterExtraction(self, 0);
775     }
776     // Compute model parameters.
777     if (self->modelUpdatePars[3] == 0) {
778       FeatureParameterExtraction(self, 1);
779       self->modelUpdatePars[3] = self->modelUpdatePars[1];
780       // If wish to update only once, set flag to zero.
781       if (updateParsFlag == 1) {
782         self->modelUpdatePars[0] = 0;
783       } else {
784         // Update every window:
785         // Get normalization for spectral difference for next window estimate.
786         self->featureData[6] =
787             self->featureData[6] / ((float)self->modelUpdatePars[1]);
788         self->featureData[5] =
789             0.5f * (self->featureData[6] + self->featureData[5]);
790         self->featureData[6] = 0.f;
791       }
792     }
793   }
794 }
795 
796 // Update the noise estimate.
797 // Inputs:
798 //   * |magn| is the signal magnitude spectrum estimate.
799 //   * |snrLocPrior| is the prior SNR.
800 //   * |snrLocPost| is the post SNR.
801 // Output:
802 //   * |noise| is the updated noise magnitude spectrum estimate.
UpdateNoiseEstimate(NoiseSuppressionC * self,const float * magn,const float * snrLocPrior,const float * snrLocPost,float * noise)803 static void UpdateNoiseEstimate(NoiseSuppressionC* self,
804                                 const float* magn,
805                                 const float* snrLocPrior,
806                                 const float* snrLocPost,
807                                 float* noise) {
808   size_t i;
809   float probSpeech, probNonSpeech;
810   // Time-avg parameter for noise update.
811   float gammaNoiseTmp = NOISE_UPDATE;
812   float gammaNoiseOld;
813   float noiseUpdateTmp;
814 
815   for (i = 0; i < self->magnLen; i++) {
816     probSpeech = self->speechProb[i];
817     probNonSpeech = 1.f - probSpeech;
818     // Temporary noise update:
819     // Use it for speech frames if update value is less than previous.
820     noiseUpdateTmp = gammaNoiseTmp * self->noisePrev[i] +
821                      (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] +
822                                               probSpeech * self->noisePrev[i]);
823     // Time-constant based on speech/noise state.
824     gammaNoiseOld = gammaNoiseTmp;
825     gammaNoiseTmp = NOISE_UPDATE;
826     // Increase gamma (i.e., less noise update) for frame likely to be speech.
827     if (probSpeech > PROB_RANGE) {
828       gammaNoiseTmp = SPEECH_UPDATE;
829     }
830     // Conservative noise update.
831     if (probSpeech < PROB_RANGE) {
832       self->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - self->magnAvgPause[i]);
833     }
834     // Noise update.
835     if (gammaNoiseTmp == gammaNoiseOld) {
836       noise[i] = noiseUpdateTmp;
837     } else {
838       noise[i] = gammaNoiseTmp * self->noisePrev[i] +
839                  (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] +
840                                           probSpeech * self->noisePrev[i]);
841       // Allow for noise update downwards:
842       // If noise update decreases the noise, it is safe, so allow it to
843       // happen.
844       if (noiseUpdateTmp < noise[i]) {
845         noise[i] = noiseUpdateTmp;
846       }
847     }
848   }  // End of freq loop.
849 }
850 
851 // Updates |buffer| with a new |frame|.
852 // Inputs:
853 //   * |frame| is a new speech frame or NULL for setting to zero.
854 //   * |frame_length| is the length of the new frame.
855 //   * |buffer_length| is the length of the buffer.
856 // Output:
857 //   * |buffer| is the updated buffer.
UpdateBuffer(const float * frame,size_t frame_length,size_t buffer_length,float * buffer)858 static void UpdateBuffer(const float* frame,
859                          size_t frame_length,
860                          size_t buffer_length,
861                          float* buffer) {
862   RTC_DCHECK_LT(buffer_length, 2 * frame_length);
863 
864   memcpy(buffer,
865          buffer + frame_length,
866          sizeof(*buffer) * (buffer_length - frame_length));
867   if (frame) {
868     memcpy(buffer + buffer_length - frame_length,
869            frame,
870            sizeof(*buffer) * frame_length);
871   } else {
872     memset(buffer + buffer_length - frame_length,
873            0,
874            sizeof(*buffer) * frame_length);
875   }
876 }
877 
878 // Transforms the signal from time to frequency domain.
879 // Inputs:
880 //   * |time_data| is the signal in the time domain.
881 //   * |time_data_length| is the length of the analysis buffer.
882 //   * |magnitude_length| is the length of the spectrum magnitude, which equals
883 //     the length of both |real| and |imag| (time_data_length / 2 + 1).
884 // Outputs:
885 //   * |time_data| is the signal in the frequency domain.
886 //   * |real| is the real part of the frequency domain.
887 //   * |imag| is the imaginary part of the frequency domain.
888 //   * |magn| is the calculated signal magnitude in the frequency domain.
FFT(NoiseSuppressionC * self,float * time_data,size_t time_data_length,size_t magnitude_length,float * real,float * imag,float * magn)889 static void FFT(NoiseSuppressionC* self,
890                 float* time_data,
891                 size_t time_data_length,
892                 size_t magnitude_length,
893                 float* real,
894                 float* imag,
895                 float* magn) {
896   size_t i;
897 
898   RTC_DCHECK_EQ(magnitude_length, time_data_length / 2 + 1);
899 
900   WebRtc_rdft(time_data_length, 1, time_data, self->ip, self->wfft);
901 
902   imag[0] = 0;
903   real[0] = time_data[0];
904   magn[0] = fabsf(real[0]) + 1.f;
905   imag[magnitude_length - 1] = 0;
906   real[magnitude_length - 1] = time_data[1];
907   magn[magnitude_length - 1] = fabsf(real[magnitude_length - 1]) + 1.f;
908   for (i = 1; i < magnitude_length - 1; ++i) {
909     real[i] = time_data[2 * i];
910     imag[i] = time_data[2 * i + 1];
911     // Magnitude spectrum.
912     magn[i] = sqrtf(real[i] * real[i] + imag[i] * imag[i]) + 1.f;
913   }
914 }
915 
916 // Transforms the signal from frequency to time domain.
917 // Inputs:
918 //   * |real| is the real part of the frequency domain.
919 //   * |imag| is the imaginary part of the frequency domain.
920 //   * |magnitude_length| is the length of the spectrum magnitude, which equals
921 //     the length of both |real| and |imag|.
922 //   * |time_data_length| is the length of the analysis buffer
923 //     (2 * (magnitude_length - 1)).
924 // Output:
925 //   * |time_data| is the signal in the time domain.
IFFT(NoiseSuppressionC * self,const float * real,const float * imag,size_t magnitude_length,size_t time_data_length,float * time_data)926 static void IFFT(NoiseSuppressionC* self,
927                  const float* real,
928                  const float* imag,
929                  size_t magnitude_length,
930                  size_t time_data_length,
931                  float* time_data) {
932   size_t i;
933 
934   RTC_DCHECK_EQ(time_data_length, 2 * (magnitude_length - 1));
935 
936   time_data[0] = real[0];
937   time_data[1] = real[magnitude_length - 1];
938   for (i = 1; i < magnitude_length - 1; ++i) {
939     time_data[2 * i] = real[i];
940     time_data[2 * i + 1] = imag[i];
941   }
942   WebRtc_rdft(time_data_length, -1, time_data, self->ip, self->wfft);
943 
944   for (i = 0; i < time_data_length; ++i) {
945     time_data[i] *= 2.f / time_data_length;  // FFT scaling.
946   }
947 }
948 
949 // Calculates the energy of a buffer.
950 // Inputs:
951 //   * |buffer| is the buffer over which the energy is calculated.
952 //   * |length| is the length of the buffer.
953 // Returns the calculated energy.
Energy(const float * buffer,size_t length)954 static float Energy(const float* buffer, size_t length) {
955   size_t i;
956   float energy = 0.f;
957 
958   for (i = 0; i < length; ++i) {
959     energy += buffer[i] * buffer[i];
960   }
961 
962   return energy;
963 }
964 
965 // Windows a buffer.
966 // Inputs:
967 //   * |window| is the window by which to multiply.
968 //   * |data| is the data without windowing.
969 //   * |length| is the length of the window and data.
970 // Output:
971 //   * |data_windowed| is the windowed data.
Windowing(const float * window,const float * data,size_t length,float * data_windowed)972 static void Windowing(const float* window,
973                       const float* data,
974                       size_t length,
975                       float* data_windowed) {
976   size_t i;
977 
978   for (i = 0; i < length; ++i) {
979     data_windowed[i] = window[i] * data[i];
980   }
981 }
982 
983 // Estimate prior SNR decision-directed and compute DD based Wiener Filter.
984 // Input:
985 //   * |magn| is the signal magnitude spectrum estimate.
986 // Output:
987 //   * |theFilter| is the frequency response of the computed Wiener filter.
ComputeDdBasedWienerFilter(const NoiseSuppressionC * self,const float * magn,float * theFilter)988 static void ComputeDdBasedWienerFilter(const NoiseSuppressionC* self,
989                                        const float* magn,
990                                        float* theFilter) {
991   size_t i;
992   float snrPrior, previousEstimateStsa, currentEstimateStsa;
993 
994   for (i = 0; i < self->magnLen; i++) {
995     // Previous estimate: based on previous frame with gain filter.
996     previousEstimateStsa = self->magnPrevProcess[i] /
997                            (self->noisePrev[i] + 0.0001f) * self->smooth[i];
998     // Post and prior SNR.
999     currentEstimateStsa = 0.f;
1000     if (magn[i] > self->noise[i]) {
1001       currentEstimateStsa = magn[i] / (self->noise[i] + 0.0001f) - 1.f;
1002     }
1003     // DD estimate is sum of two terms: current estimate and previous estimate.
1004     // Directed decision update of |snrPrior|.
1005     snrPrior = DD_PR_SNR * previousEstimateStsa +
1006                (1.f - DD_PR_SNR) * currentEstimateStsa;
1007     // Gain filter.
1008     theFilter[i] = snrPrior / (self->overdrive + snrPrior);
1009   }  // End of loop over frequencies.
1010 }
1011 
1012 // Changes the aggressiveness of the noise suppression method.
1013 // |mode| = 0 is mild (6dB), |mode| = 1 is medium (10dB) and |mode| = 2 is
1014 // aggressive (15dB).
1015 // Returns 0 on success and -1 otherwise.
WebRtcNs_set_policy_core(NoiseSuppressionC * self,int mode)1016 int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode) {
1017   // Allow for modes: 0, 1, 2, 3.
1018   if (mode < 0 || mode > 3) {
1019     return (-1);
1020   }
1021 
1022   self->aggrMode = mode;
1023   if (mode == 0) {
1024     self->overdrive = 1.f;
1025     self->denoiseBound = 0.5f;
1026     self->gainmap = 0;
1027   } else if (mode == 1) {
1028     // self->overdrive = 1.25f;
1029     self->overdrive = 1.f;
1030     self->denoiseBound = 0.25f;
1031     self->gainmap = 1;
1032   } else if (mode == 2) {
1033     // self->overdrive = 1.25f;
1034     self->overdrive = 1.1f;
1035     self->denoiseBound = 0.125f;
1036     self->gainmap = 1;
1037   } else if (mode == 3) {
1038     // self->overdrive = 1.3f;
1039     self->overdrive = 1.25f;
1040     self->denoiseBound = 0.09f;
1041     self->gainmap = 1;
1042   }
1043   return 0;
1044 }
1045 
WebRtcNs_AnalyzeCore(NoiseSuppressionC * self,const float * speechFrame)1046 void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame) {
1047   size_t i;
1048   const size_t kStartBand = 5;  // Skip first frequency bins during estimation.
1049   int updateParsFlag;
1050   float energy;
1051   float signalEnergy = 0.f;
1052   float sumMagn = 0.f;
1053   float tmpFloat1, tmpFloat2, tmpFloat3;
1054   float winData[ANAL_BLOCKL_MAX];
1055   float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL];
1056   float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL];
1057   float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
1058   // Variables during startup.
1059   float sum_log_i = 0.0;
1060   float sum_log_i_square = 0.0;
1061   float sum_log_magn = 0.0;
1062   float sum_log_i_log_magn = 0.0;
1063   float parametric_exp = 0.0;
1064   float parametric_num = 0.0;
1065 
1066   // Check that initiation has been done.
1067   RTC_DCHECK_EQ(1, self->initFlag);
1068   updateParsFlag = self->modelUpdatePars[0];
1069 
1070   // Update analysis buffer for L band.
1071   UpdateBuffer(speechFrame, self->blockLen, self->anaLen, self->analyzeBuf);
1072 
1073   Windowing(self->window, self->analyzeBuf, self->anaLen, winData);
1074   energy = Energy(winData, self->anaLen);
1075   if (energy == 0.0) {
1076     // We want to avoid updating statistics in this case:
1077     // Updating feature statistics when we have zeros only will cause
1078     // thresholds to move towards zero signal situations. This in turn has the
1079     // effect that once the signal is "turned on" (non-zero values) everything
1080     // will be treated as speech and there is no noise suppression effect.
1081     // Depending on the duration of the inactive signal it takes a
1082     // considerable amount of time for the system to learn what is noise and
1083     // what is speech.
1084     return;
1085   }
1086 
1087   self->blockInd++;  // Update the block index only when we process a block.
1088 
1089   FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn);
1090 
1091   for (i = 0; i < self->magnLen; i++) {
1092     signalEnergy += real[i] * real[i] + imag[i] * imag[i];
1093     sumMagn += magn[i];
1094     if (self->blockInd < END_STARTUP_SHORT) {
1095       if (i >= kStartBand) {
1096         tmpFloat2 = logf((float)i);
1097         sum_log_i += tmpFloat2;
1098         sum_log_i_square += tmpFloat2 * tmpFloat2;
1099         tmpFloat1 = logf(magn[i]);
1100         sum_log_magn += tmpFloat1;
1101         sum_log_i_log_magn += tmpFloat2 * tmpFloat1;
1102       }
1103     }
1104   }
1105   signalEnergy /= self->magnLen;
1106   self->signalEnergy = signalEnergy;
1107   self->sumMagn = sumMagn;
1108 
1109   // Quantile noise estimate.
1110   NoiseEstimation(self, magn, noise);
1111   // Compute simplified noise model during startup.
1112   if (self->blockInd < END_STARTUP_SHORT) {
1113     // Estimate White noise.
1114     self->whiteNoiseLevel += sumMagn / self->magnLen * self->overdrive;
1115     // Estimate Pink noise parameters.
1116     tmpFloat1 = sum_log_i_square * (self->magnLen - kStartBand);
1117     tmpFloat1 -= (sum_log_i * sum_log_i);
1118     tmpFloat2 =
1119         (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn);
1120     tmpFloat3 = tmpFloat2 / tmpFloat1;
1121     // Constrain the estimated spectrum to be positive.
1122     if (tmpFloat3 < 0.f) {
1123       tmpFloat3 = 0.f;
1124     }
1125     self->pinkNoiseNumerator += tmpFloat3;
1126     tmpFloat2 = (sum_log_i * sum_log_magn);
1127     tmpFloat2 -= (self->magnLen - kStartBand) * sum_log_i_log_magn;
1128     tmpFloat3 = tmpFloat2 / tmpFloat1;
1129     // Constrain the pink noise power to be in the interval [0, 1].
1130     if (tmpFloat3 < 0.f) {
1131       tmpFloat3 = 0.f;
1132     }
1133     if (tmpFloat3 > 1.f) {
1134       tmpFloat3 = 1.f;
1135     }
1136     self->pinkNoiseExp += tmpFloat3;
1137 
1138     // Calculate frequency independent parts of parametric noise estimate.
1139     if (self->pinkNoiseExp > 0.f) {
1140       // Use pink noise estimate.
1141       parametric_num =
1142           expf(self->pinkNoiseNumerator / (float)(self->blockInd + 1));
1143       parametric_num *= (float)(self->blockInd + 1);
1144       parametric_exp = self->pinkNoiseExp / (float)(self->blockInd + 1);
1145     }
1146     for (i = 0; i < self->magnLen; i++) {
1147       // Estimate the background noise using the white and pink noise
1148       // parameters.
1149       if (self->pinkNoiseExp == 0.f) {
1150         // Use white noise estimate.
1151         self->parametricNoise[i] = self->whiteNoiseLevel;
1152       } else {
1153         // Use pink noise estimate.
1154         float use_band = (float)(i < kStartBand ? kStartBand : i);
1155         self->parametricNoise[i] =
1156             parametric_num / powf(use_band, parametric_exp);
1157       }
1158       // Weight quantile noise with modeled noise.
1159       noise[i] *= (self->blockInd);
1160       tmpFloat2 =
1161           self->parametricNoise[i] * (END_STARTUP_SHORT - self->blockInd);
1162       noise[i] += (tmpFloat2 / (float)(self->blockInd + 1));
1163       noise[i] /= END_STARTUP_SHORT;
1164     }
1165   }
1166   // Compute average signal during END_STARTUP_LONG time:
1167   // used to normalize spectral difference measure.
1168   if (self->blockInd < END_STARTUP_LONG) {
1169     self->featureData[5] *= self->blockInd;
1170     self->featureData[5] += signalEnergy;
1171     self->featureData[5] /= (self->blockInd + 1);
1172   }
1173 
1174   // Post and prior SNR needed for SpeechNoiseProb.
1175   ComputeSnr(self, magn, noise, snrLocPrior, snrLocPost);
1176 
1177   FeatureUpdate(self, magn, updateParsFlag);
1178   SpeechNoiseProb(self, self->speechProb, snrLocPrior, snrLocPost);
1179   UpdateNoiseEstimate(self, magn, snrLocPrior, snrLocPost, noise);
1180 
1181   // Keep track of noise spectrum for next frame.
1182   memcpy(self->noise, noise, sizeof(*noise) * self->magnLen);
1183   memcpy(self->magnPrevAnalyze, magn, sizeof(*magn) * self->magnLen);
1184 }
1185 
WebRtcNs_ProcessCore(NoiseSuppressionC * self,const float * const * speechFrame,size_t num_bands,float * const * outFrame)1186 void WebRtcNs_ProcessCore(NoiseSuppressionC* self,
1187                           const float* const* speechFrame,
1188                           size_t num_bands,
1189                           float* const* outFrame) {
1190   // Main routine for noise reduction.
1191   int flagHB = 0;
1192   size_t i, j;
1193 
1194   float energy1, energy2, gain, factor, factor1, factor2;
1195   float fout[BLOCKL_MAX];
1196   float winData[ANAL_BLOCKL_MAX];
1197   float magn[HALF_ANAL_BLOCKL];
1198   float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL];
1199   float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
1200 
1201   // SWB variables.
1202   int deltaBweHB = 1;
1203   int deltaGainHB = 1;
1204   float decayBweHB = 1.0;
1205   float gainMapParHB = 1.0;
1206   float gainTimeDomainHB = 1.0;
1207   float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB;
1208   float sumMagnAnalyze, sumMagnProcess;
1209 
1210   // Check that initiation has been done.
1211   RTC_DCHECK_EQ(1, self->initFlag);
1212   RTC_DCHECK_LE(num_bands - 1, NUM_HIGH_BANDS_MAX);
1213 
1214   const float* const* speechFrameHB = NULL;
1215   float* const* outFrameHB = NULL;
1216   size_t num_high_bands = 0;
1217   if (num_bands > 1) {
1218     speechFrameHB = &speechFrame[1];
1219     outFrameHB = &outFrame[1];
1220     num_high_bands = num_bands - 1;
1221     flagHB = 1;
1222     // Range for averaging low band quantities for H band gain.
1223     deltaBweHB = (int)self->magnLen / 4;
1224     deltaGainHB = deltaBweHB;
1225   }
1226 
1227   // Update analysis buffer for L band.
1228   UpdateBuffer(speechFrame[0], self->blockLen, self->anaLen, self->dataBuf);
1229 
1230   if (flagHB == 1) {
1231     // Update analysis buffer for H bands.
1232     for (i = 0; i < num_high_bands; ++i) {
1233       UpdateBuffer(speechFrameHB[i],
1234                    self->blockLen,
1235                    self->anaLen,
1236                    self->dataBufHB[i]);
1237     }
1238   }
1239 
1240   Windowing(self->window, self->dataBuf, self->anaLen, winData);
1241   energy1 = Energy(winData, self->anaLen);
1242   if (energy1 == 0.0) {
1243     // Synthesize the special case of zero input.
1244     // Read out fully processed segment.
1245     for (i = self->windShift; i < self->blockLen + self->windShift; i++) {
1246       fout[i - self->windShift] = self->syntBuf[i];
1247     }
1248     // Update synthesis buffer.
1249     UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);
1250 
1251     for (i = 0; i < self->blockLen; ++i)
1252       outFrame[0][i] =
1253           WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
1254 
1255     // For time-domain gain of HB.
1256     if (flagHB == 1) {
1257       for (i = 0; i < num_high_bands; ++i) {
1258         for (j = 0; j < self->blockLen; ++j) {
1259           outFrameHB[i][j] = WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
1260                                             self->dataBufHB[i][j],
1261                                             WEBRTC_SPL_WORD16_MIN);
1262         }
1263       }
1264     }
1265 
1266     return;
1267   }
1268 
1269   FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn);
1270 
1271   if (self->blockInd < END_STARTUP_SHORT) {
1272     for (i = 0; i < self->magnLen; i++) {
1273       self->initMagnEst[i] += magn[i];
1274     }
1275   }
1276 
1277   ComputeDdBasedWienerFilter(self, magn, theFilter);
1278 
1279   for (i = 0; i < self->magnLen; i++) {
1280     // Flooring bottom.
1281     if (theFilter[i] < self->denoiseBound) {
1282       theFilter[i] = self->denoiseBound;
1283     }
1284     // Flooring top.
1285     if (theFilter[i] > 1.f) {
1286       theFilter[i] = 1.f;
1287     }
1288     if (self->blockInd < END_STARTUP_SHORT) {
1289       theFilterTmp[i] =
1290           (self->initMagnEst[i] - self->overdrive * self->parametricNoise[i]);
1291       theFilterTmp[i] /= (self->initMagnEst[i] + 0.0001f);
1292       // Flooring bottom.
1293       if (theFilterTmp[i] < self->denoiseBound) {
1294         theFilterTmp[i] = self->denoiseBound;
1295       }
1296       // Flooring top.
1297       if (theFilterTmp[i] > 1.f) {
1298         theFilterTmp[i] = 1.f;
1299       }
1300       // Weight the two suppression filters.
1301       theFilter[i] *= (self->blockInd);
1302       theFilterTmp[i] *= (END_STARTUP_SHORT - self->blockInd);
1303       theFilter[i] += theFilterTmp[i];
1304       theFilter[i] /= (END_STARTUP_SHORT);
1305     }
1306 
1307     self->smooth[i] = theFilter[i];
1308     real[i] *= self->smooth[i];
1309     imag[i] *= self->smooth[i];
1310   }
1311   // Keep track of |magn| spectrum for next frame.
1312   memcpy(self->magnPrevProcess, magn, sizeof(*magn) * self->magnLen);
1313   memcpy(self->noisePrev, self->noise, sizeof(self->noise[0]) * self->magnLen);
1314   // Back to time domain.
1315   IFFT(self, real, imag, self->magnLen, self->anaLen, winData);
1316 
1317   // Scale factor: only do it after END_STARTUP_LONG time.
1318   factor = 1.f;
1319   if (self->gainmap == 1 && self->blockInd > END_STARTUP_LONG) {
1320     factor1 = 1.f;
1321     factor2 = 1.f;
1322 
1323     energy2 = Energy(winData, self->anaLen);
1324     gain = (float)sqrt(energy2 / (energy1 + 1.f));
1325 
1326     // Scaling for new version.
1327     if (gain > B_LIM) {
1328       factor1 = 1.f + 1.3f * (gain - B_LIM);
1329       if (gain * factor1 > 1.f) {
1330         factor1 = 1.f / gain;
1331       }
1332     }
1333     if (gain < B_LIM) {
1334       // Don't reduce scale too much for pause regions:
1335       // attenuation here should be controlled by flooring.
1336       if (gain <= self->denoiseBound) {
1337         gain = self->denoiseBound;
1338       }
1339       factor2 = 1.f - 0.3f * (B_LIM - gain);
1340     }
1341     // Combine both scales with speech/noise prob:
1342     // note prior (priorSpeechProb) is not frequency dependent.
1343     factor = self->priorSpeechProb * factor1 +
1344              (1.f - self->priorSpeechProb) * factor2;
1345   }  // Out of self->gainmap == 1.
1346 
1347   Windowing(self->window, winData, self->anaLen, winData);
1348 
1349   // Synthesis.
1350   for (i = 0; i < self->anaLen; i++) {
1351     self->syntBuf[i] += factor * winData[i];
1352   }
1353   // Read out fully processed segment.
1354   for (i = self->windShift; i < self->blockLen + self->windShift; i++) {
1355     fout[i - self->windShift] = self->syntBuf[i];
1356   }
1357   // Update synthesis buffer.
1358   UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);
1359 
1360   for (i = 0; i < self->blockLen; ++i)
1361     outFrame[0][i] =
1362         WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
1363 
1364   // For time-domain gain of HB.
1365   if (flagHB == 1) {
1366     // Average speech prob from low band.
1367     // Average over second half (i.e., 4->8kHz) of frequencies spectrum.
1368     avgProbSpeechHB = 0.0;
1369     for (i = self->magnLen - deltaBweHB - 1; i < self->magnLen - 1; i++) {
1370       avgProbSpeechHB += self->speechProb[i];
1371     }
1372     avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB);
1373     // If the speech was suppressed by a component between Analyze and
1374     // Process, for example the AEC, then it should not be considered speech
1375     // for high band suppression purposes.
1376     sumMagnAnalyze = 0;
1377     sumMagnProcess = 0;
1378     for (i = 0; i < self->magnLen; ++i) {
1379       sumMagnAnalyze += self->magnPrevAnalyze[i];
1380       sumMagnProcess += self->magnPrevProcess[i];
1381     }
1382     avgProbSpeechHB *= sumMagnProcess / sumMagnAnalyze;
1383     // Average filter gain from low band.
1384     // Average over second half (i.e., 4->8kHz) of frequencies spectrum.
1385     avgFilterGainHB = 0.0;
1386     for (i = self->magnLen - deltaGainHB - 1; i < self->magnLen - 1; i++) {
1387       avgFilterGainHB += self->smooth[i];
1388     }
1389     avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB));
1390     avgProbSpeechHBTmp = 2.f * avgProbSpeechHB - 1.f;
1391     // Gain based on speech probability.
1392     gainModHB = 0.5f * (1.f + (float)tanh(gainMapParHB * avgProbSpeechHBTmp));
1393     // Combine gain with low band gain.
1394     gainTimeDomainHB = 0.5f * gainModHB + 0.5f * avgFilterGainHB;
1395     if (avgProbSpeechHB >= 0.5f) {
1396       gainTimeDomainHB = 0.25f * gainModHB + 0.75f * avgFilterGainHB;
1397     }
1398     gainTimeDomainHB = gainTimeDomainHB * decayBweHB;
1399     // Make sure gain is within flooring range.
1400     // Flooring bottom.
1401     if (gainTimeDomainHB < self->denoiseBound) {
1402       gainTimeDomainHB = self->denoiseBound;
1403     }
1404     // Flooring top.
1405     if (gainTimeDomainHB > 1.f) {
1406       gainTimeDomainHB = 1.f;
1407     }
1408     // Apply gain.
1409     for (i = 0; i < num_high_bands; ++i) {
1410       for (j = 0; j < self->blockLen; j++) {
1411         outFrameHB[i][j] =
1412             WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
1413                            gainTimeDomainHB * self->dataBufHB[i][j],
1414                            WEBRTC_SPL_WORD16_MIN);
1415       }
1416     }
1417   }  // End of H band gain computation.
1418 }
1419