1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/agc/legacy/digital_agc.h"
12
13 #include <string.h>
14
15 #include "modules/audio_processing/agc/legacy/gain_control.h"
16 #include "rtc_base/checks.h"
17
18 namespace webrtc {
19
20 namespace {
21
22 // To generate the gaintable, copy&paste the following lines to a Matlab window:
23 // MaxGain = 6; MinGain = 0; CompRatio = 3; Knee = 1;
24 // zeros = 0:31; lvl = 2.^(1-zeros);
25 // A = -10*log10(lvl) * (CompRatio - 1) / CompRatio;
26 // B = MaxGain - MinGain;
27 // gains = round(2^16*10.^(0.05 * (MinGain + B * (
28 // log(exp(-Knee*A)+exp(-Knee*B)) - log(1+exp(-Knee*B)) ) /
29 // log(1/(1+exp(Knee*B))))));
30 // fprintf(1, '\t%i, %i, %i, %i,\n', gains);
31 // % Matlab code for plotting the gain and input/output level characteristic
32 // (copy/paste the following 3 lines):
33 // in = 10*log10(lvl); out = 20*log10(gains/65536);
34 // subplot(121); plot(in, out); axis([-30, 0, -5, 20]); grid on; xlabel('Input
35 // (dB)'); ylabel('Gain (dB)');
36 // subplot(122); plot(in, in+out); axis([-30, 0, -30, 5]); grid on;
37 // xlabel('Input (dB)'); ylabel('Output (dB)');
38 // zoom on;
39
40 // Generator table for y=log2(1+e^x) in Q8.
41 enum { kGenFuncTableSize = 128 };
42 static const uint16_t kGenFuncTable[kGenFuncTableSize] = {
43 256, 485, 786, 1126, 1484, 1849, 2217, 2586, 2955, 3324, 3693,
44 4063, 4432, 4801, 5171, 5540, 5909, 6279, 6648, 7017, 7387, 7756,
45 8125, 8495, 8864, 9233, 9603, 9972, 10341, 10711, 11080, 11449, 11819,
46 12188, 12557, 12927, 13296, 13665, 14035, 14404, 14773, 15143, 15512, 15881,
47 16251, 16620, 16989, 17359, 17728, 18097, 18466, 18836, 19205, 19574, 19944,
48 20313, 20682, 21052, 21421, 21790, 22160, 22529, 22898, 23268, 23637, 24006,
49 24376, 24745, 25114, 25484, 25853, 26222, 26592, 26961, 27330, 27700, 28069,
50 28438, 28808, 29177, 29546, 29916, 30285, 30654, 31024, 31393, 31762, 32132,
51 32501, 32870, 33240, 33609, 33978, 34348, 34717, 35086, 35456, 35825, 36194,
52 36564, 36933, 37302, 37672, 38041, 38410, 38780, 39149, 39518, 39888, 40257,
53 40626, 40996, 41365, 41734, 42104, 42473, 42842, 43212, 43581, 43950, 44320,
54 44689, 45058, 45428, 45797, 46166, 46536, 46905};
55
56 static const int16_t kAvgDecayTime = 250; // frames; < 3000
57
58 // the 32 most significant bits of A(19) * B(26) >> 13
59 #define AGC_MUL32(A, B) (((B) >> 13) * (A) + (((0x00001FFF & (B)) * (A)) >> 13))
60 // C + the 32 most significant bits of A * B
61 #define AGC_SCALEDIFF32(A, B, C) \
62 ((C) + ((B) >> 16) * (A) + (((0x0000FFFF & (B)) * (A)) >> 16))
63
64 } // namespace
65
WebRtcAgc_CalculateGainTable(int32_t * gainTable,int16_t digCompGaindB,int16_t targetLevelDbfs,uint8_t limiterEnable,int16_t analogTarget)66 int32_t WebRtcAgc_CalculateGainTable(int32_t* gainTable, // Q16
67 int16_t digCompGaindB, // Q0
68 int16_t targetLevelDbfs, // Q0
69 uint8_t limiterEnable,
70 int16_t analogTarget) { // Q0
71 // This function generates the compressor gain table used in the fixed digital
72 // part.
73 uint32_t tmpU32no1, tmpU32no2, absInLevel, logApprox;
74 int32_t inLevel, limiterLvl;
75 int32_t tmp32, tmp32no1, tmp32no2, numFIX, den, y32;
76 const uint16_t kLog10 = 54426; // log2(10) in Q14
77 const uint16_t kLog10_2 = 49321; // 10*log10(2) in Q14
78 const uint16_t kLogE_1 = 23637; // log2(e) in Q14
79 uint16_t constMaxGain;
80 uint16_t tmpU16, intPart, fracPart;
81 const int16_t kCompRatio = 3;
82 const int16_t kSoftLimiterLeft = 1;
83 int16_t limiterOffset = 0; // Limiter offset
84 int16_t limiterIdx, limiterLvlX;
85 int16_t constLinApprox, zeroGainLvl, maxGain, diffGain;
86 int16_t i, tmp16, tmp16no1;
87 int zeros, zerosScale;
88
89 // Constants
90 // kLogE_1 = 23637; // log2(e) in Q14
91 // kLog10 = 54426; // log2(10) in Q14
92 // kLog10_2 = 49321; // 10*log10(2) in Q14
93
94 // Calculate maximum digital gain and zero gain level
95 tmp32no1 = (digCompGaindB - analogTarget) * (kCompRatio - 1);
96 tmp16no1 = analogTarget - targetLevelDbfs;
97 tmp16no1 +=
98 WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio);
99 maxGain = WEBRTC_SPL_MAX(tmp16no1, (analogTarget - targetLevelDbfs));
100 tmp32no1 = maxGain * kCompRatio;
101 zeroGainLvl = digCompGaindB;
102 zeroGainLvl -= WebRtcSpl_DivW32W16ResW16(tmp32no1 + ((kCompRatio - 1) >> 1),
103 kCompRatio - 1);
104 if ((digCompGaindB <= analogTarget) && (limiterEnable)) {
105 zeroGainLvl += (analogTarget - digCompGaindB + kSoftLimiterLeft);
106 limiterOffset = 0;
107 }
108
109 // Calculate the difference between maximum gain and gain at 0dB0v:
110 // diffGain = maxGain + (compRatio-1)*zeroGainLvl/compRatio
111 // = (compRatio-1)*digCompGaindB/compRatio
112 tmp32no1 = digCompGaindB * (kCompRatio - 1);
113 diffGain =
114 WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio);
115 if (diffGain < 0 || diffGain >= kGenFuncTableSize) {
116 RTC_DCHECK(0);
117 return -1;
118 }
119
120 // Calculate the limiter level and index:
121 // limiterLvlX = analogTarget - limiterOffset
122 // limiterLvl = targetLevelDbfs + limiterOffset/compRatio
123 limiterLvlX = analogTarget - limiterOffset;
124 limiterIdx = 2 + WebRtcSpl_DivW32W16ResW16((int32_t)limiterLvlX * (1 << 13),
125 kLog10_2 / 2);
126 tmp16no1 =
127 WebRtcSpl_DivW32W16ResW16(limiterOffset + (kCompRatio >> 1), kCompRatio);
128 limiterLvl = targetLevelDbfs + tmp16no1;
129
130 // Calculate (through table lookup):
131 // constMaxGain = log2(1+2^(log2(e)*diffGain)); (in Q8)
132 constMaxGain = kGenFuncTable[diffGain]; // in Q8
133
134 // Calculate a parameter used to approximate the fractional part of 2^x with a
135 // piecewise linear function in Q14:
136 // constLinApprox = round(3/2*(4*(3-2*sqrt(2))/(log(2)^2)-0.5)*2^14);
137 constLinApprox = 22817; // in Q14
138
139 // Calculate a denominator used in the exponential part to convert from dB to
140 // linear scale:
141 // den = 20*constMaxGain (in Q8)
142 den = WEBRTC_SPL_MUL_16_U16(20, constMaxGain); // in Q8
143
144 for (i = 0; i < 32; i++) {
145 // Calculate scaled input level (compressor):
146 // inLevel =
147 // fix((-constLog10_2*(compRatio-1)*(1-i)+fix(compRatio/2))/compRatio)
148 tmp16 = (int16_t)((kCompRatio - 1) * (i - 1)); // Q0
149 tmp32 = WEBRTC_SPL_MUL_16_U16(tmp16, kLog10_2) + 1; // Q14
150 inLevel = WebRtcSpl_DivW32W16(tmp32, kCompRatio); // Q14
151
152 // Calculate diffGain-inLevel, to map using the genFuncTable
153 inLevel = (int32_t)diffGain * (1 << 14) - inLevel; // Q14
154
155 // Make calculations on abs(inLevel) and compensate for the sign afterwards.
156 absInLevel = (uint32_t)WEBRTC_SPL_ABS_W32(inLevel); // Q14
157
158 // LUT with interpolation
159 intPart = (uint16_t)(absInLevel >> 14);
160 fracPart =
161 (uint16_t)(absInLevel & 0x00003FFF); // extract the fractional part
162 tmpU16 = kGenFuncTable[intPart + 1] - kGenFuncTable[intPart]; // Q8
163 tmpU32no1 = tmpU16 * fracPart; // Q22
164 tmpU32no1 += (uint32_t)kGenFuncTable[intPart] << 14; // Q22
165 logApprox = tmpU32no1 >> 8; // Q14
166 // Compensate for negative exponent using the relation:
167 // log2(1 + 2^-x) = log2(1 + 2^x) - x
168 if (inLevel < 0) {
169 zeros = WebRtcSpl_NormU32(absInLevel);
170 zerosScale = 0;
171 if (zeros < 15) {
172 // Not enough space for multiplication
173 tmpU32no2 = absInLevel >> (15 - zeros); // Q(zeros-1)
174 tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no2, kLogE_1); // Q(zeros+13)
175 if (zeros < 9) {
176 zerosScale = 9 - zeros;
177 tmpU32no1 >>= zerosScale; // Q(zeros+13)
178 } else {
179 tmpU32no2 >>= zeros - 9; // Q22
180 }
181 } else {
182 tmpU32no2 = WEBRTC_SPL_UMUL_32_16(absInLevel, kLogE_1); // Q28
183 tmpU32no2 >>= 6; // Q22
184 }
185 logApprox = 0;
186 if (tmpU32no2 < tmpU32no1) {
187 logApprox = (tmpU32no1 - tmpU32no2) >> (8 - zerosScale); // Q14
188 }
189 }
190 numFIX = (maxGain * constMaxGain) * (1 << 6); // Q14
191 numFIX -= (int32_t)logApprox * diffGain; // Q14
192
193 // Calculate ratio
194 // Shift |numFIX| as much as possible.
195 // Ensure we avoid wrap-around in |den| as well.
196 if (numFIX > (den >> 8) || -numFIX > (den >> 8)) { // |den| is Q8.
197 zeros = WebRtcSpl_NormW32(numFIX);
198 } else {
199 zeros = WebRtcSpl_NormW32(den) + 8;
200 }
201 numFIX *= 1 << zeros; // Q(14+zeros)
202
203 // Shift den so we end up in Qy1
204 tmp32no1 = WEBRTC_SPL_SHIFT_W32(den, zeros - 9); // Q(zeros - 1)
205 y32 = numFIX / tmp32no1; // in Q15
206 // This is to do rounding in Q14.
207 y32 = y32 >= 0 ? (y32 + 1) >> 1 : -((-y32 + 1) >> 1);
208
209 if (limiterEnable && (i < limiterIdx)) {
210 tmp32 = WEBRTC_SPL_MUL_16_U16(i - 1, kLog10_2); // Q14
211 tmp32 -= limiterLvl * (1 << 14); // Q14
212 y32 = WebRtcSpl_DivW32W16(tmp32 + 10, 20);
213 }
214 if (y32 > 39000) {
215 tmp32 = (y32 >> 1) * kLog10 + 4096; // in Q27
216 tmp32 >>= 13; // In Q14.
217 } else {
218 tmp32 = y32 * kLog10 + 8192; // in Q28
219 tmp32 >>= 14; // In Q14.
220 }
221 tmp32 += 16 << 14; // in Q14 (Make sure final output is in Q16)
222
223 // Calculate power
224 if (tmp32 > 0) {
225 intPart = (int16_t)(tmp32 >> 14);
226 fracPart = (uint16_t)(tmp32 & 0x00003FFF); // in Q14
227 if ((fracPart >> 13) != 0) {
228 tmp16 = (2 << 14) - constLinApprox;
229 tmp32no2 = (1 << 14) - fracPart;
230 tmp32no2 *= tmp16;
231 tmp32no2 >>= 13;
232 tmp32no2 = (1 << 14) - tmp32no2;
233 } else {
234 tmp16 = constLinApprox - (1 << 14);
235 tmp32no2 = (fracPart * tmp16) >> 13;
236 }
237 fracPart = (uint16_t)tmp32no2;
238 gainTable[i] =
239 (1 << intPart) + WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14);
240 } else {
241 gainTable[i] = 0;
242 }
243 }
244
245 return 0;
246 }
247
WebRtcAgc_InitDigital(DigitalAgc * stt,int16_t agcMode)248 int32_t WebRtcAgc_InitDigital(DigitalAgc* stt, int16_t agcMode) {
249 if (agcMode == kAgcModeFixedDigital) {
250 // start at minimum to find correct gain faster
251 stt->capacitorSlow = 0;
252 } else {
253 // start out with 0 dB gain
254 stt->capacitorSlow = 134217728; // (int32_t)(0.125f * 32768.0f * 32768.0f);
255 }
256 stt->capacitorFast = 0;
257 stt->gain = 65536;
258 stt->gatePrevious = 0;
259 stt->agcMode = agcMode;
260
261 // initialize VADs
262 WebRtcAgc_InitVad(&stt->vadNearend);
263 WebRtcAgc_InitVad(&stt->vadFarend);
264
265 return 0;
266 }
267
WebRtcAgc_AddFarendToDigital(DigitalAgc * stt,const int16_t * in_far,size_t nrSamples)268 int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* stt,
269 const int16_t* in_far,
270 size_t nrSamples) {
271 RTC_DCHECK(stt);
272 // VAD for far end
273 WebRtcAgc_ProcessVad(&stt->vadFarend, in_far, nrSamples);
274
275 return 0;
276 }
277
278 // Gains is an 11 element long array (one value per ms, incl start & end).
WebRtcAgc_ComputeDigitalGains(DigitalAgc * stt,const int16_t * const * in_near,size_t num_bands,uint32_t FS,int16_t lowlevelSignal,int32_t gains[11])279 int32_t WebRtcAgc_ComputeDigitalGains(DigitalAgc* stt,
280 const int16_t* const* in_near,
281 size_t num_bands,
282 uint32_t FS,
283 int16_t lowlevelSignal,
284 int32_t gains[11]) {
285 int32_t tmp32;
286 int32_t env[10];
287 int32_t max_nrg;
288 int32_t cur_level;
289 int32_t gain32;
290 int16_t logratio;
291 int16_t lower_thr, upper_thr;
292 int16_t zeros = 0, zeros_fast, frac = 0;
293 int16_t decay;
294 int16_t gate, gain_adj;
295 int16_t k;
296 size_t n, L;
297 int16_t L2; // samples/subframe
298
299 // determine number of samples per ms
300 if (FS == 8000) {
301 L = 8;
302 L2 = 3;
303 } else if (FS == 16000 || FS == 32000 || FS == 48000) {
304 L = 16;
305 L2 = 4;
306 } else {
307 return -1;
308 }
309
310 // VAD for near end
311 logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, in_near[0], L * 10);
312
313 // Account for far end VAD
314 if (stt->vadFarend.counter > 10) {
315 tmp32 = 3 * logratio;
316 logratio = (int16_t)((tmp32 - stt->vadFarend.logRatio) >> 2);
317 }
318
319 // Determine decay factor depending on VAD
320 // upper_thr = 1.0f;
321 // lower_thr = 0.25f;
322 upper_thr = 1024; // Q10
323 lower_thr = 0; // Q10
324 if (logratio > upper_thr) {
325 // decay = -2^17 / DecayTime; -> -65
326 decay = -65;
327 } else if (logratio < lower_thr) {
328 decay = 0;
329 } else {
330 // decay = (int16_t)(((lower_thr - logratio)
331 // * (2^27/(DecayTime*(upper_thr-lower_thr)))) >> 10);
332 // SUBSTITUTED: 2^27/(DecayTime*(upper_thr-lower_thr)) -> 65
333 tmp32 = (lower_thr - logratio) * 65;
334 decay = (int16_t)(tmp32 >> 10);
335 }
336
337 // adjust decay factor for long silence (detected as low standard deviation)
338 // This is only done in the adaptive modes
339 if (stt->agcMode != kAgcModeFixedDigital) {
340 if (stt->vadNearend.stdLongTerm < 4000) {
341 decay = 0;
342 } else if (stt->vadNearend.stdLongTerm < 8096) {
343 // decay = (int16_t)(((stt->vadNearend.stdLongTerm - 4000) * decay) >>
344 // 12);
345 tmp32 = (stt->vadNearend.stdLongTerm - 4000) * decay;
346 decay = (int16_t)(tmp32 >> 12);
347 }
348
349 if (lowlevelSignal != 0) {
350 decay = 0;
351 }
352 }
353 // Find max amplitude per sub frame
354 // iterate over sub frames
355 for (k = 0; k < 10; k++) {
356 // iterate over samples
357 max_nrg = 0;
358 for (n = 0; n < L; n++) {
359 int32_t nrg = in_near[0][k * L + n] * in_near[0][k * L + n];
360 if (nrg > max_nrg) {
361 max_nrg = nrg;
362 }
363 }
364 env[k] = max_nrg;
365 }
366
367 // Calculate gain per sub frame
368 gains[0] = stt->gain;
369 for (k = 0; k < 10; k++) {
370 // Fast envelope follower
371 // decay time = -131000 / -1000 = 131 (ms)
372 stt->capacitorFast =
373 AGC_SCALEDIFF32(-1000, stt->capacitorFast, stt->capacitorFast);
374 if (env[k] > stt->capacitorFast) {
375 stt->capacitorFast = env[k];
376 }
377 // Slow envelope follower
378 if (env[k] > stt->capacitorSlow) {
379 // increase capacitorSlow
380 stt->capacitorSlow = AGC_SCALEDIFF32(500, (env[k] - stt->capacitorSlow),
381 stt->capacitorSlow);
382 } else {
383 // decrease capacitorSlow
384 stt->capacitorSlow =
385 AGC_SCALEDIFF32(decay, stt->capacitorSlow, stt->capacitorSlow);
386 }
387
388 // use maximum of both capacitors as current level
389 if (stt->capacitorFast > stt->capacitorSlow) {
390 cur_level = stt->capacitorFast;
391 } else {
392 cur_level = stt->capacitorSlow;
393 }
394 // Translate signal level into gain, using a piecewise linear approximation
395 // find number of leading zeros
396 zeros = WebRtcSpl_NormU32((uint32_t)cur_level);
397 if (cur_level == 0) {
398 zeros = 31;
399 }
400 tmp32 = ((uint32_t)cur_level << zeros) & 0x7FFFFFFF;
401 frac = (int16_t)(tmp32 >> 19); // Q12.
402 // Interpolate between gainTable[zeros] and gainTable[zeros-1].
403 tmp32 =
404 ((stt->gainTable[zeros - 1] - stt->gainTable[zeros]) * (int64_t)frac) >>
405 12;
406 gains[k + 1] = stt->gainTable[zeros] + tmp32;
407 }
408
409 // Gate processing (lower gain during absence of speech)
410 zeros = (zeros << 9) - (frac >> 3);
411 // find number of leading zeros
412 zeros_fast = WebRtcSpl_NormU32((uint32_t)stt->capacitorFast);
413 if (stt->capacitorFast == 0) {
414 zeros_fast = 31;
415 }
416 tmp32 = ((uint32_t)stt->capacitorFast << zeros_fast) & 0x7FFFFFFF;
417 zeros_fast <<= 9;
418 zeros_fast -= (int16_t)(tmp32 >> 22);
419
420 gate = 1000 + zeros_fast - zeros - stt->vadNearend.stdShortTerm;
421
422 if (gate < 0) {
423 stt->gatePrevious = 0;
424 } else {
425 tmp32 = stt->gatePrevious * 7;
426 gate = (int16_t)((gate + tmp32) >> 3);
427 stt->gatePrevious = gate;
428 }
429 // gate < 0 -> no gate
430 // gate > 2500 -> max gate
431 if (gate > 0) {
432 if (gate < 2500) {
433 gain_adj = (2500 - gate) >> 5;
434 } else {
435 gain_adj = 0;
436 }
437 for (k = 0; k < 10; k++) {
438 if ((gains[k + 1] - stt->gainTable[0]) > 8388608) {
439 // To prevent wraparound
440 tmp32 = (gains[k + 1] - stt->gainTable[0]) >> 8;
441 tmp32 *= 178 + gain_adj;
442 } else {
443 tmp32 = (gains[k + 1] - stt->gainTable[0]) * (178 + gain_adj);
444 tmp32 >>= 8;
445 }
446 gains[k + 1] = stt->gainTable[0] + tmp32;
447 }
448 }
449
450 // Limit gain to avoid overload distortion
451 for (k = 0; k < 10; k++) {
452 // Find a shift of gains[k + 1] such that it can be squared without
453 // overflow, but at least by 10 bits.
454 zeros = 10;
455 if (gains[k + 1] > 47452159) {
456 zeros = 16 - WebRtcSpl_NormW32(gains[k + 1]);
457 }
458 gain32 = (gains[k + 1] >> zeros) + 1;
459 gain32 *= gain32;
460 // check for overflow
461 while (AGC_MUL32((env[k] >> 12) + 1, gain32) >
462 WEBRTC_SPL_SHIFT_W32((int32_t)32767, 2 * (1 - zeros + 10))) {
463 // multiply by 253/256 ==> -0.1 dB
464 if (gains[k + 1] > 8388607) {
465 // Prevent wrap around
466 gains[k + 1] = (gains[k + 1] / 256) * 253;
467 } else {
468 gains[k + 1] = (gains[k + 1] * 253) / 256;
469 }
470 gain32 = (gains[k + 1] >> zeros) + 1;
471 gain32 *= gain32;
472 }
473 }
474 // gain reductions should be done 1 ms earlier than gain increases
475 for (k = 1; k < 10; k++) {
476 if (gains[k] > gains[k + 1]) {
477 gains[k] = gains[k + 1];
478 }
479 }
480 // save start gain for next frame
481 stt->gain = gains[10];
482
483 return 0;
484 }
485
WebRtcAgc_ApplyDigitalGains(const int32_t gains[11],size_t num_bands,uint32_t FS,const int16_t * const * in_near,int16_t * const * out)486 int32_t WebRtcAgc_ApplyDigitalGains(const int32_t gains[11],
487 size_t num_bands,
488 uint32_t FS,
489 const int16_t* const* in_near,
490 int16_t* const* out) {
491 // Apply gain
492 // handle first sub frame separately
493 size_t L;
494 int16_t L2; // samples/subframe
495
496 // determine number of samples per ms
497 if (FS == 8000) {
498 L = 8;
499 L2 = 3;
500 } else if (FS == 16000 || FS == 32000 || FS == 48000) {
501 L = 16;
502 L2 = 4;
503 } else {
504 return -1;
505 }
506
507 for (size_t i = 0; i < num_bands; ++i) {
508 if (in_near[i] != out[i]) {
509 // Only needed if they don't already point to the same place.
510 memcpy(out[i], in_near[i], 10 * L * sizeof(in_near[i][0]));
511 }
512 }
513
514 // iterate over samples
515 int32_t delta = (gains[1] - gains[0]) * (1 << (4 - L2));
516 int32_t gain32 = gains[0] * (1 << 4);
517 for (size_t n = 0; n < L; n++) {
518 for (size_t i = 0; i < num_bands; ++i) {
519 int32_t out_tmp = (int64_t)out[i][n] * ((gain32 + 127) >> 7) >> 16;
520 if (out_tmp > 4095) {
521 out[i][n] = (int16_t)32767;
522 } else if (out_tmp < -4096) {
523 out[i][n] = (int16_t)-32768;
524 } else {
525 int32_t tmp32 = ((int64_t)out[i][n] * (gain32 >> 4)) >> 16;
526 out[i][n] = (int16_t)tmp32;
527 }
528 }
529
530 gain32 += delta;
531 }
532 // iterate over subframes
533 for (int k = 1; k < 10; k++) {
534 delta = (gains[k + 1] - gains[k]) * (1 << (4 - L2));
535 gain32 = gains[k] * (1 << 4);
536 // iterate over samples
537 for (size_t n = 0; n < L; n++) {
538 for (size_t i = 0; i < num_bands; ++i) {
539 int64_t tmp64 = ((int64_t)(out[i][k * L + n])) * (gain32 >> 4);
540 tmp64 = tmp64 >> 16;
541 if (tmp64 > 32767) {
542 out[i][k * L + n] = 32767;
543 } else if (tmp64 < -32768) {
544 out[i][k * L + n] = -32768;
545 } else {
546 out[i][k * L + n] = (int16_t)(tmp64);
547 }
548 }
549 gain32 += delta;
550 }
551 }
552 return 0;
553 }
554
WebRtcAgc_InitVad(AgcVad * state)555 void WebRtcAgc_InitVad(AgcVad* state) {
556 int16_t k;
557
558 state->HPstate = 0; // state of high pass filter
559 state->logRatio = 0; // log( P(active) / P(inactive) )
560 // average input level (Q10)
561 state->meanLongTerm = 15 << 10;
562
563 // variance of input level (Q8)
564 state->varianceLongTerm = 500 << 8;
565
566 state->stdLongTerm = 0; // standard deviation of input level in dB
567 // short-term average input level (Q10)
568 state->meanShortTerm = 15 << 10;
569
570 // short-term variance of input level (Q8)
571 state->varianceShortTerm = 500 << 8;
572
573 state->stdShortTerm =
574 0; // short-term standard deviation of input level in dB
575 state->counter = 3; // counts updates
576 for (k = 0; k < 8; k++) {
577 // downsampling filter
578 state->downState[k] = 0;
579 }
580 }
581
WebRtcAgc_ProcessVad(AgcVad * state,const int16_t * in,size_t nrSamples)582 int16_t WebRtcAgc_ProcessVad(AgcVad* state, // (i) VAD state
583 const int16_t* in, // (i) Speech signal
584 size_t nrSamples) { // (i) number of samples
585 uint32_t nrg;
586 int32_t out, tmp32, tmp32b;
587 uint16_t tmpU16;
588 int16_t k, subfr, tmp16;
589 int16_t buf1[8];
590 int16_t buf2[4];
591 int16_t HPstate;
592 int16_t zeros, dB;
593 int64_t tmp64;
594
595 // process in 10 sub frames of 1 ms (to save on memory)
596 nrg = 0;
597 HPstate = state->HPstate;
598 for (subfr = 0; subfr < 10; subfr++) {
599 // downsample to 4 kHz
600 if (nrSamples == 160) {
601 for (k = 0; k < 8; k++) {
602 tmp32 = (int32_t)in[2 * k] + (int32_t)in[2 * k + 1];
603 tmp32 >>= 1;
604 buf1[k] = (int16_t)tmp32;
605 }
606 in += 16;
607
608 WebRtcSpl_DownsampleBy2(buf1, 8, buf2, state->downState);
609 } else {
610 WebRtcSpl_DownsampleBy2(in, 8, buf2, state->downState);
611 in += 8;
612 }
613
614 // high pass filter and compute energy
615 for (k = 0; k < 4; k++) {
616 out = buf2[k] + HPstate;
617 tmp32 = 600 * out;
618 HPstate = (int16_t)((tmp32 >> 10) - buf2[k]);
619
620 // Add 'out * out / 2**6' to 'nrg' in a non-overflowing
621 // way. Guaranteed to work as long as 'out * out / 2**6' fits in
622 // an int32_t.
623 nrg += out * (out / (1 << 6));
624 nrg += out * (out % (1 << 6)) / (1 << 6);
625 }
626 }
627 state->HPstate = HPstate;
628
629 // find number of leading zeros
630 if (!(0xFFFF0000 & nrg)) {
631 zeros = 16;
632 } else {
633 zeros = 0;
634 }
635 if (!(0xFF000000 & (nrg << zeros))) {
636 zeros += 8;
637 }
638 if (!(0xF0000000 & (nrg << zeros))) {
639 zeros += 4;
640 }
641 if (!(0xC0000000 & (nrg << zeros))) {
642 zeros += 2;
643 }
644 if (!(0x80000000 & (nrg << zeros))) {
645 zeros += 1;
646 }
647
648 // energy level (range {-32..30}) (Q10)
649 dB = (15 - zeros) * (1 << 11);
650
651 // Update statistics
652
653 if (state->counter < kAvgDecayTime) {
654 // decay time = AvgDecTime * 10 ms
655 state->counter++;
656 }
657
658 // update short-term estimate of mean energy level (Q10)
659 tmp32 = state->meanShortTerm * 15 + dB;
660 state->meanShortTerm = (int16_t)(tmp32 >> 4);
661
662 // update short-term estimate of variance in energy level (Q8)
663 tmp32 = (dB * dB) >> 12;
664 tmp32 += state->varianceShortTerm * 15;
665 state->varianceShortTerm = tmp32 / 16;
666
667 // update short-term estimate of standard deviation in energy level (Q10)
668 tmp32 = state->meanShortTerm * state->meanShortTerm;
669 tmp32 = (state->varianceShortTerm << 12) - tmp32;
670 state->stdShortTerm = (int16_t)WebRtcSpl_Sqrt(tmp32);
671
672 // update long-term estimate of mean energy level (Q10)
673 tmp32 = state->meanLongTerm * state->counter + dB;
674 state->meanLongTerm =
675 WebRtcSpl_DivW32W16ResW16(tmp32, WebRtcSpl_AddSatW16(state->counter, 1));
676
677 // update long-term estimate of variance in energy level (Q8)
678 tmp32 = (dB * dB) >> 12;
679 tmp32 += state->varianceLongTerm * state->counter;
680 state->varianceLongTerm =
681 WebRtcSpl_DivW32W16(tmp32, WebRtcSpl_AddSatW16(state->counter, 1));
682
683 // update long-term estimate of standard deviation in energy level (Q10)
684 tmp32 = state->meanLongTerm * state->meanLongTerm;
685 tmp32 = (state->varianceLongTerm << 12) - tmp32;
686 state->stdLongTerm = (int16_t)WebRtcSpl_Sqrt(tmp32);
687
688 // update voice activity measure (Q10)
689 tmp16 = 3 << 12;
690 // TODO(bjornv): (dB - state->meanLongTerm) can overflow, e.g., in
691 // ApmTest.Process unit test. Previously the macro WEBRTC_SPL_MUL_16_16()
692 // was used, which did an intermediate cast to (int16_t), hence losing
693 // significant bits. This cause logRatio to max out positive, rather than
694 // negative. This is a bug, but has very little significance.
695 tmp32 = tmp16 * (int16_t)(dB - state->meanLongTerm);
696 tmp32 = WebRtcSpl_DivW32W16(tmp32, state->stdLongTerm);
697 tmpU16 = (13 << 12);
698 tmp32b = WEBRTC_SPL_MUL_16_U16(state->logRatio, tmpU16);
699 tmp64 = tmp32;
700 tmp64 += tmp32b >> 10;
701 tmp64 >>= 6;
702
703 // limit
704 if (tmp64 > 2048) {
705 tmp64 = 2048;
706 } else if (tmp64 < -2048) {
707 tmp64 = -2048;
708 }
709 state->logRatio = (int16_t)tmp64;
710
711 return state->logRatio; // Q10
712 }
713
714 } // namespace webrtc
715