1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/ns/noise_suppression_x.h"
12
13 #include <math.h>
14 #include <stdlib.h>
15 #include <string.h>
16
17 #include "rtc_base/checks.h"
18 #include "common_audio/signal_processing/include/real_fft.h"
19 #include "modules/audio_processing/ns/nsx_core.h"
20 #include "system_wrappers/include/cpu_features_wrapper.h"
21
22 #if defined(WEBRTC_HAS_NEON)
23 /* Tables are defined in ARM assembly files. */
24 extern const int16_t WebRtcNsx_kLogTable[9];
25 extern const int16_t WebRtcNsx_kCounterDiv[201];
26 extern const int16_t WebRtcNsx_kLogTableFrac[256];
27 #else
28 static const int16_t WebRtcNsx_kLogTable[9] = {
29 0, 177, 355, 532, 710, 887, 1065, 1242, 1420
30 };
31
32 static const int16_t WebRtcNsx_kCounterDiv[201] = {
33 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731,
34 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311,
35 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840,
36 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607,
37 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475,
38 468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390,
39 386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331,
40 328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287,
41 285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254,
42 252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228,
43 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206,
44 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188,
45 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173,
46 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
47 };
48
49 static const int16_t WebRtcNsx_kLogTableFrac[256] = {
50 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21,
51 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42,
52 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62,
53 63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81,
54 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99,
55 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116,
56 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
57 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
58 147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160,
59 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174,
60 175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187,
61 188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200,
62 201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212,
63 213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224,
64 225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236,
65 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247,
66 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255
67 };
68 #endif // WEBRTC_HAS_NEON
69
70 // Skip first frequency bins during estimation. (0 <= value < 64)
71 static const size_t kStartBand = 5;
72
73 // hybrib Hanning & flat window
74 static const int16_t kBlocks80w128x[128] = {
75 0, 536, 1072, 1606, 2139, 2669, 3196, 3720, 4240, 4756, 5266,
76 5771, 6270, 6762, 7246, 7723, 8192, 8652, 9102, 9543, 9974, 10394,
77 10803, 11200, 11585, 11958, 12318, 12665, 12998, 13318, 13623, 13913, 14189,
78 14449, 14694, 14924, 15137, 15334, 15515, 15679, 15826, 15956, 16069, 16165,
79 16244, 16305, 16349, 16375, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
80 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
81 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
82 16384, 16384, 16384, 16384, 16375, 16349, 16305, 16244, 16165, 16069, 15956,
83 15826, 15679, 15515, 15334, 15137, 14924, 14694, 14449, 14189, 13913, 13623,
84 13318, 12998, 12665, 12318, 11958, 11585, 11200, 10803, 10394, 9974, 9543,
85 9102, 8652, 8192, 7723, 7246, 6762, 6270, 5771, 5266, 4756, 4240,
86 3720, 3196, 2669, 2139, 1606, 1072, 536
87 };
88
89 // hybrib Hanning & flat window
90 static const int16_t kBlocks160w256x[256] = {
91 0, 268, 536, 804, 1072, 1339, 1606, 1872,
92 2139, 2404, 2669, 2933, 3196, 3459, 3720, 3981,
93 4240, 4499, 4756, 5012, 5266, 5520, 5771, 6021,
94 6270, 6517, 6762, 7005, 7246, 7486, 7723, 7959,
95 8192, 8423, 8652, 8878, 9102, 9324, 9543, 9760,
96 9974, 10185, 10394, 10600, 10803, 11003, 11200, 11394,
97 11585, 11773, 11958, 12140, 12318, 12493, 12665, 12833,
98 12998, 13160, 13318, 13472, 13623, 13770, 13913, 14053,
99 14189, 14321, 14449, 14574, 14694, 14811, 14924, 15032,
100 15137, 15237, 15334, 15426, 15515, 15599, 15679, 15754,
101 15826, 15893, 15956, 16015, 16069, 16119, 16165, 16207,
102 16244, 16277, 16305, 16329, 16349, 16364, 16375, 16382,
103 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
104 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
105 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
106 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
107 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
108 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
109 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
110 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
111 16384, 16382, 16375, 16364, 16349, 16329, 16305, 16277,
112 16244, 16207, 16165, 16119, 16069, 16015, 15956, 15893,
113 15826, 15754, 15679, 15599, 15515, 15426, 15334, 15237,
114 15137, 15032, 14924, 14811, 14694, 14574, 14449, 14321,
115 14189, 14053, 13913, 13770, 13623, 13472, 13318, 13160,
116 12998, 12833, 12665, 12493, 12318, 12140, 11958, 11773,
117 11585, 11394, 11200, 11003, 10803, 10600, 10394, 10185,
118 9974, 9760, 9543, 9324, 9102, 8878, 8652, 8423,
119 8192, 7959, 7723, 7486, 7246, 7005, 6762, 6517,
120 6270, 6021, 5771, 5520, 5266, 5012, 4756, 4499,
121 4240, 3981, 3720, 3459, 3196, 2933, 2669, 2404,
122 2139, 1872, 1606, 1339, 1072, 804, 536, 268
123 };
124
125 // Gain factor1 table: Input value in Q8 and output value in Q13
126 // original floating point code
127 // if (gain > blim) {
128 // factor1 = 1.0 + 1.3 * (gain - blim);
129 // if (gain * factor1 > 1.0) {
130 // factor1 = 1.0 / gain;
131 // }
132 // } else {
133 // factor1 = 1.0;
134 // }
135 static const int16_t kFactor1Table[257] = {
136 8192, 8192, 8192, 8192, 8192, 8192, 8192,
137 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
138 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
139 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
140 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
141 8192, 8192, 8233, 8274, 8315, 8355, 8396, 8436, 8475, 8515, 8554, 8592, 8631, 8669,
142 8707, 8745, 8783, 8820, 8857, 8894, 8931, 8967, 9003, 9039, 9075, 9111, 9146, 9181,
143 9216, 9251, 9286, 9320, 9354, 9388, 9422, 9456, 9489, 9523, 9556, 9589, 9622, 9655,
144 9687, 9719, 9752, 9784, 9816, 9848, 9879, 9911, 9942, 9973, 10004, 10035, 10066,
145 10097, 10128, 10158, 10188, 10218, 10249, 10279, 10308, 10338, 10368, 10397, 10426,
146 10456, 10485, 10514, 10543, 10572, 10600, 10629, 10657, 10686, 10714, 10742, 10770,
147 10798, 10826, 10854, 10882, 10847, 10810, 10774, 10737, 10701, 10666, 10631, 10596,
148 10562, 10527, 10494, 10460, 10427, 10394, 10362, 10329, 10297, 10266, 10235, 10203,
149 10173, 10142, 10112, 10082, 10052, 10023, 9994, 9965, 9936, 9908, 9879, 9851, 9824,
150 9796, 9769, 9742, 9715, 9689, 9662, 9636, 9610, 9584, 9559, 9534, 9508, 9484, 9459,
151 9434, 9410, 9386, 9362, 9338, 9314, 9291, 9268, 9245, 9222, 9199, 9176, 9154, 9132,
152 9110, 9088, 9066, 9044, 9023, 9002, 8980, 8959, 8939, 8918, 8897, 8877, 8857, 8836,
153 8816, 8796, 8777, 8757, 8738, 8718, 8699, 8680, 8661, 8642, 8623, 8605, 8586, 8568,
154 8550, 8532, 8514, 8496, 8478, 8460, 8443, 8425, 8408, 8391, 8373, 8356, 8339, 8323,
155 8306, 8289, 8273, 8256, 8240, 8224, 8208, 8192
156 };
157
158 // For Factor2 tables
159 // original floating point code
160 // if (gain > blim) {
161 // factor2 = 1.0;
162 // } else {
163 // factor2 = 1.0 - 0.3 * (blim - gain);
164 // if (gain <= inst->denoiseBound) {
165 // factor2 = 1.0 - 0.3 * (blim - inst->denoiseBound);
166 // }
167 // }
168 //
169 // Gain factor table: Input value in Q8 and output value in Q13
170 static const int16_t kFactor2Aggressiveness1[257] = {
171 7577, 7577, 7577, 7577, 7577, 7577,
172 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7596, 7614, 7632,
173 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845,
174 7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016,
175 8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162,
176 8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
177 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
178 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
179 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
180 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
181 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
182 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
183 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
184 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
185 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
186 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
187 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
188 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
189 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192
190 };
191
192 // Gain factor table: Input value in Q8 and output value in Q13
193 static const int16_t kFactor2Aggressiveness2[257] = {
194 7270, 7270, 7270, 7270, 7270, 7306,
195 7339, 7369, 7397, 7424, 7448, 7472, 7495, 7517, 7537, 7558, 7577, 7596, 7614, 7632,
196 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845,
197 7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016,
198 8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162,
199 8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
200 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
201 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
202 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
203 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
204 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
205 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
206 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
207 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
208 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
209 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
210 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
211 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
212 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192
213 };
214
215 // Gain factor table: Input value in Q8 and output value in Q13
216 static const int16_t kFactor2Aggressiveness3[257] = {
217 7184, 7184, 7184, 7229, 7270, 7306,
218 7339, 7369, 7397, 7424, 7448, 7472, 7495, 7517, 7537, 7558, 7577, 7596, 7614, 7632,
219 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845,
220 7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016,
221 8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162,
222 8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
223 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
224 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
225 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
226 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
227 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
228 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
229 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
230 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
231 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
232 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
233 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
234 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
235 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192
236 };
237
238 // sum of log2(i) from table index to inst->anaLen2 in Q5
239 // Note that the first table value is invalid, since log2(0) = -infinity
240 static const int16_t kSumLogIndex[66] = {
241 0, 22917, 22917, 22885, 22834, 22770, 22696, 22613,
242 22524, 22428, 22326, 22220, 22109, 21994, 21876, 21754,
243 21629, 21501, 21370, 21237, 21101, 20963, 20822, 20679,
244 20535, 20388, 20239, 20089, 19937, 19783, 19628, 19470,
245 19312, 19152, 18991, 18828, 18664, 18498, 18331, 18164,
246 17994, 17824, 17653, 17480, 17306, 17132, 16956, 16779,
247 16602, 16423, 16243, 16063, 15881, 15699, 15515, 15331,
248 15146, 14960, 14774, 14586, 14398, 14209, 14019, 13829,
249 13637, 13445
250 };
251
252 // sum of log2(i)^2 from table index to inst->anaLen2 in Q2
253 // Note that the first table value is invalid, since log2(0) = -infinity
254 static const int16_t kSumSquareLogIndex[66] = {
255 0, 16959, 16959, 16955, 16945, 16929, 16908, 16881,
256 16850, 16814, 16773, 16729, 16681, 16630, 16575, 16517,
257 16456, 16392, 16325, 16256, 16184, 16109, 16032, 15952,
258 15870, 15786, 15700, 15612, 15521, 15429, 15334, 15238,
259 15140, 15040, 14938, 14834, 14729, 14622, 14514, 14404,
260 14292, 14179, 14064, 13947, 13830, 13710, 13590, 13468,
261 13344, 13220, 13094, 12966, 12837, 12707, 12576, 12444,
262 12310, 12175, 12039, 11902, 11763, 11624, 11483, 11341,
263 11198, 11054
264 };
265
266 // log2(table index) in Q12
267 // Note that the first table value is invalid, since log2(0) = -infinity
268 static const int16_t kLogIndex[129] = {
269 0, 0, 4096, 6492, 8192, 9511, 10588, 11499,
270 12288, 12984, 13607, 14170, 14684, 15157, 15595, 16003,
271 16384, 16742, 17080, 17400, 17703, 17991, 18266, 18529,
272 18780, 19021, 19253, 19476, 19691, 19898, 20099, 20292,
273 20480, 20662, 20838, 21010, 21176, 21338, 21496, 21649,
274 21799, 21945, 22087, 22226, 22362, 22495, 22625, 22752,
275 22876, 22998, 23117, 23234, 23349, 23462, 23572, 23680,
276 23787, 23892, 23994, 24095, 24195, 24292, 24388, 24483,
277 24576, 24668, 24758, 24847, 24934, 25021, 25106, 25189,
278 25272, 25354, 25434, 25513, 25592, 25669, 25745, 25820,
279 25895, 25968, 26041, 26112, 26183, 26253, 26322, 26390,
280 26458, 26525, 26591, 26656, 26721, 26784, 26848, 26910,
281 26972, 27033, 27094, 27154, 27213, 27272, 27330, 27388,
282 27445, 27502, 27558, 27613, 27668, 27722, 27776, 27830,
283 27883, 27935, 27988, 28039, 28090, 28141, 28191, 28241,
284 28291, 28340, 28388, 28437, 28484, 28532, 28579, 28626,
285 28672
286 };
287
288 // determinant of estimation matrix in Q0 corresponding to the log2 tables above
289 // Note that the first table value is invalid, since log2(0) = -infinity
290 static const int16_t kDeterminantEstMatrix[66] = {
291 0, 29814, 25574, 22640, 20351, 18469, 16873, 15491,
292 14277, 13199, 12233, 11362, 10571, 9851, 9192, 8587,
293 8030, 7515, 7038, 6596, 6186, 5804, 5448, 5115,
294 4805, 4514, 4242, 3988, 3749, 3524, 3314, 3116,
295 2930, 2755, 2590, 2435, 2289, 2152, 2022, 1900,
296 1785, 1677, 1575, 1478, 1388, 1302, 1221, 1145,
297 1073, 1005, 942, 881, 825, 771, 721, 674,
298 629, 587, 547, 510, 475, 442, 411, 382,
299 355, 330
300 };
301
302 // Update the noise estimation information.
UpdateNoiseEstimate(NoiseSuppressionFixedC * inst,int offset)303 static void UpdateNoiseEstimate(NoiseSuppressionFixedC* inst, int offset) {
304 int32_t tmp32no1 = 0;
305 int32_t tmp32no2 = 0;
306 int16_t tmp16 = 0;
307 const int16_t kExp2Const = 11819; // Q13
308
309 size_t i = 0;
310
311 tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset,
312 inst->magnLen);
313 // Guarantee a Q-domain as high as possible and still fit in int16
314 inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
315 kExp2Const, tmp16, 21);
316 for (i = 0; i < inst->magnLen; i++) {
317 // inst->quantile[i]=exp(inst->lquantile[offset+i]);
318 // in Q21
319 tmp32no2 = kExp2Const * inst->noiseEstLogQuantile[offset + i];
320 tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
321 tmp16 = (int16_t)(tmp32no2 >> 21);
322 tmp16 -= 21;// shift 21 to get result in Q0
323 tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise)
324 if (tmp16 < 0) {
325 tmp32no1 >>= -tmp16;
326 } else {
327 tmp32no1 <<= tmp16;
328 }
329 inst->noiseEstQuantile[i] = WebRtcSpl_SatW32ToW16(tmp32no1);
330 }
331 }
332
333 // Noise Estimation
NoiseEstimationC(NoiseSuppressionFixedC * inst,uint16_t * magn,uint32_t * noise,int16_t * q_noise)334 static void NoiseEstimationC(NoiseSuppressionFixedC* inst,
335 uint16_t* magn,
336 uint32_t* noise,
337 int16_t* q_noise) {
338 int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
339 int16_t countProd, delta, zeros, frac;
340 int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
341 const int16_t log2_const = 22713; // Q15
342 const int16_t width_factor = 21845;
343
344 size_t i, s, offset;
345
346 tabind = inst->stages - inst->normData;
347 RTC_DCHECK_LT(tabind, 9);
348 RTC_DCHECK_GT(tabind, -9);
349 if (tabind < 0) {
350 logval = -WebRtcNsx_kLogTable[-tabind];
351 } else {
352 logval = WebRtcNsx_kLogTable[tabind];
353 }
354
355 // lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
356 // magn is in Q(-stages), and the real lmagn values are:
357 // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
358 // lmagn in Q8
359 for (i = 0; i < inst->magnLen; i++) {
360 if (magn[i]) {
361 zeros = WebRtcSpl_NormU32((uint32_t)magn[i]);
362 frac = (int16_t)((((uint32_t)magn[i] << zeros)
363 & 0x7FFFFFFF) >> 23);
364 // log2(magn(i))
365 RTC_DCHECK_LT(frac, 256);
366 log2 = (int16_t)(((31 - zeros) << 8)
367 + WebRtcNsx_kLogTableFrac[frac]);
368 // log2(magn(i))*log(2)
369 lmagn[i] = (int16_t)((log2 * log2_const) >> 15);
370 // + log(2^stages)
371 lmagn[i] += logval;
372 } else {
373 lmagn[i] = logval;//0;
374 }
375 }
376
377 // loop over simultaneous estimates
378 for (s = 0; s < SIMULT; s++) {
379 offset = s * inst->magnLen;
380
381 // Get counter values from state
382 counter = inst->noiseEstCounter[s];
383 RTC_DCHECK_LT(counter, 201);
384 countDiv = WebRtcNsx_kCounterDiv[counter];
385 countProd = (int16_t)(counter * countDiv);
386
387 // quant_est(...)
388 for (i = 0; i < inst->magnLen; i++) {
389 // compute delta
390 if (inst->noiseEstDensity[offset + i] > 512) {
391 // Get the value for delta by shifting intead of dividing.
392 int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i]);
393 delta = (int16_t)(FACTOR_Q16 >> (14 - factor));
394 } else {
395 delta = FACTOR_Q7;
396 if (inst->blockIndex < END_STARTUP_LONG) {
397 // Smaller step size during startup. This prevents from using
398 // unrealistic values causing overflow.
399 delta = FACTOR_Q7_STARTUP;
400 }
401 }
402
403 // update log quantile estimate
404 tmp16 = (int16_t)((delta * countDiv) >> 14);
405 if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
406 // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
407 // CounterDiv=1/(inst->counter[s]+1) in Q15
408 tmp16 += 2;
409 inst->noiseEstLogQuantile[offset + i] += tmp16 / 4;
410 } else {
411 tmp16 += 1;
412 // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
413 // TODO(bjornv): investigate why we need to truncate twice.
414 tmp16no2 = (int16_t)((tmp16 / 2) * 3 / 2);
415 inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
416 if (inst->noiseEstLogQuantile[offset + i] < logval) {
417 // This is the smallest fixed point representation we can
418 // have, hence we limit the output.
419 inst->noiseEstLogQuantile[offset + i] = logval;
420 }
421 }
422
423 // update density estimate
424 if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
425 < WIDTH_Q8) {
426 tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
427 inst->noiseEstDensity[offset + i], countProd, 15);
428 tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
429 width_factor, countDiv, 15);
430 inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
431 }
432 } // end loop over magnitude spectrum
433
434 if (counter >= END_STARTUP_LONG) {
435 inst->noiseEstCounter[s] = 0;
436 if (inst->blockIndex >= END_STARTUP_LONG) {
437 UpdateNoiseEstimate(inst, offset);
438 }
439 }
440 inst->noiseEstCounter[s]++;
441
442 } // end loop over simultaneous estimates
443
444 // Sequentially update the noise during startup
445 if (inst->blockIndex < END_STARTUP_LONG) {
446 UpdateNoiseEstimate(inst, offset);
447 }
448
449 for (i = 0; i < inst->magnLen; i++) {
450 noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise)
451 }
452 (*q_noise) = (int16_t)inst->qNoise;
453 }
454
455 // Filter the data in the frequency domain, and create spectrum.
PrepareSpectrumC(NoiseSuppressionFixedC * inst,int16_t * freq_buf)456 static void PrepareSpectrumC(NoiseSuppressionFixedC* inst, int16_t* freq_buf) {
457 size_t i = 0, j = 0;
458
459 for (i = 0; i < inst->magnLen; i++) {
460 inst->real[i] = (int16_t)((inst->real[i] *
461 (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages)
462 inst->imag[i] = (int16_t)((inst->imag[i] *
463 (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages)
464 }
465
466 freq_buf[0] = inst->real[0];
467 freq_buf[1] = -inst->imag[0];
468 for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
469 freq_buf[j] = inst->real[i];
470 freq_buf[j + 1] = -inst->imag[i];
471 }
472 freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
473 freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
474 }
475
476 // Denormalize the real-valued signal |in|, the output from inverse FFT.
DenormalizeC(NoiseSuppressionFixedC * inst,int16_t * in,int factor)477 static void DenormalizeC(NoiseSuppressionFixedC* inst,
478 int16_t* in,
479 int factor) {
480 size_t i = 0;
481 int32_t tmp32 = 0;
482 for (i = 0; i < inst->anaLen; i += 1) {
483 tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[i],
484 factor - inst->normData);
485 inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0
486 }
487 }
488
489 // For the noise supression process, synthesis, read out fully processed
490 // segment, and update synthesis buffer.
SynthesisUpdateC(NoiseSuppressionFixedC * inst,int16_t * out_frame,int16_t gain_factor)491 static void SynthesisUpdateC(NoiseSuppressionFixedC* inst,
492 int16_t* out_frame,
493 int16_t gain_factor) {
494 size_t i = 0;
495 int16_t tmp16a = 0;
496 int16_t tmp16b = 0;
497 int32_t tmp32 = 0;
498
499 // synthesis
500 for (i = 0; i < inst->anaLen; i++) {
501 tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
502 inst->window[i], inst->real[i], 14); // Q0, window in Q14
503 tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13); // Q0
504 // Down shift with rounding
505 tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0
506 inst->synthesisBuffer[i] = WebRtcSpl_AddSatW16(inst->synthesisBuffer[i],
507 tmp16b); // Q0
508 }
509
510 // read out fully processed segment
511 for (i = 0; i < inst->blockLen10ms; i++) {
512 out_frame[i] = inst->synthesisBuffer[i]; // Q0
513 }
514
515 // update synthesis buffer
516 memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms,
517 (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer));
518 WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
519 + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
520 }
521
522 // Update analysis buffer for lower band, and window data before FFT.
AnalysisUpdateC(NoiseSuppressionFixedC * inst,int16_t * out,int16_t * new_speech)523 static void AnalysisUpdateC(NoiseSuppressionFixedC* inst,
524 int16_t* out,
525 int16_t* new_speech) {
526 size_t i = 0;
527
528 // For lower band update analysis buffer.
529 memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms,
530 (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer));
531 memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, new_speech,
532 inst->blockLen10ms * sizeof(*inst->analysisBuffer));
533
534 // Window data before FFT.
535 for (i = 0; i < inst->anaLen; i++) {
536 out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
537 inst->window[i], inst->analysisBuffer[i], 14); // Q0
538 }
539 }
540
541 // Normalize the real-valued signal |in|, the input to forward FFT.
NormalizeRealBufferC(NoiseSuppressionFixedC * inst,const int16_t * in,int16_t * out)542 static void NormalizeRealBufferC(NoiseSuppressionFixedC* inst,
543 const int16_t* in,
544 int16_t* out) {
545 size_t i = 0;
546 RTC_DCHECK_GE(inst->normData, 0);
547 for (i = 0; i < inst->anaLen; ++i) {
548 out[i] = in[i] << inst->normData; // Q(normData)
549 }
550 }
551
552 // Declare function pointers.
553 NoiseEstimation WebRtcNsx_NoiseEstimation;
554 PrepareSpectrum WebRtcNsx_PrepareSpectrum;
555 SynthesisUpdate WebRtcNsx_SynthesisUpdate;
556 AnalysisUpdate WebRtcNsx_AnalysisUpdate;
557 Denormalize WebRtcNsx_Denormalize;
558 NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;
559
560 #if defined(WEBRTC_HAS_NEON)
561 // Initialize function pointers for ARM Neon platform.
WebRtcNsx_InitNeon(void)562 static void WebRtcNsx_InitNeon(void) {
563 WebRtcNsx_NoiseEstimation = WebRtcNsx_NoiseEstimationNeon;
564 WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrumNeon;
565 WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdateNeon;
566 WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdateNeon;
567 }
568 #endif
569
570 #if defined(MIPS32_LE)
571 // Initialize function pointers for MIPS platform.
WebRtcNsx_InitMips(void)572 static void WebRtcNsx_InitMips(void) {
573 WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrum_mips;
574 WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdate_mips;
575 WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdate_mips;
576 WebRtcNsx_NormalizeRealBuffer = WebRtcNsx_NormalizeRealBuffer_mips;
577 #if defined(MIPS_DSP_R1_LE)
578 WebRtcNsx_Denormalize = WebRtcNsx_Denormalize_mips;
579 #endif
580 }
581 #endif
582
WebRtcNsx_CalcParametricNoiseEstimate(NoiseSuppressionFixedC * inst,int16_t pink_noise_exp_avg,int32_t pink_noise_num_avg,int freq_index,uint32_t * noise_estimate,uint32_t * noise_estimate_avg)583 void WebRtcNsx_CalcParametricNoiseEstimate(NoiseSuppressionFixedC* inst,
584 int16_t pink_noise_exp_avg,
585 int32_t pink_noise_num_avg,
586 int freq_index,
587 uint32_t* noise_estimate,
588 uint32_t* noise_estimate_avg) {
589 int32_t tmp32no1 = 0;
590 int32_t tmp32no2 = 0;
591
592 int16_t int_part = 0;
593 int16_t frac_part = 0;
594
595 // Use pink noise estimate
596 // noise_estimate = 2^(pinkNoiseNumerator + pinkNoiseExp * log2(j))
597 RTC_DCHECK_GE(freq_index, 0);
598 RTC_DCHECK_LT(freq_index, 129);
599 tmp32no2 = (pink_noise_exp_avg * kLogIndex[freq_index]) >> 15; // Q11
600 tmp32no1 = pink_noise_num_avg - tmp32no2; // Q11
601
602 // Calculate output: 2^tmp32no1
603 // Output in Q(minNorm-stages)
604 tmp32no1 += (inst->minNorm - inst->stages) << 11;
605 if (tmp32no1 > 0) {
606 int_part = (int16_t)(tmp32no1 >> 11);
607 frac_part = (int16_t)(tmp32no1 & 0x000007ff); // Q11
608 // Piecewise linear approximation of 'b' in
609 // 2^(int_part+frac_part) = 2^int_part * (1 + b)
610 // 'b' is given in Q11 and below stored in frac_part.
611 if (frac_part >> 10) {
612 // Upper fractional part
613 tmp32no2 = (2048 - frac_part) * 1244; // Q21
614 tmp32no2 = 2048 - (tmp32no2 >> 10);
615 } else {
616 // Lower fractional part
617 tmp32no2 = (frac_part * 804) >> 10;
618 }
619 // Shift fractional part to Q(minNorm-stages)
620 tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, int_part - 11);
621 *noise_estimate_avg = (1 << int_part) + (uint32_t)tmp32no2;
622 // Scale up to initMagnEst, which is not block averaged
623 *noise_estimate = (*noise_estimate_avg) * (uint32_t)(inst->blockIndex + 1);
624 }
625 }
626
627 // Initialize state
WebRtcNsx_InitCore(NoiseSuppressionFixedC * inst,uint32_t fs)628 int32_t WebRtcNsx_InitCore(NoiseSuppressionFixedC* inst, uint32_t fs) {
629 int i;
630
631 //check for valid pointer
632 if (inst == NULL) {
633 return -1;
634 }
635 //
636
637 // Initialization of struct
638 if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) {
639 inst->fs = fs;
640 } else {
641 return -1;
642 }
643
644 if (fs == 8000) {
645 inst->blockLen10ms = 80;
646 inst->anaLen = 128;
647 inst->stages = 7;
648 inst->window = kBlocks80w128x;
649 inst->thresholdLogLrt = 131072; //default threshold for LRT feature
650 inst->maxLrt = 0x0040000;
651 inst->minLrt = 52429;
652 } else {
653 inst->blockLen10ms = 160;
654 inst->anaLen = 256;
655 inst->stages = 8;
656 inst->window = kBlocks160w256x;
657 inst->thresholdLogLrt = 212644; //default threshold for LRT feature
658 inst->maxLrt = 0x0080000;
659 inst->minLrt = 104858;
660 }
661 inst->anaLen2 = inst->anaLen / 2;
662 inst->magnLen = inst->anaLen2 + 1;
663
664 if (inst->real_fft != NULL) {
665 WebRtcSpl_FreeRealFFT(inst->real_fft);
666 }
667 inst->real_fft = WebRtcSpl_CreateRealFFT(inst->stages);
668 if (inst->real_fft == NULL) {
669 return -1;
670 }
671
672 WebRtcSpl_ZerosArrayW16(inst->analysisBuffer, ANAL_BLOCKL_MAX);
673 WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer, ANAL_BLOCKL_MAX);
674
675 // for HB processing
676 WebRtcSpl_ZerosArrayW16(inst->dataBufHBFX[0],
677 NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);
678 // for quantile noise estimation
679 WebRtcSpl_ZerosArrayW16(inst->noiseEstQuantile, HALF_ANAL_BLOCKL);
680 for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) {
681 inst->noiseEstLogQuantile[i] = 2048; // Q8
682 inst->noiseEstDensity[i] = 153; // Q9
683 }
684 for (i = 0; i < SIMULT; i++) {
685 inst->noiseEstCounter[i] = (int16_t)(END_STARTUP_LONG * (i + 1)) / SIMULT;
686 }
687
688 // Initialize suppression filter with ones
689 WebRtcSpl_MemSetW16((int16_t*)inst->noiseSupFilter, 16384, HALF_ANAL_BLOCKL);
690
691 // Set the aggressiveness: default
692 inst->aggrMode = 0;
693
694 //initialize variables for new method
695 inst->priorNonSpeechProb = 8192; // Q14(0.5) prior probability for speech/noise
696 for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
697 inst->prevMagnU16[i] = 0;
698 inst->prevNoiseU32[i] = 0; //previous noise-spectrum
699 inst->logLrtTimeAvgW32[i] = 0; //smooth LR ratio
700 inst->avgMagnPause[i] = 0; //conservative noise spectrum estimate
701 inst->initMagnEst[i] = 0; //initial average magnitude spectrum
702 }
703
704 //feature quantities
705 inst->thresholdSpecDiff = 50; //threshold for difference feature: determined on-line
706 inst->thresholdSpecFlat = 20480; //threshold for flatness: determined on-line
707 inst->featureLogLrt = inst->thresholdLogLrt; //average LRT factor (= threshold)
708 inst->featureSpecFlat = inst->thresholdSpecFlat; //spectral flatness (= threshold)
709 inst->featureSpecDiff = inst->thresholdSpecDiff; //spectral difference (= threshold)
710 inst->weightLogLrt = 6; //default weighting par for LRT feature
711 inst->weightSpecFlat = 0; //default weighting par for spectral flatness feature
712 inst->weightSpecDiff = 0; //default weighting par for spectral difference feature
713
714 inst->curAvgMagnEnergy = 0; //window time-average of input magnitude spectrum
715 inst->timeAvgMagnEnergy = 0; //normalization for spectral difference
716 inst->timeAvgMagnEnergyTmp = 0; //normalization for spectral difference
717
718 //histogram quantities: used to estimate/update thresholds for features
719 WebRtcSpl_ZerosArrayW16(inst->histLrt, HIST_PAR_EST);
720 WebRtcSpl_ZerosArrayW16(inst->histSpecDiff, HIST_PAR_EST);
721 WebRtcSpl_ZerosArrayW16(inst->histSpecFlat, HIST_PAR_EST);
722
723 inst->blockIndex = -1; //frame counter
724
725 //inst->modelUpdate = 500; //window for update
726 inst->modelUpdate = (1 << STAT_UPDATES); //window for update
727 inst->cntThresUpdate = 0; //counter feature thresholds updates
728
729 inst->sumMagn = 0;
730 inst->magnEnergy = 0;
731 inst->prevQMagn = 0;
732 inst->qNoise = 0;
733 inst->prevQNoise = 0;
734
735 inst->energyIn = 0;
736 inst->scaleEnergyIn = 0;
737
738 inst->whiteNoiseLevel = 0;
739 inst->pinkNoiseNumerator = 0;
740 inst->pinkNoiseExp = 0;
741 inst->minNorm = 15; // Start with full scale
742 inst->zeroInputSignal = 0;
743
744 //default mode
745 WebRtcNsx_set_policy_core(inst, 0);
746
747 #ifdef NS_FILEDEBUG
748 inst->infile = fopen("indebug.pcm", "wb");
749 inst->outfile = fopen("outdebug.pcm", "wb");
750 inst->file1 = fopen("file1.pcm", "wb");
751 inst->file2 = fopen("file2.pcm", "wb");
752 inst->file3 = fopen("file3.pcm", "wb");
753 inst->file4 = fopen("file4.pcm", "wb");
754 inst->file5 = fopen("file5.pcm", "wb");
755 #endif
756
757 // Initialize function pointers.
758 WebRtcNsx_NoiseEstimation = NoiseEstimationC;
759 WebRtcNsx_PrepareSpectrum = PrepareSpectrumC;
760 WebRtcNsx_SynthesisUpdate = SynthesisUpdateC;
761 WebRtcNsx_AnalysisUpdate = AnalysisUpdateC;
762 WebRtcNsx_Denormalize = DenormalizeC;
763 WebRtcNsx_NormalizeRealBuffer = NormalizeRealBufferC;
764
765 #if defined(WEBRTC_HAS_NEON)
766 WebRtcNsx_InitNeon();
767 #endif
768
769 #if defined(MIPS32_LE)
770 WebRtcNsx_InitMips();
771 #endif
772
773 inst->initFlag = 1;
774
775 return 0;
776 }
777
WebRtcNsx_set_policy_core(NoiseSuppressionFixedC * inst,int mode)778 int WebRtcNsx_set_policy_core(NoiseSuppressionFixedC* inst, int mode) {
779 // allow for modes:0,1,2,3
780 if (mode < 0 || mode > 3) {
781 return -1;
782 }
783
784 inst->aggrMode = mode;
785 if (mode == 0) {
786 inst->overdrive = 256; // Q8(1.0)
787 inst->denoiseBound = 8192; // Q14(0.5)
788 inst->gainMap = 0; // No gain compensation
789 } else if (mode == 1) {
790 inst->overdrive = 256; // Q8(1.0)
791 inst->denoiseBound = 4096; // Q14(0.25)
792 inst->factor2Table = kFactor2Aggressiveness1;
793 inst->gainMap = 1;
794 } else if (mode == 2) {
795 inst->overdrive = 282; // ~= Q8(1.1)
796 inst->denoiseBound = 2048; // Q14(0.125)
797 inst->factor2Table = kFactor2Aggressiveness2;
798 inst->gainMap = 1;
799 } else if (mode == 3) {
800 inst->overdrive = 320; // Q8(1.25)
801 inst->denoiseBound = 1475; // ~= Q14(0.09)
802 inst->factor2Table = kFactor2Aggressiveness3;
803 inst->gainMap = 1;
804 }
805 return 0;
806 }
807
808 // Extract thresholds for feature parameters
809 // histograms are computed over some window_size (given by window_pars)
810 // thresholds and weights are extracted every window
811 // flag 0 means update histogram only, flag 1 means compute the thresholds/weights
812 // threshold and weights are returned in: inst->priorModelPars
WebRtcNsx_FeatureParameterExtraction(NoiseSuppressionFixedC * inst,int flag)813 void WebRtcNsx_FeatureParameterExtraction(NoiseSuppressionFixedC* inst,
814 int flag) {
815 uint32_t tmpU32;
816 uint32_t histIndex;
817 uint32_t posPeak1SpecFlatFX, posPeak2SpecFlatFX;
818 uint32_t posPeak1SpecDiffFX, posPeak2SpecDiffFX;
819
820 int32_t tmp32;
821 int32_t fluctLrtFX, thresFluctLrtFX;
822 int32_t avgHistLrtFX, avgSquareHistLrtFX, avgHistLrtComplFX;
823
824 int16_t j;
825 int16_t numHistLrt;
826
827 int i;
828 int useFeatureSpecFlat, useFeatureSpecDiff, featureSum;
829 int maxPeak1, maxPeak2;
830 int weightPeak1SpecFlat, weightPeak2SpecFlat;
831 int weightPeak1SpecDiff, weightPeak2SpecDiff;
832
833 //update histograms
834 if (!flag) {
835 // LRT
836 // Type casting to UWord32 is safe since negative values will not be wrapped to larger
837 // values than HIST_PAR_EST
838 histIndex = (uint32_t)(inst->featureLogLrt);
839 if (histIndex < HIST_PAR_EST) {
840 inst->histLrt[histIndex]++;
841 }
842 // Spectral flatness
843 // (inst->featureSpecFlat*20)>>10 = (inst->featureSpecFlat*5)>>8
844 histIndex = (inst->featureSpecFlat * 5) >> 8;
845 if (histIndex < HIST_PAR_EST) {
846 inst->histSpecFlat[histIndex]++;
847 }
848 // Spectral difference
849 histIndex = HIST_PAR_EST;
850 if (inst->timeAvgMagnEnergy > 0) {
851 // Guard against division by zero
852 // If timeAvgMagnEnergy == 0 we have no normalizing statistics and
853 // therefore can't update the histogram
854 histIndex = ((inst->featureSpecDiff * 5) >> inst->stages) /
855 inst->timeAvgMagnEnergy;
856 }
857 if (histIndex < HIST_PAR_EST) {
858 inst->histSpecDiff[histIndex]++;
859 }
860 }
861
862 // extract parameters for speech/noise probability
863 if (flag) {
864 useFeatureSpecDiff = 1;
865 //for LRT feature:
866 // compute the average over inst->featureExtractionParams.rangeAvgHistLrt
867 avgHistLrtFX = 0;
868 avgSquareHistLrtFX = 0;
869 numHistLrt = 0;
870 for (i = 0; i < BIN_SIZE_LRT; i++) {
871 j = (2 * i + 1);
872 tmp32 = inst->histLrt[i] * j;
873 avgHistLrtFX += tmp32;
874 numHistLrt += inst->histLrt[i];
875 avgSquareHistLrtFX += tmp32 * j;
876 }
877 avgHistLrtComplFX = avgHistLrtFX;
878 for (; i < HIST_PAR_EST; i++) {
879 j = (2 * i + 1);
880 tmp32 = inst->histLrt[i] * j;
881 avgHistLrtComplFX += tmp32;
882 avgSquareHistLrtFX += tmp32 * j;
883 }
884 fluctLrtFX = avgSquareHistLrtFX * numHistLrt -
885 avgHistLrtFX * avgHistLrtComplFX;
886 thresFluctLrtFX = THRES_FLUCT_LRT * numHistLrt;
887 // get threshold for LRT feature:
888 tmpU32 = (FACTOR_1_LRT_DIFF * (uint32_t)avgHistLrtFX);
889 if ((fluctLrtFX < thresFluctLrtFX) || (numHistLrt == 0) ||
890 (tmpU32 > (uint32_t)(100 * numHistLrt))) {
891 //very low fluctuation, so likely noise
892 inst->thresholdLogLrt = inst->maxLrt;
893 } else {
894 tmp32 = (int32_t)((tmpU32 << (9 + inst->stages)) / numHistLrt /
895 25);
896 // check if value is within min/max range
897 inst->thresholdLogLrt = WEBRTC_SPL_SAT(inst->maxLrt,
898 tmp32,
899 inst->minLrt);
900 }
901 if (fluctLrtFX < thresFluctLrtFX) {
902 // Do not use difference feature if fluctuation of LRT feature is very low:
903 // most likely just noise state
904 useFeatureSpecDiff = 0;
905 }
906
907 // for spectral flatness and spectral difference: compute the main peaks of histogram
908 maxPeak1 = 0;
909 maxPeak2 = 0;
910 posPeak1SpecFlatFX = 0;
911 posPeak2SpecFlatFX = 0;
912 weightPeak1SpecFlat = 0;
913 weightPeak2SpecFlat = 0;
914
915 // peaks for flatness
916 for (i = 0; i < HIST_PAR_EST; i++) {
917 if (inst->histSpecFlat[i] > maxPeak1) {
918 // Found new "first" peak
919 maxPeak2 = maxPeak1;
920 weightPeak2SpecFlat = weightPeak1SpecFlat;
921 posPeak2SpecFlatFX = posPeak1SpecFlatFX;
922
923 maxPeak1 = inst->histSpecFlat[i];
924 weightPeak1SpecFlat = inst->histSpecFlat[i];
925 posPeak1SpecFlatFX = (uint32_t)(2 * i + 1);
926 } else if (inst->histSpecFlat[i] > maxPeak2) {
927 // Found new "second" peak
928 maxPeak2 = inst->histSpecFlat[i];
929 weightPeak2SpecFlat = inst->histSpecFlat[i];
930 posPeak2SpecFlatFX = (uint32_t)(2 * i + 1);
931 }
932 }
933
934 // for spectral flatness feature
935 useFeatureSpecFlat = 1;
936 // merge the two peaks if they are close
937 if ((posPeak1SpecFlatFX - posPeak2SpecFlatFX < LIM_PEAK_SPACE_FLAT_DIFF)
938 && (weightPeak2SpecFlat * LIM_PEAK_WEIGHT_FLAT_DIFF > weightPeak1SpecFlat)) {
939 weightPeak1SpecFlat += weightPeak2SpecFlat;
940 posPeak1SpecFlatFX = (posPeak1SpecFlatFX + posPeak2SpecFlatFX) >> 1;
941 }
942 //reject if weight of peaks is not large enough, or peak value too small
943 if (weightPeak1SpecFlat < THRES_WEIGHT_FLAT_DIFF || posPeak1SpecFlatFX
944 < THRES_PEAK_FLAT) {
945 useFeatureSpecFlat = 0;
946 } else { // if selected, get the threshold
947 // compute the threshold and check if value is within min/max range
948 inst->thresholdSpecFlat = WEBRTC_SPL_SAT(MAX_FLAT_Q10, FACTOR_2_FLAT_Q10
949 * posPeak1SpecFlatFX, MIN_FLAT_Q10); //Q10
950 }
951 // done with flatness feature
952
953 if (useFeatureSpecDiff) {
954 //compute two peaks for spectral difference
955 maxPeak1 = 0;
956 maxPeak2 = 0;
957 posPeak1SpecDiffFX = 0;
958 posPeak2SpecDiffFX = 0;
959 weightPeak1SpecDiff = 0;
960 weightPeak2SpecDiff = 0;
961 // peaks for spectral difference
962 for (i = 0; i < HIST_PAR_EST; i++) {
963 if (inst->histSpecDiff[i] > maxPeak1) {
964 // Found new "first" peak
965 maxPeak2 = maxPeak1;
966 weightPeak2SpecDiff = weightPeak1SpecDiff;
967 posPeak2SpecDiffFX = posPeak1SpecDiffFX;
968
969 maxPeak1 = inst->histSpecDiff[i];
970 weightPeak1SpecDiff = inst->histSpecDiff[i];
971 posPeak1SpecDiffFX = (uint32_t)(2 * i + 1);
972 } else if (inst->histSpecDiff[i] > maxPeak2) {
973 // Found new "second" peak
974 maxPeak2 = inst->histSpecDiff[i];
975 weightPeak2SpecDiff = inst->histSpecDiff[i];
976 posPeak2SpecDiffFX = (uint32_t)(2 * i + 1);
977 }
978 }
979
980 // merge the two peaks if they are close
981 if ((posPeak1SpecDiffFX - posPeak2SpecDiffFX < LIM_PEAK_SPACE_FLAT_DIFF)
982 && (weightPeak2SpecDiff * LIM_PEAK_WEIGHT_FLAT_DIFF > weightPeak1SpecDiff)) {
983 weightPeak1SpecDiff += weightPeak2SpecDiff;
984 posPeak1SpecDiffFX = (posPeak1SpecDiffFX + posPeak2SpecDiffFX) >> 1;
985 }
986 // get the threshold value and check if value is within min/max range
987 inst->thresholdSpecDiff = WEBRTC_SPL_SAT(MAX_DIFF, FACTOR_1_LRT_DIFF
988 * posPeak1SpecDiffFX, MIN_DIFF); //5x bigger
989 //reject if weight of peaks is not large enough
990 if (weightPeak1SpecDiff < THRES_WEIGHT_FLAT_DIFF) {
991 useFeatureSpecDiff = 0;
992 }
993 // done with spectral difference feature
994 }
995
996 // select the weights between the features
997 // inst->priorModelPars[4] is weight for LRT: always selected
998 featureSum = 6 / (1 + useFeatureSpecFlat + useFeatureSpecDiff);
999 inst->weightLogLrt = featureSum;
1000 inst->weightSpecFlat = useFeatureSpecFlat * featureSum;
1001 inst->weightSpecDiff = useFeatureSpecDiff * featureSum;
1002
1003 // set histograms to zero for next update
1004 WebRtcSpl_ZerosArrayW16(inst->histLrt, HIST_PAR_EST);
1005 WebRtcSpl_ZerosArrayW16(inst->histSpecDiff, HIST_PAR_EST);
1006 WebRtcSpl_ZerosArrayW16(inst->histSpecFlat, HIST_PAR_EST);
1007 } // end of flag == 1
1008 }
1009
1010
1011 // Compute spectral flatness on input spectrum
1012 // magn is the magnitude spectrum
1013 // spectral flatness is returned in inst->featureSpecFlat
WebRtcNsx_ComputeSpectralFlatness(NoiseSuppressionFixedC * inst,uint16_t * magn)1014 void WebRtcNsx_ComputeSpectralFlatness(NoiseSuppressionFixedC* inst,
1015 uint16_t* magn) {
1016 uint32_t tmpU32;
1017 uint32_t avgSpectralFlatnessNum, avgSpectralFlatnessDen;
1018
1019 int32_t tmp32;
1020 int32_t currentSpectralFlatness, logCurSpectralFlatness;
1021
1022 int16_t zeros, frac, intPart;
1023
1024 size_t i;
1025
1026 // for flatness
1027 avgSpectralFlatnessNum = 0;
1028 avgSpectralFlatnessDen = inst->sumMagn - (uint32_t)magn[0]; // Q(normData-stages)
1029
1030 // compute log of ratio of the geometric to arithmetic mean: check for log(0) case
1031 // flatness = exp( sum(log(magn[i]))/N - log(sum(magn[i])/N) )
1032 // = exp( sum(log(magn[i]))/N ) * N / sum(magn[i])
1033 // = 2^( sum(log2(magn[i]))/N - (log2(sum(magn[i])) - log2(N)) ) [This is used]
1034 for (i = 1; i < inst->magnLen; i++) {
1035 // First bin is excluded from spectrum measures. Number of bins is now a power of 2
1036 if (magn[i]) {
1037 zeros = WebRtcSpl_NormU32((uint32_t)magn[i]);
1038 frac = (int16_t)(((uint32_t)((uint32_t)(magn[i]) << zeros)
1039 & 0x7FFFFFFF) >> 23);
1040 // log2(magn(i))
1041 RTC_DCHECK_LT(frac, 256);
1042 tmpU32 = (uint32_t)(((31 - zeros) << 8)
1043 + WebRtcNsx_kLogTableFrac[frac]); // Q8
1044 avgSpectralFlatnessNum += tmpU32; // Q8
1045 } else {
1046 //if at least one frequency component is zero, treat separately
1047 tmpU32 = WEBRTC_SPL_UMUL_32_16(inst->featureSpecFlat, SPECT_FLAT_TAVG_Q14); // Q24
1048 inst->featureSpecFlat -= tmpU32 >> 14; // Q10
1049 return;
1050 }
1051 }
1052 //ratio and inverse log: check for case of log(0)
1053 zeros = WebRtcSpl_NormU32(avgSpectralFlatnessDen);
1054 frac = (int16_t)(((avgSpectralFlatnessDen << zeros) & 0x7FFFFFFF) >> 23);
1055 // log2(avgSpectralFlatnessDen)
1056 RTC_DCHECK_LT(frac, 256);
1057 tmp32 = (int32_t)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); // Q8
1058 logCurSpectralFlatness = (int32_t)avgSpectralFlatnessNum;
1059 logCurSpectralFlatness += ((int32_t)(inst->stages - 1) << (inst->stages + 7)); // Q(8+stages-1)
1060 logCurSpectralFlatness -= (tmp32 << (inst->stages - 1));
1061 logCurSpectralFlatness <<= (10 - inst->stages); // Q17
1062 tmp32 = (int32_t)(0x00020000 | (WEBRTC_SPL_ABS_W32(logCurSpectralFlatness)
1063 & 0x0001FFFF)); //Q17
1064 intPart = 7 - (logCurSpectralFlatness >> 17); // Add 7 for output in Q10.
1065 if (intPart > 0) {
1066 currentSpectralFlatness = tmp32 >> intPart;
1067 } else {
1068 currentSpectralFlatness = tmp32 << -intPart;
1069 }
1070
1071 //time average update of spectral flatness feature
1072 tmp32 = currentSpectralFlatness - (int32_t)inst->featureSpecFlat; // Q10
1073 tmp32 *= SPECT_FLAT_TAVG_Q14; // Q24
1074 inst->featureSpecFlat += tmp32 >> 14; // Q10
1075 // done with flatness feature
1076 }
1077
1078
1079 // Compute the difference measure between input spectrum and a template/learned noise spectrum
1080 // magn_tmp is the input spectrum
1081 // the reference/template spectrum is inst->magn_avg_pause[i]
1082 // returns (normalized) spectral difference in inst->featureSpecDiff
WebRtcNsx_ComputeSpectralDifference(NoiseSuppressionFixedC * inst,uint16_t * magnIn)1083 void WebRtcNsx_ComputeSpectralDifference(NoiseSuppressionFixedC* inst,
1084 uint16_t* magnIn) {
1085 // This is to be calculated:
1086 // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause)
1087
1088 uint32_t tmpU32no1, tmpU32no2;
1089 uint32_t varMagnUFX, varPauseUFX, avgDiffNormMagnUFX;
1090
1091 int32_t tmp32no1, tmp32no2;
1092 int32_t avgPauseFX, avgMagnFX, covMagnPauseFX;
1093 int32_t maxPause, minPause;
1094
1095 int16_t tmp16no1;
1096
1097 size_t i;
1098 int norm32, nShifts;
1099
1100 avgPauseFX = 0;
1101 maxPause = 0;
1102 minPause = inst->avgMagnPause[0]; // Q(prevQMagn)
1103 // compute average quantities
1104 for (i = 0; i < inst->magnLen; i++) {
1105 // Compute mean of magn_pause
1106 avgPauseFX += inst->avgMagnPause[i]; // in Q(prevQMagn)
1107 maxPause = WEBRTC_SPL_MAX(maxPause, inst->avgMagnPause[i]);
1108 minPause = WEBRTC_SPL_MIN(minPause, inst->avgMagnPause[i]);
1109 }
1110 // normalize by replacing div of "inst->magnLen" with "inst->stages-1" shifts
1111 avgPauseFX >>= inst->stages - 1;
1112 avgMagnFX = inst->sumMagn >> (inst->stages - 1);
1113 // Largest possible deviation in magnPause for (co)var calculations
1114 tmp32no1 = WEBRTC_SPL_MAX(maxPause - avgPauseFX, avgPauseFX - minPause);
1115 // Get number of shifts to make sure we don't get wrap around in varPause
1116 nShifts = WEBRTC_SPL_MAX(0, 10 + inst->stages - WebRtcSpl_NormW32(tmp32no1));
1117
1118 varMagnUFX = 0;
1119 varPauseUFX = 0;
1120 covMagnPauseFX = 0;
1121 for (i = 0; i < inst->magnLen; i++) {
1122 // Compute var and cov of magn and magn_pause
1123 tmp16no1 = (int16_t)((int32_t)magnIn[i] - avgMagnFX);
1124 tmp32no2 = inst->avgMagnPause[i] - avgPauseFX;
1125 varMagnUFX += (uint32_t)(tmp16no1 * tmp16no1); // Q(2*qMagn)
1126 tmp32no1 = tmp32no2 * tmp16no1; // Q(prevQMagn+qMagn)
1127 covMagnPauseFX += tmp32no1; // Q(prevQMagn+qMagn)
1128 tmp32no1 = tmp32no2 >> nShifts; // Q(prevQMagn-minPause).
1129 varPauseUFX += tmp32no1 * tmp32no1; // Q(2*(prevQMagn-minPause))
1130 }
1131 //update of average magnitude spectrum: Q(-2*stages) and averaging replaced by shifts
1132 inst->curAvgMagnEnergy +=
1133 inst->magnEnergy >> (2 * inst->normData + inst->stages - 1);
1134
1135 avgDiffNormMagnUFX = varMagnUFX; // Q(2*qMagn)
1136 if ((varPauseUFX) && (covMagnPauseFX)) {
1137 tmpU32no1 = (uint32_t)WEBRTC_SPL_ABS_W32(covMagnPauseFX); // Q(prevQMagn+qMagn)
1138 norm32 = WebRtcSpl_NormU32(tmpU32no1) - 16;
1139 if (norm32 > 0) {
1140 tmpU32no1 <<= norm32; // Q(prevQMagn+qMagn+norm32)
1141 } else {
1142 tmpU32no1 >>= -norm32; // Q(prevQMagn+qMagn+norm32)
1143 }
1144 tmpU32no2 = WEBRTC_SPL_UMUL(tmpU32no1, tmpU32no1); // Q(2*(prevQMagn+qMagn-norm32))
1145
1146 nShifts += norm32;
1147 nShifts <<= 1;
1148 if (nShifts < 0) {
1149 varPauseUFX >>= (-nShifts); // Q(2*(qMagn+norm32+minPause))
1150 nShifts = 0;
1151 }
1152 if (varPauseUFX > 0) {
1153 // Q(2*(qMagn+norm32-16+minPause))
1154 tmpU32no1 = tmpU32no2 / varPauseUFX;
1155 tmpU32no1 >>= nShifts;
1156
1157 // Q(2*qMagn)
1158 avgDiffNormMagnUFX -= WEBRTC_SPL_MIN(avgDiffNormMagnUFX, tmpU32no1);
1159 } else {
1160 avgDiffNormMagnUFX = 0;
1161 }
1162 }
1163 //normalize and compute time average update of difference feature
1164 tmpU32no1 = avgDiffNormMagnUFX >> (2 * inst->normData);
1165 if (inst->featureSpecDiff > tmpU32no1) {
1166 tmpU32no2 = WEBRTC_SPL_UMUL_32_16(inst->featureSpecDiff - tmpU32no1,
1167 SPECT_DIFF_TAVG_Q8); // Q(8-2*stages)
1168 inst->featureSpecDiff -= tmpU32no2 >> 8; // Q(-2*stages)
1169 } else {
1170 tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no1 - inst->featureSpecDiff,
1171 SPECT_DIFF_TAVG_Q8); // Q(8-2*stages)
1172 inst->featureSpecDiff += tmpU32no2 >> 8; // Q(-2*stages)
1173 }
1174 }
1175
1176 // Transform input (speechFrame) to frequency domain magnitude (magnU16)
WebRtcNsx_DataAnalysis(NoiseSuppressionFixedC * inst,short * speechFrame,uint16_t * magnU16)1177 void WebRtcNsx_DataAnalysis(NoiseSuppressionFixedC* inst,
1178 short* speechFrame,
1179 uint16_t* magnU16) {
1180 uint32_t tmpU32no1;
1181
1182 int32_t tmp_1_w32 = 0;
1183 int32_t tmp_2_w32 = 0;
1184 int32_t sum_log_magn = 0;
1185 int32_t sum_log_i_log_magn = 0;
1186
1187 uint16_t sum_log_magn_u16 = 0;
1188 uint16_t tmp_u16 = 0;
1189
1190 int16_t sum_log_i = 0;
1191 int16_t sum_log_i_square = 0;
1192 int16_t frac = 0;
1193 int16_t log2 = 0;
1194 int16_t matrix_determinant = 0;
1195 int16_t maxWinData;
1196
1197 size_t i, j;
1198 int zeros;
1199 int net_norm = 0;
1200 int right_shifts_in_magnU16 = 0;
1201 int right_shifts_in_initMagnEst = 0;
1202
1203 int16_t winData_buff[ANAL_BLOCKL_MAX * 2 + 16];
1204 int16_t realImag_buff[ANAL_BLOCKL_MAX * 2 + 16];
1205
1206 // Align the structures to 32-byte boundary for the FFT function.
1207 int16_t* winData = (int16_t*) (((uintptr_t)winData_buff + 31) & ~31);
1208 int16_t* realImag = (int16_t*) (((uintptr_t) realImag_buff + 31) & ~31);
1209
1210 // Update analysis buffer for lower band, and window data before FFT.
1211 WebRtcNsx_AnalysisUpdate(inst, winData, speechFrame);
1212
1213 // Get input energy
1214 inst->energyIn =
1215 WebRtcSpl_Energy(winData, inst->anaLen, &inst->scaleEnergyIn);
1216
1217 // Reset zero input flag
1218 inst->zeroInputSignal = 0;
1219 // Acquire norm for winData
1220 maxWinData = WebRtcSpl_MaxAbsValueW16(winData, inst->anaLen);
1221 inst->normData = WebRtcSpl_NormW16(maxWinData);
1222 if (maxWinData == 0) {
1223 // Treat zero input separately.
1224 inst->zeroInputSignal = 1;
1225 return;
1226 }
1227
1228 // Determine the net normalization in the frequency domain
1229 net_norm = inst->stages - inst->normData;
1230 // Track lowest normalization factor and use it to prevent wrap around in shifting
1231 right_shifts_in_magnU16 = inst->normData - inst->minNorm;
1232 right_shifts_in_initMagnEst = WEBRTC_SPL_MAX(-right_shifts_in_magnU16, 0);
1233 inst->minNorm -= right_shifts_in_initMagnEst;
1234 right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0);
1235
1236 // create realImag as winData interleaved with zeros (= imag. part), normalize it
1237 WebRtcNsx_NormalizeRealBuffer(inst, winData, realImag);
1238
1239 // FFT output will be in winData[].
1240 WebRtcSpl_RealForwardFFT(inst->real_fft, realImag, winData);
1241
1242 inst->imag[0] = 0; // Q(normData-stages)
1243 inst->imag[inst->anaLen2] = 0;
1244 inst->real[0] = winData[0]; // Q(normData-stages)
1245 inst->real[inst->anaLen2] = winData[inst->anaLen];
1246 // Q(2*(normData-stages))
1247 inst->magnEnergy = (uint32_t)(inst->real[0] * inst->real[0]);
1248 inst->magnEnergy += (uint32_t)(inst->real[inst->anaLen2] *
1249 inst->real[inst->anaLen2]);
1250 magnU16[0] = (uint16_t)WEBRTC_SPL_ABS_W16(inst->real[0]); // Q(normData-stages)
1251 magnU16[inst->anaLen2] = (uint16_t)WEBRTC_SPL_ABS_W16(inst->real[inst->anaLen2]);
1252 inst->sumMagn = (uint32_t)magnU16[0]; // Q(normData-stages)
1253 inst->sumMagn += (uint32_t)magnU16[inst->anaLen2];
1254
1255 if (inst->blockIndex >= END_STARTUP_SHORT) {
1256 for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
1257 inst->real[i] = winData[j];
1258 inst->imag[i] = -winData[j + 1];
1259 // magnitude spectrum
1260 // energy in Q(2*(normData-stages))
1261 tmpU32no1 = (uint32_t)(winData[j] * winData[j]);
1262 tmpU32no1 += (uint32_t)(winData[j + 1] * winData[j + 1]);
1263 inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages))
1264
1265 magnU16[i] = (uint16_t)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages)
1266 inst->sumMagn += (uint32_t)magnU16[i]; // Q(normData-stages)
1267 }
1268 } else {
1269 //
1270 // Gather information during startup for noise parameter estimation
1271 //
1272
1273 // Switch initMagnEst to Q(minNorm-stages)
1274 inst->initMagnEst[0] >>= right_shifts_in_initMagnEst;
1275 inst->initMagnEst[inst->anaLen2] >>= right_shifts_in_initMagnEst;
1276
1277 // Update initMagnEst with magnU16 in Q(minNorm-stages).
1278 inst->initMagnEst[0] += magnU16[0] >> right_shifts_in_magnU16;
1279 inst->initMagnEst[inst->anaLen2] +=
1280 magnU16[inst->anaLen2] >> right_shifts_in_magnU16;
1281
1282 log2 = 0;
1283 if (magnU16[inst->anaLen2]) {
1284 // Calculate log2(magnU16[inst->anaLen2])
1285 zeros = WebRtcSpl_NormU32((uint32_t)magnU16[inst->anaLen2]);
1286 frac = (int16_t)((((uint32_t)magnU16[inst->anaLen2] << zeros) &
1287 0x7FFFFFFF) >> 23); // Q8
1288 // log2(magnU16(i)) in Q8
1289 RTC_DCHECK_LT(frac, 256);
1290 log2 = (int16_t)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]);
1291 }
1292
1293 sum_log_magn = (int32_t)log2; // Q8
1294 // sum_log_i_log_magn in Q17
1295 sum_log_i_log_magn = (kLogIndex[inst->anaLen2] * log2) >> 3;
1296
1297 for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
1298 inst->real[i] = winData[j];
1299 inst->imag[i] = -winData[j + 1];
1300 // magnitude spectrum
1301 // energy in Q(2*(normData-stages))
1302 tmpU32no1 = (uint32_t)(winData[j] * winData[j]);
1303 tmpU32no1 += (uint32_t)(winData[j + 1] * winData[j + 1]);
1304 inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages))
1305
1306 magnU16[i] = (uint16_t)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages)
1307 inst->sumMagn += (uint32_t)magnU16[i]; // Q(normData-stages)
1308
1309 // Switch initMagnEst to Q(minNorm-stages)
1310 inst->initMagnEst[i] >>= right_shifts_in_initMagnEst;
1311
1312 // Update initMagnEst with magnU16 in Q(minNorm-stages).
1313 inst->initMagnEst[i] += magnU16[i] >> right_shifts_in_magnU16;
1314
1315 if (i >= kStartBand) {
1316 // For pink noise estimation. Collect data neglecting lower frequency band
1317 log2 = 0;
1318 if (magnU16[i]) {
1319 zeros = WebRtcSpl_NormU32((uint32_t)magnU16[i]);
1320 frac = (int16_t)((((uint32_t)magnU16[i] << zeros) &
1321 0x7FFFFFFF) >> 23);
1322 // log2(magnU16(i)) in Q8
1323 RTC_DCHECK_LT(frac, 256);
1324 log2 = (int16_t)(((31 - zeros) << 8)
1325 + WebRtcNsx_kLogTableFrac[frac]);
1326 }
1327 sum_log_magn += (int32_t)log2; // Q8
1328 // sum_log_i_log_magn in Q17
1329 sum_log_i_log_magn += (kLogIndex[i] * log2) >> 3;
1330 }
1331 }
1332
1333 //
1334 //compute simplified noise model during startup
1335 //
1336
1337 // Estimate White noise
1338
1339 // Switch whiteNoiseLevel to Q(minNorm-stages)
1340 inst->whiteNoiseLevel >>= right_shifts_in_initMagnEst;
1341
1342 // Update the average magnitude spectrum, used as noise estimate.
1343 tmpU32no1 = WEBRTC_SPL_UMUL_32_16(inst->sumMagn, inst->overdrive);
1344 tmpU32no1 >>= inst->stages + 8;
1345
1346 // Replacing division above with 'stages' shifts
1347 // Shift to same Q-domain as whiteNoiseLevel
1348 tmpU32no1 >>= right_shifts_in_magnU16;
1349 // This operation is safe from wrap around as long as END_STARTUP_SHORT < 128
1350 RTC_DCHECK_LT(END_STARTUP_SHORT, 128);
1351 inst->whiteNoiseLevel += tmpU32no1; // Q(minNorm-stages)
1352
1353 // Estimate Pink noise parameters
1354 // Denominator used in both parameter estimates.
1355 // The value is only dependent on the size of the frequency band (kStartBand)
1356 // and to reduce computational complexity stored in a table (kDeterminantEstMatrix[])
1357 RTC_DCHECK_LT(kStartBand, 66);
1358 matrix_determinant = kDeterminantEstMatrix[kStartBand]; // Q0
1359 sum_log_i = kSumLogIndex[kStartBand]; // Q5
1360 sum_log_i_square = kSumSquareLogIndex[kStartBand]; // Q2
1361 if (inst->fs == 8000) {
1362 // Adjust values to shorter blocks in narrow band.
1363 tmp_1_w32 = (int32_t)matrix_determinant;
1364 tmp_1_w32 += (kSumLogIndex[65] * sum_log_i) >> 9;
1365 tmp_1_w32 -= (kSumLogIndex[65] * kSumLogIndex[65]) >> 10;
1366 tmp_1_w32 -= (int32_t)sum_log_i_square << 4;
1367 tmp_1_w32 -= ((inst->magnLen - kStartBand) * kSumSquareLogIndex[65]) >> 2;
1368 matrix_determinant = (int16_t)tmp_1_w32;
1369 sum_log_i -= kSumLogIndex[65]; // Q5
1370 sum_log_i_square -= kSumSquareLogIndex[65]; // Q2
1371 }
1372
1373 // Necessary number of shifts to fit sum_log_magn in a word16
1374 zeros = 16 - WebRtcSpl_NormW32(sum_log_magn);
1375 if (zeros < 0) {
1376 zeros = 0;
1377 }
1378 tmp_1_w32 = sum_log_magn << 1; // Q9
1379 sum_log_magn_u16 = (uint16_t)(tmp_1_w32 >> zeros); // Q(9-zeros).
1380
1381 // Calculate and update pinkNoiseNumerator. Result in Q11.
1382 tmp_2_w32 = WEBRTC_SPL_MUL_16_U16(sum_log_i_square, sum_log_magn_u16); // Q(11-zeros)
1383 tmpU32no1 = sum_log_i_log_magn >> 12; // Q5
1384
1385 // Shift the largest value of sum_log_i and tmp32no3 before multiplication
1386 tmp_u16 = ((uint16_t)sum_log_i << 1); // Q6
1387 if ((uint32_t)sum_log_i > tmpU32no1) {
1388 tmp_u16 >>= zeros;
1389 } else {
1390 tmpU32no1 >>= zeros;
1391 }
1392 tmp_2_w32 -= (int32_t)WEBRTC_SPL_UMUL_32_16(tmpU32no1, tmp_u16); // Q(11-zeros)
1393 matrix_determinant >>= zeros; // Q(-zeros)
1394 tmp_2_w32 = WebRtcSpl_DivW32W16(tmp_2_w32, matrix_determinant); // Q11
1395 tmp_2_w32 += (int32_t)net_norm << 11; // Q11
1396 if (tmp_2_w32 < 0) {
1397 tmp_2_w32 = 0;
1398 }
1399 inst->pinkNoiseNumerator += tmp_2_w32; // Q11
1400
1401 // Calculate and update pinkNoiseExp. Result in Q14.
1402 tmp_2_w32 = WEBRTC_SPL_MUL_16_U16(sum_log_i, sum_log_magn_u16); // Q(14-zeros)
1403 tmp_1_w32 = sum_log_i_log_magn >> (3 + zeros);
1404 tmp_1_w32 *= inst->magnLen - kStartBand;
1405 tmp_2_w32 -= tmp_1_w32; // Q(14-zeros)
1406 if (tmp_2_w32 > 0) {
1407 // If the exponential parameter is negative force it to zero, which means a
1408 // flat spectrum.
1409 tmp_1_w32 = WebRtcSpl_DivW32W16(tmp_2_w32, matrix_determinant); // Q14
1410 inst->pinkNoiseExp += WEBRTC_SPL_SAT(16384, tmp_1_w32, 0); // Q14
1411 }
1412 }
1413 }
1414
WebRtcNsx_DataSynthesis(NoiseSuppressionFixedC * inst,short * outFrame)1415 void WebRtcNsx_DataSynthesis(NoiseSuppressionFixedC* inst, short* outFrame) {
1416 int32_t energyOut;
1417
1418 int16_t realImag_buff[ANAL_BLOCKL_MAX * 2 + 16];
1419 int16_t rfft_out_buff[ANAL_BLOCKL_MAX * 2 + 16];
1420
1421 // Align the structures to 32-byte boundary for the FFT function.
1422 int16_t* realImag = (int16_t*) (((uintptr_t)realImag_buff + 31) & ~31);
1423 int16_t* rfft_out = (int16_t*) (((uintptr_t) rfft_out_buff + 31) & ~31);
1424
1425 int16_t tmp16no1, tmp16no2;
1426 int16_t energyRatio;
1427 int16_t gainFactor, gainFactor1, gainFactor2;
1428
1429 size_t i;
1430 int outCIFFT;
1431 int scaleEnergyOut = 0;
1432
1433 if (inst->zeroInputSignal) {
1434 // synthesize the special case of zero input
1435 // read out fully processed segment
1436 for (i = 0; i < inst->blockLen10ms; i++) {
1437 outFrame[i] = inst->synthesisBuffer[i]; // Q0
1438 }
1439 // update synthesis buffer
1440 memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms,
1441 (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer));
1442 WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms,
1443 inst->blockLen10ms);
1444 return;
1445 }
1446
1447 // Filter the data in the frequency domain, and create spectrum.
1448 WebRtcNsx_PrepareSpectrum(inst, realImag);
1449
1450 // Inverse FFT output will be in rfft_out[].
1451 outCIFFT = WebRtcSpl_RealInverseFFT(inst->real_fft, realImag, rfft_out);
1452
1453 WebRtcNsx_Denormalize(inst, rfft_out, outCIFFT);
1454
1455 //scale factor: only do it after END_STARTUP_LONG time
1456 gainFactor = 8192; // 8192 = Q13(1.0)
1457 if (inst->gainMap == 1 &&
1458 inst->blockIndex > END_STARTUP_LONG &&
1459 inst->energyIn > 0) {
1460 // Q(-scaleEnergyOut)
1461 energyOut = WebRtcSpl_Energy(inst->real, inst->anaLen, &scaleEnergyOut);
1462 if (scaleEnergyOut == 0 && !(energyOut & 0x7f800000)) {
1463 energyOut = WEBRTC_SPL_SHIFT_W32(energyOut, 8 + scaleEnergyOut
1464 - inst->scaleEnergyIn);
1465 } else {
1466 // |energyIn| is currently in Q(|scaleEnergyIn|), but to later on end up
1467 // with an |energyRatio| in Q8 we need to change the Q-domain to
1468 // Q(-8-scaleEnergyOut).
1469 inst->energyIn >>= 8 + scaleEnergyOut - inst->scaleEnergyIn;
1470 }
1471
1472 RTC_DCHECK_GT(inst->energyIn, 0);
1473 energyRatio = (energyOut + inst->energyIn / 2) / inst->energyIn; // Q8
1474 // Limit the ratio to [0, 1] in Q8, i.e., [0, 256]
1475 energyRatio = WEBRTC_SPL_SAT(256, energyRatio, 0);
1476
1477 // all done in lookup tables now
1478 RTC_DCHECK_LT(energyRatio, 257);
1479 gainFactor1 = kFactor1Table[energyRatio]; // Q8
1480 gainFactor2 = inst->factor2Table[energyRatio]; // Q8
1481
1482 //combine both scales with speech/noise prob: note prior (priorSpeechProb) is not frequency dependent
1483
1484 // factor = inst->priorSpeechProb*factor1 + (1.0-inst->priorSpeechProb)*factor2; // original code
1485 tmp16no1 = (int16_t)(((16384 - inst->priorNonSpeechProb) * gainFactor1) >>
1486 14); // in Q13, where 16384 = Q14(1.0)
1487 tmp16no2 = (int16_t)((inst->priorNonSpeechProb * gainFactor2) >> 14);
1488 gainFactor = tmp16no1 + tmp16no2; // Q13
1489 } // out of flag_gain_map==1
1490
1491 // Synthesis, read out fully processed segment, and update synthesis buffer.
1492 WebRtcNsx_SynthesisUpdate(inst, outFrame, gainFactor);
1493 }
1494
WebRtcNsx_ProcessCore(NoiseSuppressionFixedC * inst,const short * const * speechFrame,int num_bands,short * const * outFrame)1495 void WebRtcNsx_ProcessCore(NoiseSuppressionFixedC* inst,
1496 const short* const* speechFrame,
1497 int num_bands,
1498 short* const* outFrame) {
1499 // main routine for noise suppression
1500
1501 uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
1502 uint32_t satMax, maxNoiseU32;
1503 uint32_t tmpMagnU32, tmpNoiseU32;
1504 uint32_t nearMagnEst;
1505 uint32_t noiseUpdateU32;
1506 uint32_t noiseU32[HALF_ANAL_BLOCKL];
1507 uint32_t postLocSnr[HALF_ANAL_BLOCKL];
1508 uint32_t priorLocSnr[HALF_ANAL_BLOCKL];
1509 uint32_t prevNearSnr[HALF_ANAL_BLOCKL];
1510 uint32_t curNearSnr;
1511 uint32_t priorSnr;
1512 uint32_t noise_estimate = 0;
1513 uint32_t noise_estimate_avg = 0;
1514 uint32_t numerator = 0;
1515
1516 int32_t tmp32no1, tmp32no2;
1517 int32_t pink_noise_num_avg = 0;
1518
1519 uint16_t tmpU16no1;
1520 uint16_t magnU16[HALF_ANAL_BLOCKL];
1521 uint16_t prevNoiseU16[HALF_ANAL_BLOCKL];
1522 uint16_t nonSpeechProbFinal[HALF_ANAL_BLOCKL];
1523 uint16_t gammaNoise, prevGammaNoise;
1524 uint16_t noiseSupFilterTmp[HALF_ANAL_BLOCKL];
1525
1526 int16_t qMagn, qNoise;
1527 int16_t avgProbSpeechHB, gainModHB, avgFilterGainHB, gainTimeDomainHB;
1528 int16_t pink_noise_exp_avg = 0;
1529
1530 size_t i, j;
1531 int nShifts, postShifts;
1532 int norm32no1, norm32no2;
1533 int flag, sign;
1534 int q_domain_to_use = 0;
1535
1536 // Code for ARMv7-Neon platform assumes the following:
1537 RTC_DCHECK_GT(inst->anaLen, 0);
1538 RTC_DCHECK_GT(inst->anaLen2, 0);
1539 RTC_DCHECK_EQ(0, inst->anaLen % 16);
1540 RTC_DCHECK_EQ(0, inst->anaLen2 % 8);
1541 RTC_DCHECK_GT(inst->blockLen10ms, 0);
1542 RTC_DCHECK_EQ(0, inst->blockLen10ms % 16);
1543 RTC_DCHECK_EQ(inst->magnLen, inst->anaLen2 + 1);
1544
1545 #ifdef NS_FILEDEBUG
1546 if (fwrite(spframe, sizeof(short),
1547 inst->blockLen10ms, inst->infile) != inst->blockLen10ms) {
1548 RTC_NOTREACHED();
1549 }
1550 #endif
1551
1552 // Check that initialization has been done
1553 RTC_DCHECK_EQ(1, inst->initFlag);
1554 RTC_DCHECK_LE(num_bands - 1, NUM_HIGH_BANDS_MAX);
1555
1556 const short* const* speechFrameHB = NULL;
1557 short* const* outFrameHB = NULL;
1558 size_t num_high_bands = 0;
1559 if (num_bands > 1) {
1560 speechFrameHB = &speechFrame[1];
1561 outFrameHB = &outFrame[1];
1562 num_high_bands = (size_t)(num_bands - 1);
1563 }
1564
1565 // Store speechFrame and transform to frequency domain
1566 WebRtcNsx_DataAnalysis(inst, (short*)speechFrame[0], magnU16);
1567
1568 if (inst->zeroInputSignal) {
1569 WebRtcNsx_DataSynthesis(inst, outFrame[0]);
1570
1571 if (num_bands > 1) {
1572 // update analysis buffer for H band
1573 // append new data to buffer FX
1574 for (i = 0; i < num_high_bands; ++i) {
1575 int block_shift = inst->anaLen - inst->blockLen10ms;
1576 memcpy(inst->dataBufHBFX[i], inst->dataBufHBFX[i] + inst->blockLen10ms,
1577 block_shift * sizeof(*inst->dataBufHBFX[i]));
1578 memcpy(inst->dataBufHBFX[i] + block_shift, speechFrameHB[i],
1579 inst->blockLen10ms * sizeof(*inst->dataBufHBFX[i]));
1580 for (j = 0; j < inst->blockLen10ms; j++) {
1581 outFrameHB[i][j] = inst->dataBufHBFX[i][j]; // Q0
1582 }
1583 }
1584 } // end of H band gain computation
1585 return;
1586 }
1587
1588 // Update block index when we have something to process
1589 inst->blockIndex++;
1590 //
1591
1592 // Norm of magn
1593 qMagn = inst->normData - inst->stages;
1594
1595 // Compute spectral flatness on input spectrum
1596 WebRtcNsx_ComputeSpectralFlatness(inst, magnU16);
1597
1598 // quantile noise estimate
1599 WebRtcNsx_NoiseEstimation(inst, magnU16, noiseU32, &qNoise);
1600
1601 //noise estimate from previous frame
1602 for (i = 0; i < inst->magnLen; i++) {
1603 prevNoiseU16[i] = (uint16_t)(inst->prevNoiseU32[i] >> 11); // Q(prevQNoise)
1604 }
1605
1606 if (inst->blockIndex < END_STARTUP_SHORT) {
1607 // Noise Q-domain to be used later; see description at end of section.
1608 q_domain_to_use = WEBRTC_SPL_MIN((int)qNoise, inst->minNorm - inst->stages);
1609
1610 // Calculate frequency independent parts in parametric noise estimate and calculate
1611 // the estimate for the lower frequency band (same values for all frequency bins)
1612 if (inst->pinkNoiseExp) {
1613 pink_noise_exp_avg = (int16_t)WebRtcSpl_DivW32W16(inst->pinkNoiseExp,
1614 (int16_t)(inst->blockIndex + 1)); // Q14
1615 pink_noise_num_avg = WebRtcSpl_DivW32W16(inst->pinkNoiseNumerator,
1616 (int16_t)(inst->blockIndex + 1)); // Q11
1617 WebRtcNsx_CalcParametricNoiseEstimate(inst,
1618 pink_noise_exp_avg,
1619 pink_noise_num_avg,
1620 kStartBand,
1621 &noise_estimate,
1622 &noise_estimate_avg);
1623 } else {
1624 // Use white noise estimate if we have poor pink noise parameter estimates
1625 noise_estimate = inst->whiteNoiseLevel; // Q(minNorm-stages)
1626 noise_estimate_avg = noise_estimate / (inst->blockIndex + 1); // Q(minNorm-stages)
1627 }
1628 for (i = 0; i < inst->magnLen; i++) {
1629 // Estimate the background noise using the pink noise parameters if permitted
1630 if ((inst->pinkNoiseExp) && (i >= kStartBand)) {
1631 // Reset noise_estimate
1632 noise_estimate = 0;
1633 noise_estimate_avg = 0;
1634 // Calculate the parametric noise estimate for current frequency bin
1635 WebRtcNsx_CalcParametricNoiseEstimate(inst,
1636 pink_noise_exp_avg,
1637 pink_noise_num_avg,
1638 i,
1639 &noise_estimate,
1640 &noise_estimate_avg);
1641 }
1642 // Calculate parametric Wiener filter
1643 noiseSupFilterTmp[i] = inst->denoiseBound;
1644 if (inst->initMagnEst[i]) {
1645 // numerator = (initMagnEst - noise_estimate * overdrive)
1646 // Result in Q(8+minNorm-stages)
1647 tmpU32no1 = WEBRTC_SPL_UMUL_32_16(noise_estimate, inst->overdrive);
1648 numerator = inst->initMagnEst[i] << 8;
1649 if (numerator > tmpU32no1) {
1650 // Suppression filter coefficient larger than zero, so calculate.
1651 numerator -= tmpU32no1;
1652
1653 // Determine number of left shifts in numerator for best accuracy after
1654 // division
1655 nShifts = WebRtcSpl_NormU32(numerator);
1656 nShifts = WEBRTC_SPL_SAT(6, nShifts, 0);
1657
1658 // Shift numerator to Q(nShifts+8+minNorm-stages)
1659 numerator <<= nShifts;
1660
1661 // Shift denominator to Q(nShifts-6+minNorm-stages)
1662 tmpU32no1 = inst->initMagnEst[i] >> (6 - nShifts);
1663 if (tmpU32no1 == 0) {
1664 // This is only possible if numerator = 0, in which case
1665 // we don't need any division.
1666 tmpU32no1 = 1;
1667 }
1668 tmpU32no2 = numerator / tmpU32no1; // Q14
1669 noiseSupFilterTmp[i] = (uint16_t)WEBRTC_SPL_SAT(16384, tmpU32no2,
1670 (uint32_t)(inst->denoiseBound)); // Q14
1671 }
1672 }
1673 // Weight quantile noise 'noiseU32' with modeled noise 'noise_estimate_avg'
1674 // 'noiseU32 is in Q(qNoise) and 'noise_estimate' in Q(minNorm-stages)
1675 // To guarantee that we do not get wrap around when shifting to the same domain
1676 // we use the lowest one. Furthermore, we need to save 6 bits for the weighting.
1677 // 'noise_estimate_avg' can handle this operation by construction, but 'noiseU32'
1678 // may not.
1679
1680 // Shift 'noiseU32' to 'q_domain_to_use'
1681 tmpU32no1 = noiseU32[i] >> (qNoise - q_domain_to_use);
1682 // Shift 'noise_estimate_avg' to 'q_domain_to_use'
1683 tmpU32no2 = noise_estimate_avg >>
1684 (inst->minNorm - inst->stages - q_domain_to_use);
1685 // Make a simple check to see if we have enough room for weighting 'tmpU32no1'
1686 // without wrap around
1687 nShifts = 0;
1688 if (tmpU32no1 & 0xfc000000) {
1689 tmpU32no1 >>= 6;
1690 tmpU32no2 >>= 6;
1691 nShifts = 6;
1692 }
1693 tmpU32no1 *= inst->blockIndex;
1694 tmpU32no2 *= (END_STARTUP_SHORT - inst->blockIndex);
1695 // Add them together and divide by startup length
1696 noiseU32[i] = WebRtcSpl_DivU32U16(tmpU32no1 + tmpU32no2, END_STARTUP_SHORT);
1697 // Shift back if necessary
1698 noiseU32[i] <<= nShifts;
1699 }
1700 // Update new Q-domain for 'noiseU32'
1701 qNoise = q_domain_to_use;
1702 }
1703 // compute average signal during END_STARTUP_LONG time:
1704 // used to normalize spectral difference measure
1705 if (inst->blockIndex < END_STARTUP_LONG) {
1706 // substituting division with shift ending up in Q(-2*stages)
1707 inst->timeAvgMagnEnergyTmp +=
1708 inst->magnEnergy >> (2 * inst->normData + inst->stages - 1);
1709 inst->timeAvgMagnEnergy = WebRtcSpl_DivU32U16(inst->timeAvgMagnEnergyTmp,
1710 inst->blockIndex + 1);
1711 }
1712
1713 //start processing at frames == converged+1
1714 // STEP 1: compute prior and post SNR based on quantile noise estimates
1715
1716 // compute direct decision (DD) estimate of prior SNR: needed for new method
1717 satMax = (uint32_t)1048575;// Largest possible value without getting overflow despite shifting 12 steps
1718 postShifts = 6 + qMagn - qNoise;
1719 nShifts = 5 - inst->prevQMagn + inst->prevQNoise;
1720 for (i = 0; i < inst->magnLen; i++) {
1721 // FLOAT:
1722 // post SNR
1723 // postLocSnr[i] = 0.0;
1724 // if (magn[i] > noise[i])
1725 // {
1726 // postLocSnr[i] = magn[i] / (noise[i] + 0.0001);
1727 // }
1728 // // previous post SNR
1729 // // previous estimate: based on previous frame with gain filter (smooth is previous filter)
1730 //
1731 // prevNearSnr[i] = inst->prevMagnU16[i] / (inst->noisePrev[i] + 0.0001) * (inst->smooth[i]);
1732 //
1733 // // DD estimate is sum of two terms: current estimate and previous estimate
1734 // // directed decision update of priorSnr (or we actually store [2*priorSnr+1])
1735 //
1736 // priorLocSnr[i] = DD_PR_SNR * prevNearSnr[i] + (1.0 - DD_PR_SNR) * (postLocSnr[i] - 1.0);
1737
1738 // calculate post SNR: output in Q11
1739 postLocSnr[i] = 2048; // 1.0 in Q11
1740 tmpU32no1 = (uint32_t)magnU16[i] << 6; // Q(6+qMagn)
1741 if (postShifts < 0) {
1742 tmpU32no2 = noiseU32[i] >> -postShifts; // Q(6+qMagn)
1743 } else {
1744 tmpU32no2 = noiseU32[i] << postShifts; // Q(6+qMagn)
1745 }
1746 if (tmpU32no1 > tmpU32no2) {
1747 // Current magnitude larger than noise
1748 tmpU32no1 <<= 11; // Q(17+qMagn)
1749 if (tmpU32no2 > 0) {
1750 tmpU32no1 /= tmpU32no2; // Q11
1751 postLocSnr[i] = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11
1752 } else {
1753 postLocSnr[i] = satMax;
1754 }
1755 }
1756
1757 // calculate prevNearSnr[i] and save for later instead of recalculating it later
1758 // |nearMagnEst| in Q(prevQMagn + 14)
1759 nearMagnEst = inst->prevMagnU16[i] * inst->noiseSupFilter[i];
1760 tmpU32no1 = nearMagnEst << 3; // Q(prevQMagn+17)
1761 tmpU32no2 = inst->prevNoiseU32[i] >> nShifts; // Q(prevQMagn+6)
1762
1763 if (tmpU32no2 > 0) {
1764 tmpU32no1 /= tmpU32no2; // Q11
1765 tmpU32no1 = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11
1766 } else {
1767 tmpU32no1 = satMax; // Q11
1768 }
1769 prevNearSnr[i] = tmpU32no1; // Q11
1770
1771 //directed decision update of priorSnr
1772 tmpU32no1 = WEBRTC_SPL_UMUL_32_16(prevNearSnr[i], DD_PR_SNR_Q11); // Q22
1773 tmpU32no2 = WEBRTC_SPL_UMUL_32_16(postLocSnr[i] - 2048, ONE_MINUS_DD_PR_SNR_Q11); // Q22
1774 priorSnr = tmpU32no1 + tmpU32no2 + 512; // Q22 (added 512 for rounding)
1775 // priorLocSnr = 1 + 2*priorSnr
1776 priorLocSnr[i] = 2048 + (priorSnr >> 10); // Q11
1777 } // end of loop over frequencies
1778 // done with step 1: DD computation of prior and post SNR
1779
1780 // STEP 2: compute speech/noise likelihood
1781
1782 //compute difference of input spectrum with learned/estimated noise spectrum
1783 WebRtcNsx_ComputeSpectralDifference(inst, magnU16);
1784 //compute histograms for determination of parameters (thresholds and weights for features)
1785 //parameters are extracted once every window time (=inst->modelUpdate)
1786 //counter update
1787 inst->cntThresUpdate++;
1788 flag = (int)(inst->cntThresUpdate == inst->modelUpdate);
1789 //update histogram
1790 WebRtcNsx_FeatureParameterExtraction(inst, flag);
1791 //compute model parameters
1792 if (flag) {
1793 inst->cntThresUpdate = 0; // Reset counter
1794 //update every window:
1795 // get normalization for spectral difference for next window estimate
1796
1797 // Shift to Q(-2*stages)
1798 inst->curAvgMagnEnergy >>= STAT_UPDATES;
1799
1800 tmpU32no1 = (inst->curAvgMagnEnergy + inst->timeAvgMagnEnergy + 1) >> 1; //Q(-2*stages)
1801 // Update featureSpecDiff
1802 if ((tmpU32no1 != inst->timeAvgMagnEnergy) && (inst->featureSpecDiff) &&
1803 (inst->timeAvgMagnEnergy > 0)) {
1804 norm32no1 = 0;
1805 tmpU32no3 = tmpU32no1;
1806 while (0xFFFF0000 & tmpU32no3) {
1807 tmpU32no3 >>= 1;
1808 norm32no1++;
1809 }
1810 tmpU32no2 = inst->featureSpecDiff;
1811 while (0xFFFF0000 & tmpU32no2) {
1812 tmpU32no2 >>= 1;
1813 norm32no1++;
1814 }
1815 tmpU32no3 = WEBRTC_SPL_UMUL(tmpU32no3, tmpU32no2);
1816 tmpU32no3 /= inst->timeAvgMagnEnergy;
1817 if (WebRtcSpl_NormU32(tmpU32no3) < norm32no1) {
1818 inst->featureSpecDiff = 0x007FFFFF;
1819 } else {
1820 inst->featureSpecDiff = WEBRTC_SPL_MIN(0x007FFFFF,
1821 tmpU32no3 << norm32no1);
1822 }
1823 }
1824
1825 inst->timeAvgMagnEnergy = tmpU32no1; // Q(-2*stages)
1826 inst->curAvgMagnEnergy = 0;
1827 }
1828
1829 //compute speech/noise probability
1830 WebRtcNsx_SpeechNoiseProb(inst, nonSpeechProbFinal, priorLocSnr, postLocSnr);
1831
1832 //time-avg parameter for noise update
1833 gammaNoise = NOISE_UPDATE_Q8; // Q8
1834
1835 maxNoiseU32 = 0;
1836 postShifts = inst->prevQNoise - qMagn;
1837 nShifts = inst->prevQMagn - qMagn;
1838 for (i = 0; i < inst->magnLen; i++) {
1839 // temporary noise update: use it for speech frames if update value is less than previous
1840 // the formula has been rewritten into:
1841 // noiseUpdate = noisePrev[i] + (1 - gammaNoise) * nonSpeechProb * (magn[i] - noisePrev[i])
1842
1843 if (postShifts < 0) {
1844 tmpU32no2 = magnU16[i] >> -postShifts; // Q(prevQNoise)
1845 } else {
1846 tmpU32no2 = (uint32_t)magnU16[i] << postShifts; // Q(prevQNoise)
1847 }
1848 if (prevNoiseU16[i] > tmpU32no2) {
1849 sign = -1;
1850 tmpU32no1 = prevNoiseU16[i] - tmpU32no2;
1851 } else {
1852 sign = 1;
1853 tmpU32no1 = tmpU32no2 - prevNoiseU16[i];
1854 }
1855 noiseUpdateU32 = inst->prevNoiseU32[i]; // Q(prevQNoise+11)
1856 tmpU32no3 = 0;
1857 if ((tmpU32no1) && (nonSpeechProbFinal[i])) {
1858 // This value will be used later, if gammaNoise changes
1859 tmpU32no3 = WEBRTC_SPL_UMUL_32_16(tmpU32no1, nonSpeechProbFinal[i]); // Q(prevQNoise+8)
1860 if (0x7c000000 & tmpU32no3) {
1861 // Shifting required before multiplication
1862 tmpU32no2 = (tmpU32no3 >> 5) * gammaNoise; // Q(prevQNoise+11)
1863 } else {
1864 // We can do shifting after multiplication
1865 tmpU32no2 = (tmpU32no3 * gammaNoise) >> 5; // Q(prevQNoise+11)
1866 }
1867 if (sign > 0) {
1868 noiseUpdateU32 += tmpU32no2; // Q(prevQNoise+11)
1869 } else {
1870 // This operation is safe. We can never get wrap around, since worst
1871 // case scenario means magnU16 = 0
1872 noiseUpdateU32 -= tmpU32no2; // Q(prevQNoise+11)
1873 }
1874 }
1875
1876 //increase gamma (i.e., less noise update) for frame likely to be speech
1877 prevGammaNoise = gammaNoise;
1878 gammaNoise = NOISE_UPDATE_Q8;
1879 //time-constant based on speech/noise state
1880 //increase gamma (i.e., less noise update) for frames likely to be speech
1881 if (nonSpeechProbFinal[i] < ONE_MINUS_PROB_RANGE_Q8) {
1882 gammaNoise = GAMMA_NOISE_TRANS_AND_SPEECH_Q8;
1883 }
1884
1885 if (prevGammaNoise != gammaNoise) {
1886 // new noise update
1887 // this line is the same as above, only that the result is stored in a different variable and the gammaNoise
1888 // has changed
1889 //
1890 // noiseUpdate = noisePrev[i] + (1 - gammaNoise) * nonSpeechProb * (magn[i] - noisePrev[i])
1891
1892 if (0x7c000000 & tmpU32no3) {
1893 // Shifting required before multiplication
1894 tmpU32no2 = (tmpU32no3 >> 5) * gammaNoise; // Q(prevQNoise+11)
1895 } else {
1896 // We can do shifting after multiplication
1897 tmpU32no2 = (tmpU32no3 * gammaNoise) >> 5; // Q(prevQNoise+11)
1898 }
1899 if (sign > 0) {
1900 tmpU32no1 = inst->prevNoiseU32[i] + tmpU32no2; // Q(prevQNoise+11)
1901 } else {
1902 tmpU32no1 = inst->prevNoiseU32[i] - tmpU32no2; // Q(prevQNoise+11)
1903 }
1904 if (noiseUpdateU32 > tmpU32no1) {
1905 noiseUpdateU32 = tmpU32no1; // Q(prevQNoise+11)
1906 }
1907 }
1908 noiseU32[i] = noiseUpdateU32; // Q(prevQNoise+11)
1909 if (noiseUpdateU32 > maxNoiseU32) {
1910 maxNoiseU32 = noiseUpdateU32;
1911 }
1912
1913 // conservative noise update
1914 // // original FLOAT code
1915 // if (prob_speech < PROB_RANGE) {
1916 // inst->avgMagnPause[i] = inst->avgMagnPause[i] + (1.0 - gamma_pause)*(magn[i] - inst->avgMagnPause[i]);
1917 // }
1918
1919 tmp32no2 = WEBRTC_SPL_SHIFT_W32(inst->avgMagnPause[i], -nShifts);
1920 if (nonSpeechProbFinal[i] > ONE_MINUS_PROB_RANGE_Q8) {
1921 if (nShifts < 0) {
1922 tmp32no1 = (int32_t)magnU16[i] - tmp32no2; // Q(qMagn)
1923 tmp32no1 *= ONE_MINUS_GAMMA_PAUSE_Q8; // Q(8+prevQMagn+nShifts)
1924 tmp32no1 = (tmp32no1 + 128) >> 8; // Q(qMagn).
1925 } else {
1926 // In Q(qMagn+nShifts)
1927 tmp32no1 = ((int32_t)magnU16[i] << nShifts) - inst->avgMagnPause[i];
1928 tmp32no1 *= ONE_MINUS_GAMMA_PAUSE_Q8; // Q(8+prevQMagn+nShifts)
1929 tmp32no1 = (tmp32no1 + (128 << nShifts)) >> (8 + nShifts); // Q(qMagn).
1930 }
1931 tmp32no2 += tmp32no1; // Q(qMagn)
1932 }
1933 inst->avgMagnPause[i] = tmp32no2;
1934 } // end of frequency loop
1935
1936 norm32no1 = WebRtcSpl_NormU32(maxNoiseU32);
1937 qNoise = inst->prevQNoise + norm32no1 - 5;
1938 // done with step 2: noise update
1939
1940 // STEP 3: compute dd update of prior snr and post snr based on new noise estimate
1941 nShifts = inst->prevQNoise + 11 - qMagn;
1942 for (i = 0; i < inst->magnLen; i++) {
1943 // FLOAT code
1944 // // post and prior SNR
1945 // curNearSnr = 0.0;
1946 // if (magn[i] > noise[i])
1947 // {
1948 // curNearSnr = magn[i] / (noise[i] + 0.0001) - 1.0;
1949 // }
1950 // // DD estimate is sum of two terms: current estimate and previous estimate
1951 // // directed decision update of snrPrior
1952 // snrPrior = DD_PR_SNR * prevNearSnr[i] + (1.0 - DD_PR_SNR) * curNearSnr;
1953 // // gain filter
1954 // tmpFloat1 = inst->overdrive + snrPrior;
1955 // tmpFloat2 = snrPrior / tmpFloat1;
1956 // theFilter[i] = tmpFloat2;
1957
1958 // calculate curNearSnr again, this is necessary because a new noise estimate has been made since then. for the original
1959 curNearSnr = 0; // Q11
1960 if (nShifts < 0) {
1961 // This case is equivalent with magn < noise which implies curNearSnr = 0;
1962 tmpMagnU32 = (uint32_t)magnU16[i]; // Q(qMagn)
1963 tmpNoiseU32 = noiseU32[i] << -nShifts; // Q(qMagn)
1964 } else if (nShifts > 17) {
1965 tmpMagnU32 = (uint32_t)magnU16[i] << 17; // Q(qMagn+17)
1966 tmpNoiseU32 = noiseU32[i] >> (nShifts - 17); // Q(qMagn+17)
1967 } else {
1968 tmpMagnU32 = (uint32_t)magnU16[i] << nShifts; // Q(qNoise_prev+11)
1969 tmpNoiseU32 = noiseU32[i]; // Q(qNoise_prev+11)
1970 }
1971 if (tmpMagnU32 > tmpNoiseU32) {
1972 tmpU32no1 = tmpMagnU32 - tmpNoiseU32; // Q(qCur)
1973 norm32no2 = WEBRTC_SPL_MIN(11, WebRtcSpl_NormU32(tmpU32no1));
1974 tmpU32no1 <<= norm32no2; // Q(qCur+norm32no2)
1975 tmpU32no2 = tmpNoiseU32 >> (11 - norm32no2); // Q(qCur+norm32no2-11)
1976 if (tmpU32no2 > 0) {
1977 tmpU32no1 /= tmpU32no2; // Q11
1978 }
1979 curNearSnr = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11
1980 }
1981
1982 //directed decision update of priorSnr
1983 // FLOAT
1984 // priorSnr = DD_PR_SNR * prevNearSnr + (1.0-DD_PR_SNR) * curNearSnr;
1985
1986 tmpU32no1 = WEBRTC_SPL_UMUL_32_16(prevNearSnr[i], DD_PR_SNR_Q11); // Q22
1987 tmpU32no2 = WEBRTC_SPL_UMUL_32_16(curNearSnr, ONE_MINUS_DD_PR_SNR_Q11); // Q22
1988 priorSnr = tmpU32no1 + tmpU32no2; // Q22
1989
1990 //gain filter
1991 tmpU32no1 = inst->overdrive + ((priorSnr + 8192) >> 14); // Q8
1992 RTC_DCHECK_GT(inst->overdrive, 0);
1993 tmpU16no1 = (priorSnr + tmpU32no1 / 2) / tmpU32no1; // Q14
1994 inst->noiseSupFilter[i] = WEBRTC_SPL_SAT(16384, tmpU16no1, inst->denoiseBound); // 16384 = Q14(1.0) // Q14
1995
1996 // Weight in the parametric Wiener filter during startup
1997 if (inst->blockIndex < END_STARTUP_SHORT) {
1998 // Weight the two suppression filters
1999 tmpU32no1 = inst->noiseSupFilter[i] * inst->blockIndex;
2000 tmpU32no2 = noiseSupFilterTmp[i] *
2001 (END_STARTUP_SHORT - inst->blockIndex);
2002 tmpU32no1 += tmpU32no2;
2003 inst->noiseSupFilter[i] = (uint16_t)WebRtcSpl_DivU32U16(tmpU32no1,
2004 END_STARTUP_SHORT);
2005 }
2006 } // end of loop over frequencies
2007 //done with step3
2008
2009 // save noise and magnitude spectrum for next frame
2010 inst->prevQNoise = qNoise;
2011 inst->prevQMagn = qMagn;
2012 if (norm32no1 > 5) {
2013 for (i = 0; i < inst->magnLen; i++) {
2014 inst->prevNoiseU32[i] = noiseU32[i] << (norm32no1 - 5); // Q(qNoise+11)
2015 inst->prevMagnU16[i] = magnU16[i]; // Q(qMagn)
2016 }
2017 } else {
2018 for (i = 0; i < inst->magnLen; i++) {
2019 inst->prevNoiseU32[i] = noiseU32[i] >> (5 - norm32no1); // Q(qNoise+11)
2020 inst->prevMagnU16[i] = magnU16[i]; // Q(qMagn)
2021 }
2022 }
2023
2024 WebRtcNsx_DataSynthesis(inst, outFrame[0]);
2025 #ifdef NS_FILEDEBUG
2026 if (fwrite(outframe, sizeof(short),
2027 inst->blockLen10ms, inst->outfile) != inst->blockLen10ms) {
2028 RTC_NOTREACHED();
2029 }
2030 #endif
2031
2032 //for H band:
2033 // only update data buffer, then apply time-domain gain is applied derived from L band
2034 if (num_bands > 1) {
2035 // update analysis buffer for H band
2036 // append new data to buffer FX
2037 for (i = 0; i < num_high_bands; ++i) {
2038 memcpy(inst->dataBufHBFX[i], inst->dataBufHBFX[i] + inst->blockLen10ms,
2039 (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->dataBufHBFX[i]));
2040 memcpy(inst->dataBufHBFX[i] + inst->anaLen - inst->blockLen10ms,
2041 speechFrameHB[i], inst->blockLen10ms * sizeof(*inst->dataBufHBFX[i]));
2042 }
2043 // range for averaging low band quantities for H band gain
2044
2045 gainTimeDomainHB = 16384; // 16384 = Q14(1.0)
2046 //average speech prob from low band
2047 //average filter gain from low band
2048 //avg over second half (i.e., 4->8kHz) of freq. spectrum
2049 tmpU32no1 = 0; // Q12
2050 tmpU16no1 = 0; // Q8
2051 for (i = inst->anaLen2 - (inst->anaLen2 >> 2); i < inst->anaLen2; i++) {
2052 tmpU16no1 += nonSpeechProbFinal[i]; // Q8
2053 tmpU32no1 += (uint32_t)(inst->noiseSupFilter[i]); // Q14
2054 }
2055 RTC_DCHECK_GE(inst->stages, 7);
2056 avgProbSpeechHB = (4096 - (tmpU16no1 >> (inst->stages - 7))); // Q12
2057 avgFilterGainHB = (int16_t)(tmpU32no1 >> (inst->stages - 3)); // Q14
2058
2059 // // original FLOAT code
2060 // // gain based on speech probability:
2061 // avg_prob_speech_tt=(float)2.0*avg_prob_speech-(float)1.0;
2062 // gain_mod=(float)0.5*((float)1.0+(float)tanh(avg_prob_speech_tt)); // between 0 and 1
2063
2064 // gain based on speech probability:
2065 // original expression: "0.5 * (1 + tanh(2x-1))"
2066 // avgProbSpeechHB has been anyway saturated to a value between 0 and 1 so the other cases don't have to be dealt with
2067 // avgProbSpeechHB and gainModHB are in Q12, 3607 = Q12(0.880615234375) which is a zero point of
2068 // |0.5 * (1 + tanh(2x-1)) - x| - |0.5 * (1 + tanh(2x-1)) - 0.880615234375| meaning that from that point the error of approximating
2069 // the expression with f(x) = x would be greater than the error of approximating the expression with f(x) = 0.880615234375
2070 // error: "|0.5 * (1 + tanh(2x-1)) - x| from x=0 to 0.880615234375" -> http://www.wolframalpha.com/input/?i=|0.5+*+(1+%2B+tanh(2x-1))+-+x|+from+x%3D0+to+0.880615234375
2071 // and: "|0.5 * (1 + tanh(2x-1)) - 0.880615234375| from x=0.880615234375 to 1" -> http://www.wolframalpha.com/input/?i=+|0.5+*+(1+%2B+tanh(2x-1))+-+0.880615234375|+from+x%3D0.880615234375+to+1
2072 gainModHB = WEBRTC_SPL_MIN(avgProbSpeechHB, 3607);
2073
2074 // // original FLOAT code
2075 // //combine gain with low band gain
2076 // if (avg_prob_speech < (float)0.5) {
2077 // gain_time_domain_HB=(float)0.5*gain_mod+(float)0.5*avg_filter_gain;
2078 // }
2079 // else {
2080 // gain_time_domain_HB=(float)0.25*gain_mod+(float)0.75*avg_filter_gain;
2081 // }
2082
2083
2084 //combine gain with low band gain
2085 if (avgProbSpeechHB < 2048) {
2086 // 2048 = Q12(0.5)
2087 // the next two lines in float are "gain_time_domain = 0.5 * gain_mod + 0.5 * avg_filter_gain"; Q2(0.5) = 2 equals one left shift
2088 gainTimeDomainHB = (gainModHB << 1) + (avgFilterGainHB >> 1); // Q14
2089 } else {
2090 // "gain_time_domain = 0.25 * gain_mod + 0.75 * agv_filter_gain;"
2091 gainTimeDomainHB = (int16_t)((3 * avgFilterGainHB) >> 2); // 3 = Q2(0.75)
2092 gainTimeDomainHB += gainModHB; // Q14
2093 }
2094 //make sure gain is within flooring range
2095 gainTimeDomainHB
2096 = WEBRTC_SPL_SAT(16384, gainTimeDomainHB, (int16_t)(inst->denoiseBound)); // 16384 = Q14(1.0)
2097
2098
2099 //apply gain
2100 for (i = 0; i < num_high_bands; ++i) {
2101 for (j = 0; j < inst->blockLen10ms; j++) {
2102 outFrameHB[i][j] = (int16_t)((gainTimeDomainHB *
2103 inst->dataBufHBFX[i][j]) >> 14); // Q0
2104 }
2105 }
2106 } // end of H band gain computation
2107 }
2108