1 /*
2  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/agc2/rnn_vad/rnn.h"
12 
13 #include <array>
14 #include <memory>
15 #include <vector>
16 
17 #include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
18 #include "modules/audio_processing/test/performance_timer.h"
19 #include "rtc_base/checks.h"
20 #include "rtc_base/logging.h"
21 #include "rtc_base/system/arch.h"
22 #include "test/gtest.h"
23 #include "third_party/rnnoise/src/rnn_activations.h"
24 #include "third_party/rnnoise/src/rnn_vad_weights.h"
25 
26 namespace webrtc {
27 namespace rnn_vad {
28 namespace test {
29 
30 namespace {
31 
TestFullyConnectedLayer(FullyConnectedLayer * fc,rtc::ArrayView<const float> input_vector,rtc::ArrayView<const float> expected_output)32 void TestFullyConnectedLayer(FullyConnectedLayer* fc,
33                              rtc::ArrayView<const float> input_vector,
34                              rtc::ArrayView<const float> expected_output) {
35   RTC_CHECK(fc);
36   fc->ComputeOutput(input_vector);
37   ExpectNearAbsolute(expected_output, fc->GetOutput(), 1e-5f);
38 }
39 
TestGatedRecurrentLayer(GatedRecurrentLayer * gru,rtc::ArrayView<const float> input_sequence,rtc::ArrayView<const float> expected_output_sequence)40 void TestGatedRecurrentLayer(
41     GatedRecurrentLayer* gru,
42     rtc::ArrayView<const float> input_sequence,
43     rtc::ArrayView<const float> expected_output_sequence) {
44   RTC_CHECK(gru);
45   auto gru_output_view = gru->GetOutput();
46   const size_t input_sequence_length =
47       rtc::CheckedDivExact(input_sequence.size(), gru->input_size());
48   const size_t output_sequence_length =
49       rtc::CheckedDivExact(expected_output_sequence.size(), gru->output_size());
50   ASSERT_EQ(input_sequence_length, output_sequence_length)
51       << "The test data length is invalid.";
52   // Feed the GRU layer and check the output at every step.
53   gru->Reset();
54   for (size_t i = 0; i < input_sequence_length; ++i) {
55     SCOPED_TRACE(i);
56     gru->ComputeOutput(
57         input_sequence.subview(i * gru->input_size(), gru->input_size()));
58     const auto expected_output = expected_output_sequence.subview(
59         i * gru->output_size(), gru->output_size());
60     ExpectNearAbsolute(expected_output, gru_output_view, 3e-6f);
61   }
62 }
63 
// Fully connected layer test data.
// 42-dimensional input vector; its size matches the input size of the layer
// built in CheckFullyConnectedLayerOutput (rnnoise::kInputLayerInputSize).
constexpr std::array<float, 42> kFullyConnectedInputVector = {
    -1.00131f,   -0.627069f, -7.81097f,  7.86285f,    -2.87145f,  3.32365f,
    -0.653161f,  0.529839f,  -0.425307f, 0.25583f,    0.235094f,  0.230527f,
    -0.144687f,  0.182785f,  0.57102f,   0.125039f,   0.479482f,  -0.0255439f,
    -0.0073141f, -0.147346f, -0.217106f, -0.0846906f, -8.34943f,  3.09065f,
    1.42628f,    -0.85235f,  -0.220207f, -0.811163f,  2.09032f,   -2.01425f,
    -0.690268f,  -0.925327f, -0.541354f, 0.58455f,    -0.606726f, -0.0372358f,
    0.565991f,   0.435854f,  0.420812f,  0.162198f,   -2.13f,     10.0089f};
// Expected 24-dimensional output for the input vector above; the size matches
// the layer output size (rnnoise::kInputLayerOutputSize). Values lie in
// [-1, 1], consistent with the TansigApproximated activation used in the test.
constexpr std::array<float, 24> kFullyConnectedExpectedOutput = {
    -0.623293f, -0.988299f, 0.999378f,  0.967168f,  0.103087f,  -0.978545f,
    -0.856347f, 0.346675f,  1.f,        -0.717442f, -0.544176f, 0.960363f,
    0.983443f,  0.999991f,  -0.824335f, 0.984742f,  0.990208f,  0.938179f,
    0.875092f,  0.999846f,  0.997707f,  -0.999382f, 0.973153f,  -0.966605f};
78 
// Gated recurrent units layer test data.
// The layer under test has 5 inputs, 4 outputs and 3 gates (update, reset,
// output), hence:
//   - bias:              3 gates x 4 outputs            = 12 values
//   - input weights:     3 gates x 4 outputs x 5 inputs = 60 values
//   - recurrent weights: 3 gates x 4 outputs x 4 state  = 48 values
constexpr size_t kGruInputSize = 5;
constexpr size_t kGruOutputSize = 4;
constexpr std::array<int8_t, 12> kGruBias = {96,   -99, -81, -114, 49,  119,
                                             -118, 68,  -76, 91,   121, 125};
// Quantized input weights, grouped by input element and then by gate.
constexpr std::array<int8_t, 60> kGruWeights = {
    // Input 0.
    124, 9, 1, 116,        // Update.
    -66, -21, -118, -110,  // Reset.
    104, 75, -23, -51,     // Output.
    // Input 1.
    -72, -111, 47, 93,   // Update.
    77, -98, 41, -8,     // Reset.
    40, -23, -43, -107,  // Output.
    // Input 2.
    9, -73, 30, -32,      // Update.
    -2, 64, -26, 91,      // Reset.
    -48, -24, -28, -104,  // Output.
    // Input 3.
    74, -46, 116, 15,    // Update.
    32, 52, -126, -38,   // Reset.
    -121, 12, -16, 110,  // Output.
    // Input 4.
    -95, 66, -103, -35,  // Update.
    -38, 3, -126, -61,   // Reset.
    28, 98, -117, -43    // Output.
};
// Quantized recurrent weights, grouped by state element and then by gate.
constexpr std::array<int8_t, 48> kGruRecurrentWeights = {
    // Output 0.
    -3, 87, 50, 51,     // Update.
    -22, 27, -39, 62,   // Reset.
    31, -83, -52, -48,  // Output.
    // Output 1.
    -6, 83, -19, 104,  // Update.
    105, 48, 23, 68,   // Reset.
    23, 40, 7, -120,   // Output.
    // Output 2.
    64, -62, 117, 85,     // Update.
    51, -43, 54, -105,    // Reset.
    120, 56, -128, -107,  // Output.
    // Output 3.
    39, 50, -17, -47,   // Update.
    -117, 14, 108, 12,  // Reset.
    -7, -72, 103, -87,  // Output.
};
// Input sequence: 4 time steps of 5 values each (kGruInputSize).
constexpr std::array<float, 20> kGruInputSequence = {
    0.89395463f, 0.93224651f, 0.55788344f, 0.32341808f, 0.93355054f,
    0.13475326f, 0.97370994f, 0.14253306f, 0.93710381f, 0.76093364f,
    0.65780413f, 0.41657975f, 0.49403164f, 0.46843281f, 0.75138855f,
    0.24517593f, 0.47657707f, 0.57064998f, 0.435184f,   0.19319285f};
// Expected output sequence: 4 time steps of 4 values each (kGruOutputSize),
// one output frame per input frame above.
constexpr std::array<float, 16> kGruExpectedOutputSequence = {
    0.0239123f,  0.5773077f,  0.f,         0.f,
    0.01282811f, 0.64330572f, 0.f,         0.04863098f,
    0.00781069f, 0.75267816f, 0.f,         0.02579715f,
    0.00471378f, 0.59162533f, 0.11087593f, 0.01334511f};
134 
GetOptimizationName(Optimization optimization)135 std::string GetOptimizationName(Optimization optimization) {
136   switch (optimization) {
137     case Optimization::kSse2:
138       return "SSE2";
139     case Optimization::kNeon:
140       return "NEON";
141     case Optimization::kNone:
142       return "none";
143   }
144 }
145 
// Timing outcome of one benchmarked layer implementation.
struct Result {
  Optimization optimization;  // Which implementation was measured.
  double average_us;          // Average duration (microseconds, per the name).
  double std_dev_us;          // Standard deviation of the duration.
};
151 
152 }  // namespace
153 
154 // Checks that the output of a fully connected layer is within tolerance given
155 // test input data.
TEST(RnnVadTest,CheckFullyConnectedLayerOutput)156 TEST(RnnVadTest, CheckFullyConnectedLayerOutput) {
157   FullyConnectedLayer fc(rnnoise::kInputLayerInputSize,
158                          rnnoise::kInputLayerOutputSize,
159                          rnnoise::kInputDenseBias, rnnoise::kInputDenseWeights,
160                          rnnoise::TansigApproximated, Optimization::kNone);
161   TestFullyConnectedLayer(&fc, kFullyConnectedInputVector,
162                           kFullyConnectedExpectedOutput);
163 }
164 
165 // Checks that the output of a GRU layer is within tolerance given test input
166 // data.
TEST(RnnVadTest,CheckGatedRecurrentLayer)167 TEST(RnnVadTest, CheckGatedRecurrentLayer) {
168   GatedRecurrentLayer gru(kGruInputSize, kGruOutputSize, kGruBias, kGruWeights,
169                           kGruRecurrentWeights, Optimization::kNone);
170   TestGatedRecurrentLayer(&gru, kGruInputSequence, kGruExpectedOutputSequence);
171 }
172 
173 #if defined(WEBRTC_ARCH_X86_FAMILY)
174 
175 // Like CheckFullyConnectedLayerOutput, but testing the SSE2 implementation.
TEST(RnnVadTest,CheckFullyConnectedLayerOutputSse2)176 TEST(RnnVadTest, CheckFullyConnectedLayerOutputSse2) {
177   if (!IsOptimizationAvailable(Optimization::kSse2)) {
178     return;
179   }
180 
181   FullyConnectedLayer fc(rnnoise::kInputLayerInputSize,
182                          rnnoise::kInputLayerOutputSize,
183                          rnnoise::kInputDenseBias, rnnoise::kInputDenseWeights,
184                          rnnoise::TansigApproximated, Optimization::kSse2);
185   TestFullyConnectedLayer(&fc, kFullyConnectedInputVector,
186                           kFullyConnectedExpectedOutput);
187 }
188 
189 // Like CheckGatedRecurrentLayer, but testing the SSE2 implementation.
TEST(RnnVadTest,CheckGatedRecurrentLayerSse2)190 TEST(RnnVadTest, CheckGatedRecurrentLayerSse2) {
191   if (!IsOptimizationAvailable(Optimization::kSse2)) {
192     return;
193   }
194 
195   GatedRecurrentLayer gru(kGruInputSize, kGruOutputSize, kGruBias, kGruWeights,
196                           kGruRecurrentWeights, Optimization::kSse2);
197   TestGatedRecurrentLayer(&gru, kGruInputSequence, kGruExpectedOutputSequence);
198 }
199 
200 #endif  // WEBRTC_ARCH_X86_FAMILY
201 
// Benchmarks the available fully connected layer implementations and logs the
// average duration of one forward pass. Disabled by default; run manually.
TEST(RnnVadTest, DISABLED_BenchmarkFullyConnectedLayer) {
  std::vector<std::unique_ptr<FullyConnectedLayer>> layers;
  layers.push_back(std::make_unique<FullyConnectedLayer>(
      rnnoise::kInputLayerInputSize, rnnoise::kInputLayerOutputSize,
      rnnoise::kInputDenseBias, rnnoise::kInputDenseWeights,
      rnnoise::TansigApproximated, Optimization::kNone));
  if (IsOptimizationAvailable(Optimization::kSse2)) {
    layers.push_back(std::make_unique<FullyConnectedLayer>(
        rnnoise::kInputLayerInputSize, rnnoise::kInputLayerOutputSize,
        rnnoise::kInputDenseBias, rnnoise::kInputDenseWeights,
        rnnoise::TansigApproximated, Optimization::kSse2));
  }

  constexpr size_t kNumRepetitions = 10000;
  std::vector<Result> timings;
  for (auto& layer : layers) {
    ::webrtc::test::PerformanceTimer timer(kNumRepetitions);
    for (size_t repetition = 0; repetition < kNumRepetitions; ++repetition) {
      timer.StartTimer();
      layer->ComputeOutput(kFullyConnectedInputVector);
      timer.StopTimer();
    }
    timings.push_back({layer->optimization(), timer.GetDurationAverage(),
                       timer.GetDurationStandardDeviation()});
  }

  for (const auto& timing : timings) {
    RTC_LOG(LS_INFO) << GetOptimizationName(timing.optimization) << ": "
                     << (timing.average_us / 1e3) << " +/- "
                     << (timing.std_dev_us / 1e3) << " ms";
  }
}
234 
// Benchmarks the available GRU layer implementations and logs the average
// duration of one pass over the whole input sequence. Disabled by default;
// run manually.
TEST(RnnVadTest, DISABLED_BenchmarkGatedRecurrentLayer) {
  std::vector<std::unique_ptr<GatedRecurrentLayer>> implementations;
  implementations.emplace_back(std::make_unique<GatedRecurrentLayer>(
      kGruInputSize, kGruOutputSize, kGruBias, kGruWeights,
      kGruRecurrentWeights, Optimization::kNone));
  // Also benchmark the SSE2 implementation when available, mirroring
  // the fully connected layer benchmark (previously only the un-optimized
  // implementation was measured even though an SSE2 GRU path exists - see
  // CheckGatedRecurrentLayerSse2).
  if (IsOptimizationAvailable(Optimization::kSse2)) {
    implementations.emplace_back(std::make_unique<GatedRecurrentLayer>(
        kGruInputSize, kGruOutputSize, kGruBias, kGruWeights,
        kGruRecurrentWeights, Optimization::kSse2));
  }

  rtc::ArrayView<const float> input_sequence(kGruInputSequence);
  static_assert(kGruInputSequence.size() % kGruInputSize == 0, "");
  constexpr size_t input_sequence_length =
      kGruInputSequence.size() / kGruInputSize;

  std::vector<Result> results;
  constexpr size_t number_of_tests = 10000;
  for (auto& gru : implementations) {
    ::webrtc::test::PerformanceTimer perf_timer(number_of_tests);
    gru->Reset();
    for (size_t k = 0; k < number_of_tests; ++k) {
      perf_timer.StartTimer();
      // One timed unit is a full pass over the input sequence.
      for (size_t i = 0; i < input_sequence_length; ++i) {
        gru->ComputeOutput(
            input_sequence.subview(i * gru->input_size(), gru->input_size()));
      }
      perf_timer.StopTimer();
    }
    results.push_back({gru->optimization(), perf_timer.GetDurationAverage(),
                       perf_timer.GetDurationStandardDeviation()});
  }

  for (const auto& result : results) {
    RTC_LOG(LS_INFO) << GetOptimizationName(result.optimization) << ": "
                     << (result.average_us / 1e3) << " +/- "
                     << (result.std_dev_us / 1e3) << " ms";
  }
}
269 
270 }  // namespace test
271 }  // namespace rnn_vad
272 }  // namespace webrtc
273