1 /*
2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_coding/neteq/include/neteq.h"
12 
13 #include <math.h>
14 #include <stdlib.h>
15 #include <string.h>  // memset
16 
17 #include <algorithm>
18 #include <memory>
19 #include <set>
20 #include <string>
21 #include <vector>
22 
23 #include "api/audio_codecs/builtin_audio_decoder_factory.h"
24 #include "common_types.h"  // NOLINT(build/include)
25 #include "modules/audio_coding/codecs/pcm16b/pcm16b.h"
26 #include "modules/audio_coding/neteq/tools/audio_loop.h"
27 #include "modules/audio_coding/neteq/tools/rtp_file_source.h"
28 #include "modules/include/module_common_types.h"
29 #include "rtc_base/flags.h"
30 #include "rtc_base/ignore_wundef.h"
31 #include "rtc_base/numerics/safe_conversions.h"
32 #include "rtc_base/protobuf_utils.h"
33 #include "rtc_base/sha1digest.h"
34 #include "rtc_base/stringencode.h"
35 #include "test/field_trial.h"
36 #include "test/gtest.h"
37 #include "test/testsupport/fileutils.h"
38 #include "typedefs.h"  // NOLINT(build/include)
39 
40 #ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT
41 RTC_PUSH_IGNORING_WUNDEF()
42 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD
43 #include "external/webrtc/webrtc/modules/audio_coding/neteq/neteq_unittest.pb.h"
44 #else
45 #include "modules/audio_coding/neteq/neteq_unittest.pb.h"
46 #endif
47 RTC_POP_IGNORING_WUNDEF()
48 #endif
49 
50 DEFINE_bool(gen_ref, false, "Generate reference files.");
51 
52 namespace webrtc {
53 
54 namespace {
55 
// Selects the reference checksum matching the platform this binary was built
// for. Android and Windows builds pick their 32- or 64-bit variant depending
// on WEBRTC_ARCH_64_BITS; every other build gets |checksum_general|. The
// returned reference aliases one of the arguments.
const std::string& PlatformChecksum(const std::string& checksum_general,
                                    const std::string& checksum_android_32,
                                    const std::string& checksum_android_64,
                                    const std::string& checksum_win_32,
                                    const std::string& checksum_win_64) {
#if defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_64_BITS)
  return checksum_android_64;
#elif defined(WEBRTC_ANDROID)
  return checksum_android_32;
#elif defined(WEBRTC_WIN) && defined(WEBRTC_ARCH_64_BITS)
  return checksum_win_64;
#elif defined(WEBRTC_WIN)
  return checksum_win_32;
#else
  return checksum_general;
#endif  // Platform selection.
}
77 
78 #ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT
Convert(const webrtc::NetEqNetworkStatistics & stats_raw,webrtc::neteq_unittest::NetEqNetworkStatistics * stats)79 void Convert(const webrtc::NetEqNetworkStatistics& stats_raw,
80              webrtc::neteq_unittest::NetEqNetworkStatistics* stats) {
81   stats->set_current_buffer_size_ms(stats_raw.current_buffer_size_ms);
82   stats->set_preferred_buffer_size_ms(stats_raw.preferred_buffer_size_ms);
83   stats->set_jitter_peaks_found(stats_raw.jitter_peaks_found);
84   stats->set_packet_loss_rate(stats_raw.packet_loss_rate);
85   stats->set_expand_rate(stats_raw.expand_rate);
86   stats->set_speech_expand_rate(stats_raw.speech_expand_rate);
87   stats->set_preemptive_rate(stats_raw.preemptive_rate);
88   stats->set_accelerate_rate(stats_raw.accelerate_rate);
89   stats->set_secondary_decoded_rate(stats_raw.secondary_decoded_rate);
90   stats->set_secondary_discarded_rate(stats_raw.secondary_discarded_rate);
91   stats->set_clockdrift_ppm(stats_raw.clockdrift_ppm);
92   stats->set_added_zero_samples(stats_raw.added_zero_samples);
93   stats->set_mean_waiting_time_ms(stats_raw.mean_waiting_time_ms);
94   stats->set_median_waiting_time_ms(stats_raw.median_waiting_time_ms);
95   stats->set_min_waiting_time_ms(stats_raw.min_waiting_time_ms);
96   stats->set_max_waiting_time_ms(stats_raw.max_waiting_time_ms);
97 }
98 
Convert(const webrtc::RtcpStatistics & stats_raw,webrtc::neteq_unittest::RtcpStatistics * stats)99 void Convert(const webrtc::RtcpStatistics& stats_raw,
100              webrtc::neteq_unittest::RtcpStatistics* stats) {
101   stats->set_fraction_lost(stats_raw.fraction_lost);
102   stats->set_cumulative_lost(stats_raw.packets_lost);
103   stats->set_extended_max_sequence_number(
104       stats_raw.extended_highest_sequence_number);
105   stats->set_jitter(stats_raw.jitter);
106 }
107 
AddMessage(FILE * file,rtc::MessageDigest * digest,const std::string & message)108 void AddMessage(FILE* file, rtc::MessageDigest* digest,
109                 const std::string& message) {
110   int32_t size = message.length();
111   if (file)
112     ASSERT_EQ(1u, fwrite(&size, sizeof(size), 1, file));
113   digest->Update(&size, sizeof(size));
114 
115   if (file)
116     ASSERT_EQ(static_cast<size_t>(size),
117               fwrite(message.data(), sizeof(char), size, file));
118   digest->Update(message.data(), sizeof(char) * size);
119 }
120 
121 #endif  // WEBRTC_NETEQ_UNITTEST_BITEXACT
122 
LoadDecoders(webrtc::NetEq * neteq)123 void LoadDecoders(webrtc::NetEq* neteq) {
124   ASSERT_EQ(true,
125             neteq->RegisterPayloadType(0, SdpAudioFormat("pcmu", 8000, 1)));
126   // Use non-SdpAudioFormat argument when registering PCMa, so that we get test
127   // coverage for that as well.
128   ASSERT_EQ(0, neteq->RegisterPayloadType(webrtc::NetEqDecoder::kDecoderPCMa,
129                                           "pcma", 8));
130 #ifdef WEBRTC_CODEC_ILBC
131   ASSERT_EQ(true,
132             neteq->RegisterPayloadType(102, SdpAudioFormat("ilbc", 8000, 1)));
133 #endif
134 #if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)
135   ASSERT_EQ(true,
136             neteq->RegisterPayloadType(103, SdpAudioFormat("isac", 16000, 1)));
137 #endif
138 #ifdef WEBRTC_CODEC_ISAC
139   ASSERT_EQ(true,
140             neteq->RegisterPayloadType(104, SdpAudioFormat("isac", 32000, 1)));
141 #endif
142 #ifdef WEBRTC_CODEC_OPUS
143   ASSERT_EQ(true,
144             neteq->RegisterPayloadType(
145                 111, SdpAudioFormat("opus", 48000, 2, {{"stereo", "0"}})));
146 #endif
147   ASSERT_EQ(true,
148             neteq->RegisterPayloadType(93, SdpAudioFormat("L16", 8000, 1)));
149   ASSERT_EQ(true,
150             neteq->RegisterPayloadType(94, SdpAudioFormat("L16", 16000, 1)));
151   ASSERT_EQ(true,
152             neteq->RegisterPayloadType(95, SdpAudioFormat("L16", 32000, 1)));
153   ASSERT_EQ(true,
154             neteq->RegisterPayloadType(13, SdpAudioFormat("cn", 8000, 1)));
155   ASSERT_EQ(true,
156             neteq->RegisterPayloadType(98, SdpAudioFormat("cn", 16000, 1)));
157 }
158 }  // namespace
159 
160 class ResultSink {
161  public:
162   explicit ResultSink(const std::string& output_file);
163   ~ResultSink();
164 
165   template<typename T> void AddResult(const T* test_results, size_t length);
166 
167   void AddResult(const NetEqNetworkStatistics& stats);
168   void AddResult(const RtcpStatistics& stats);
169 
170   void VerifyChecksum(const std::string& ref_check_sum);
171 
172  private:
173   FILE* output_fp_;
174   std::unique_ptr<rtc::MessageDigest> digest_;
175 };
176 
ResultSink(const std::string & output_file)177 ResultSink::ResultSink(const std::string &output_file)
178     : output_fp_(nullptr),
179       digest_(new rtc::Sha1Digest()) {
180   if (!output_file.empty()) {
181     output_fp_ = fopen(output_file.c_str(), "wb");
182     EXPECT_TRUE(output_fp_ != NULL);
183   }
184 }
185 
~ResultSink()186 ResultSink::~ResultSink() {
187   if (output_fp_)
188     fclose(output_fp_);
189 }
190 
191 template<typename T>
AddResult(const T * test_results,size_t length)192 void ResultSink::AddResult(const T* test_results, size_t length) {
193   if (output_fp_) {
194     ASSERT_EQ(length, fwrite(test_results, sizeof(T), length, output_fp_));
195   }
196   digest_->Update(test_results, sizeof(T) * length);
197 }
198 
AddResult(const NetEqNetworkStatistics & stats_raw)199 void ResultSink::AddResult(const NetEqNetworkStatistics& stats_raw) {
200 #ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT
201   neteq_unittest::NetEqNetworkStatistics stats;
202   Convert(stats_raw, &stats);
203 
204   ProtoString stats_string;
205   ASSERT_TRUE(stats.SerializeToString(&stats_string));
206   AddMessage(output_fp_, digest_.get(), stats_string);
207 #else
208   FAIL() << "Writing to reference file requires Proto Buffer.";
209 #endif  // WEBRTC_NETEQ_UNITTEST_BITEXACT
210 }
211 
AddResult(const RtcpStatistics & stats_raw)212 void ResultSink::AddResult(const RtcpStatistics& stats_raw) {
213 #ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT
214   neteq_unittest::RtcpStatistics stats;
215   Convert(stats_raw, &stats);
216 
217   ProtoString stats_string;
218   ASSERT_TRUE(stats.SerializeToString(&stats_string));
219   AddMessage(output_fp_, digest_.get(), stats_string);
220 #else
221   FAIL() << "Writing to reference file requires Proto Buffer.";
222 #endif  // WEBRTC_NETEQ_UNITTEST_BITEXACT
223 }
224 
VerifyChecksum(const std::string & checksum)225 void ResultSink::VerifyChecksum(const std::string& checksum) {
226   std::vector<char> buffer;
227   buffer.resize(digest_->Size());
228   digest_->Finish(&buffer[0], buffer.size());
229   const std::string result = rtc::hex_encode(&buffer[0], digest_->Size());
230   EXPECT_EQ(checksum, result);
231 }
232 
233 class NetEqDecodingTest : public ::testing::Test {
234  protected:
235   // NetEQ must be polled for data once every 10 ms. Thus, neither of the
236   // constants below can be changed.
237   static const int kTimeStepMs = 10;
238   static const size_t kBlockSize8kHz = kTimeStepMs * 8;
239   static const size_t kBlockSize16kHz = kTimeStepMs * 16;
240   static const size_t kBlockSize32kHz = kTimeStepMs * 32;
241   static const size_t kBlockSize48kHz = kTimeStepMs * 48;
242   static const int kInitSampleRateHz = 8000;
243 
244   NetEqDecodingTest();
245   virtual void SetUp();
246   virtual void TearDown();
247   void SelectDecoders(NetEqDecoder* used_codec);
248   void OpenInputFile(const std::string &rtp_file);
249   void Process();
250 
251   void DecodeAndCompare(const std::string& rtp_file,
252                         const std::string& output_checksum,
253                         const std::string& network_stats_checksum,
254                         const std::string& rtcp_stats_checksum,
255                         bool gen_ref);
256 
257   static void PopulateRtpInfo(int frame_index,
258                               int timestamp,
259                               RTPHeader* rtp_info);
260   static void PopulateCng(int frame_index,
261                           int timestamp,
262                           RTPHeader* rtp_info,
263                           uint8_t* payload,
264                           size_t* payload_len);
265 
266   void WrapTest(uint16_t start_seq_no, uint32_t start_timestamp,
267                 const std::set<uint16_t>& drop_seq_numbers,
268                 bool expect_seq_no_wrap, bool expect_timestamp_wrap);
269 
270   void LongCngWithClockDrift(double drift_factor,
271                              double network_freeze_ms,
272                              bool pull_audio_during_freeze,
273                              int delay_tolerance_ms,
274                              int max_time_to_speech_ms);
275 
276   void DuplicateCng();
277 
278   NetEq* neteq_;
279   NetEq::Config config_;
280   std::unique_ptr<test::RtpFileSource> rtp_source_;
281   std::unique_ptr<test::Packet> packet_;
282   unsigned int sim_clock_;
283   AudioFrame out_frame_;
284   int output_sample_rate_;
285   int algorithmic_delay_ms_;
286 };
287 
288 // Allocating the static const so that it can be passed by reference.
289 const int NetEqDecodingTest::kTimeStepMs;
290 const size_t NetEqDecodingTest::kBlockSize8kHz;
291 const size_t NetEqDecodingTest::kBlockSize16kHz;
292 const size_t NetEqDecodingTest::kBlockSize32kHz;
293 const int NetEqDecodingTest::kInitSampleRateHz;
294 
NetEqDecodingTest()295 NetEqDecodingTest::NetEqDecodingTest()
296     : neteq_(NULL),
297       config_(),
298       sim_clock_(0),
299       output_sample_rate_(kInitSampleRateHz),
300       algorithmic_delay_ms_(0) {
301   config_.sample_rate_hz = kInitSampleRateHz;
302 }
303 
SetUp()304 void NetEqDecodingTest::SetUp() {
305   neteq_ = NetEq::Create(config_, CreateBuiltinAudioDecoderFactory());
306   NetEqNetworkStatistics stat;
307   ASSERT_EQ(0, neteq_->NetworkStatistics(&stat));
308   algorithmic_delay_ms_ = stat.current_buffer_size_ms;
309   ASSERT_TRUE(neteq_);
310   LoadDecoders(neteq_);
311 }
312 
TearDown()313 void NetEqDecodingTest::TearDown() {
314   delete neteq_;
315 }
316 
OpenInputFile(const std::string & rtp_file)317 void NetEqDecodingTest::OpenInputFile(const std::string &rtp_file) {
318   rtp_source_.reset(test::RtpFileSource::Create(rtp_file));
319 }
320 
Process()321 void NetEqDecodingTest::Process() {
322   // Check if time to receive.
323   while (packet_ && sim_clock_ >= packet_->time_ms()) {
324     if (packet_->payload_length_bytes() > 0) {
325 #ifndef WEBRTC_CODEC_ISAC
326       // Ignore payload type 104 (iSAC-swb) if ISAC is not supported.
327       if (packet_->header().payloadType != 104)
328 #endif
329         ASSERT_EQ(0,
330                   neteq_->InsertPacket(
331                       packet_->header(),
332                       rtc::ArrayView<const uint8_t>(
333                           packet_->payload(), packet_->payload_length_bytes()),
334                       static_cast<uint32_t>(packet_->time_ms() *
335                                             (output_sample_rate_ / 1000))));
336     }
337     // Get next packet.
338     packet_ = rtp_source_->NextPacket();
339   }
340 
341   // Get audio from NetEq.
342   bool muted;
343   ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
344   ASSERT_FALSE(muted);
345   ASSERT_TRUE((out_frame_.samples_per_channel_ == kBlockSize8kHz) ||
346               (out_frame_.samples_per_channel_ == kBlockSize16kHz) ||
347               (out_frame_.samples_per_channel_ == kBlockSize32kHz) ||
348               (out_frame_.samples_per_channel_ == kBlockSize48kHz));
349   output_sample_rate_ = out_frame_.sample_rate_hz_;
350   EXPECT_EQ(output_sample_rate_, neteq_->last_output_sample_rate_hz());
351 
352   // Increase time.
353   sim_clock_ += kTimeStepMs;
354 }
355 
DecodeAndCompare(const std::string & rtp_file,const std::string & output_checksum,const std::string & network_stats_checksum,const std::string & rtcp_stats_checksum,bool gen_ref)356 void NetEqDecodingTest::DecodeAndCompare(
357     const std::string& rtp_file,
358     const std::string& output_checksum,
359     const std::string& network_stats_checksum,
360     const std::string& rtcp_stats_checksum,
361     bool gen_ref) {
362   OpenInputFile(rtp_file);
363 
364   std::string ref_out_file =
365       gen_ref ? webrtc::test::OutputPath() + "neteq_universal_ref.pcm" : "";
366   ResultSink output(ref_out_file);
367 
368   std::string stat_out_file =
369       gen_ref ? webrtc::test::OutputPath() + "neteq_network_stats.dat" : "";
370   ResultSink network_stats(stat_out_file);
371 
372   std::string rtcp_out_file =
373       gen_ref ? webrtc::test::OutputPath() + "neteq_rtcp_stats.dat" : "";
374   ResultSink rtcp_stats(rtcp_out_file);
375 
376   packet_ = rtp_source_->NextPacket();
377   int i = 0;
378   uint64_t last_concealed_samples = 0;
379   uint64_t last_total_samples_received = 0;
380   while (packet_) {
381     std::ostringstream ss;
382     ss << "Lap number " << i++ << " in DecodeAndCompare while loop";
383     SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
384     ASSERT_NO_FATAL_FAILURE(Process());
385     ASSERT_NO_FATAL_FAILURE(output.AddResult(
386         out_frame_.data(), out_frame_.samples_per_channel_));
387 
388     // Query the network statistics API once per second
389     if (sim_clock_ % 1000 == 0) {
390       // Process NetworkStatistics.
391       NetEqNetworkStatistics current_network_stats;
392       ASSERT_EQ(0, neteq_->NetworkStatistics(&current_network_stats));
393       ASSERT_NO_FATAL_FAILURE(network_stats.AddResult(current_network_stats));
394 
395       // Compare with CurrentDelay, which should be identical.
396       EXPECT_EQ(current_network_stats.current_buffer_size_ms,
397                 neteq_->CurrentDelayMs());
398 
399       // Verify that liftime stats and network stats report similar loss
400       // concealment rates.
401       auto lifetime_stats = neteq_->GetLifetimeStatistics();
402       const uint64_t delta_concealed_samples =
403           lifetime_stats.concealed_samples - last_concealed_samples;
404       last_concealed_samples = lifetime_stats.concealed_samples;
405       const uint64_t delta_total_samples_received =
406           lifetime_stats.total_samples_received - last_total_samples_received;
407       last_total_samples_received = lifetime_stats.total_samples_received;
408       // The tolerance is 1% but expressed in Q14.
409       EXPECT_NEAR(
410           (delta_concealed_samples << 14) / delta_total_samples_received,
411           current_network_stats.expand_rate, (2 << 14) / 100.0);
412 
413       // Process RTCPstat.
414       RtcpStatistics current_rtcp_stats;
415       neteq_->GetRtcpStatistics(&current_rtcp_stats);
416       ASSERT_NO_FATAL_FAILURE(rtcp_stats.AddResult(current_rtcp_stats));
417     }
418   }
419 
420   SCOPED_TRACE("Check output audio.");
421   output.VerifyChecksum(output_checksum);
422   SCOPED_TRACE("Check network stats.");
423   network_stats.VerifyChecksum(network_stats_checksum);
424   SCOPED_TRACE("Check rtcp stats.");
425   rtcp_stats.VerifyChecksum(rtcp_stats_checksum);
426 }
427 
PopulateRtpInfo(int frame_index,int timestamp,RTPHeader * rtp_info)428 void NetEqDecodingTest::PopulateRtpInfo(int frame_index,
429                                         int timestamp,
430                                         RTPHeader* rtp_info) {
431   rtp_info->sequenceNumber = frame_index;
432   rtp_info->timestamp = timestamp;
433   rtp_info->ssrc = 0x1234;     // Just an arbitrary SSRC.
434   rtp_info->payloadType = 94;  // PCM16b WB codec.
435   rtp_info->markerBit = 0;
436 }
437 
PopulateCng(int frame_index,int timestamp,RTPHeader * rtp_info,uint8_t * payload,size_t * payload_len)438 void NetEqDecodingTest::PopulateCng(int frame_index,
439                                     int timestamp,
440                                     RTPHeader* rtp_info,
441                                     uint8_t* payload,
442                                     size_t* payload_len) {
443   rtp_info->sequenceNumber = frame_index;
444   rtp_info->timestamp = timestamp;
445   rtp_info->ssrc = 0x1234;     // Just an arbitrary SSRC.
446   rtp_info->payloadType = 98;  // WB CNG.
447   rtp_info->markerBit = 0;
448   payload[0] = 64;  // Noise level -64 dBov, quite arbitrarily chosen.
449   *payload_len = 1;  // Only noise level, no spectral parameters.
450 }
451 
452 #if !defined(WEBRTC_IOS) && defined(WEBRTC_NETEQ_UNITTEST_BITEXACT) && \
453     (defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)) &&    \
454     defined(WEBRTC_CODEC_ILBC) && !defined(WEBRTC_ARCH_ARM64)
455 #define MAYBE_TestBitExactness TestBitExactness
456 #else
457 #define MAYBE_TestBitExactness DISABLED_TestBitExactness
458 #endif
TEST_F(NetEqDecodingTest,MAYBE_TestBitExactness)459 TEST_F(NetEqDecodingTest, MAYBE_TestBitExactness) {
460   const std::string input_rtp_file =
461       webrtc::test::ResourcePath("audio_coding/neteq_universal_new", "rtp");
462 
463   const std::string output_checksum = PlatformChecksum(
464       "09fa7646e2ad032a0b156177b95f09012430f81f",
465       "1c64eb8b55ce8878676c6a1e6ddd78f48de0668b",
466       "not used",
467       "09fa7646e2ad032a0b156177b95f09012430f81f",
468       "759fef89a5de52bd17e733dc255c671ce86be909");
469 
470   const std::string network_stats_checksum =
471       PlatformChecksum("5b4262ca328e5f066af5d34f3380521583dd20de",
472                        "80235b6d727281203acb63b98f9a9e85d95f7ec0",
473                        "not used",
474                        "5b4262ca328e5f066af5d34f3380521583dd20de",
475                        "5b4262ca328e5f066af5d34f3380521583dd20de");
476 
477   const std::string rtcp_stats_checksum = PlatformChecksum(
478       "b8880bf9fed2487efbddcb8d94b9937a29ae521d",
479       "f3f7b3d3e71d7e635240b5373b57df6a7e4ce9d4",
480       "not used",
481       "b8880bf9fed2487efbddcb8d94b9937a29ae521d",
482       "b8880bf9fed2487efbddcb8d94b9937a29ae521d");
483 
484   DecodeAndCompare(input_rtp_file,
485                    output_checksum,
486                    network_stats_checksum,
487                    rtcp_stats_checksum,
488                    FLAG_gen_ref);
489 }
490 
491 #if !defined(WEBRTC_IOS) &&                                         \
492     defined(WEBRTC_NETEQ_UNITTEST_BITEXACT) &&                      \
493     defined(WEBRTC_CODEC_OPUS)
494 #define MAYBE_TestOpusBitExactness TestOpusBitExactness
495 #else
496 #define MAYBE_TestOpusBitExactness DISABLED_TestOpusBitExactness
497 #endif
TEST_F(NetEqDecodingTest,MAYBE_TestOpusBitExactness)498 TEST_F(NetEqDecodingTest, MAYBE_TestOpusBitExactness) {
499   const std::string input_rtp_file =
500       webrtc::test::ResourcePath("audio_coding/neteq_opus", "rtp");
501 
502   const std::string output_checksum = PlatformChecksum(
503       "7ea28d7edf9395f4ac8e8d8dd3a9e5c620b1bf48",
504       "5b1e691ab1c4465c742d6d944bc71e3b1c0e4c0e",
505       "b096114dd8c233eaf2b0ce9802ac95af13933772",
506       "7ea28d7edf9395f4ac8e8d8dd3a9e5c620b1bf48",
507       "7ea28d7edf9395f4ac8e8d8dd3a9e5c620b1bf48");
508 
509   const std::string network_stats_checksum =
510       PlatformChecksum("9e72233c78baf685e500dd6c94212b30a4c5f27d",
511                        "9a37270e4242fbd31e80bb47dc5e7ab82cf2d557",
512                        "4f1e9734bc80a290faaf9d611efcb8d7802dbc4f",
513                        "9e72233c78baf685e500dd6c94212b30a4c5f27d",
514                        "9e72233c78baf685e500dd6c94212b30a4c5f27d");
515 
516   const std::string rtcp_stats_checksum = PlatformChecksum(
517       "e37c797e3de6a64dda88c9ade7a013d022a2e1e0",
518       "e37c797e3de6a64dda88c9ade7a013d022a2e1e0",
519       "e37c797e3de6a64dda88c9ade7a013d022a2e1e0",
520       "e37c797e3de6a64dda88c9ade7a013d022a2e1e0",
521       "e37c797e3de6a64dda88c9ade7a013d022a2e1e0");
522 
523   DecodeAndCompare(input_rtp_file,
524                    output_checksum,
525                    network_stats_checksum,
526                    rtcp_stats_checksum,
527                    FLAG_gen_ref);
528 }
529 
530 // This test fixture is identical to NetEqDecodingTest, except that it enables
531 // the WebRTC-NetEqOpusDtxDelayFix field trial.
532 // TODO(bugs.webrtc.org/8488): When the field trial is over and the feature is
533 // default enabled, remove this fixture class and let the
534 // TestOpusDtxBitExactness test build directly on NetEqDecodingTest.
535 class NetEqDecodingTestWithOpusDtxFieldTrial : public NetEqDecodingTest {
536  public:
NetEqDecodingTestWithOpusDtxFieldTrial()537   NetEqDecodingTestWithOpusDtxFieldTrial()
538       : override_field_trials_("WebRTC-NetEqOpusDtxDelayFix/Enabled/") {}
539 
540  private:
541   test::ScopedFieldTrials override_field_trials_;
542 };
543 
544 #if !defined(WEBRTC_IOS) &&                                         \
545     defined(WEBRTC_NETEQ_UNITTEST_BITEXACT) &&                      \
546     defined(WEBRTC_CODEC_OPUS)
547 #define MAYBE_TestOpusDtxBitExactness TestOpusDtxBitExactness
548 #else
549 #define MAYBE_TestOpusDtxBitExactness DISABLED_TestOpusDtxBitExactness
550 #endif
TEST_F(NetEqDecodingTestWithOpusDtxFieldTrial,MAYBE_TestOpusDtxBitExactness)551 TEST_F(NetEqDecodingTestWithOpusDtxFieldTrial, MAYBE_TestOpusDtxBitExactness) {
552   const std::string input_rtp_file =
553       webrtc::test::ResourcePath("audio_coding/neteq_opus_dtx", "rtp");
554 
555   const std::string output_checksum =
556       PlatformChecksum("713af6c92881f5aab1285765ee6680da9d1c06ce",
557                        "3ec991b96872123f1554c03c543ca5d518431e46",
558                        "da9f9a2d94e0c2d67342fad4965d7b91cda50b25",
559                        "713af6c92881f5aab1285765ee6680da9d1c06ce",
560                        "713af6c92881f5aab1285765ee6680da9d1c06ce");
561 
562   const std::string network_stats_checksum =
563       "bab58dc587d956f326056d7340c96eb9d2d3cc21";
564 
565   const std::string rtcp_stats_checksum =
566       "ac27a7f305efb58b39bf123dccee25dee5758e63";
567 
568   DecodeAndCompare(input_rtp_file, output_checksum, network_stats_checksum,
569                    rtcp_stats_checksum, FLAG_gen_ref);
570 }
571 
572 // Use fax mode to avoid time-scaling. This is to simplify the testing of
573 // packet waiting times in the packet buffer.
574 class NetEqDecodingTestFaxMode : public NetEqDecodingTest {
575  protected:
NetEqDecodingTestFaxMode()576   NetEqDecodingTestFaxMode() : NetEqDecodingTest() {
577     config_.playout_mode = kPlayoutFax;
578   }
579   void TestJitterBufferDelay(bool apply_packet_loss);
580 };
581 
// Inserts a burst of packets, pulls them out one per 10 ms step, and checks
// the resulting waiting-time statistics as well as their reset after being
// read.
TEST_F(NetEqDecodingTestFaxMode, TestFrameWaitingTimeStatistics) {
  // Insert 30 dummy packets at once. Each packet contains 10 ms 16 kHz audio.
  const size_t kNumFrames = 30;
  const size_t kSamples = 10 * 16;
  const size_t kPayloadBytes = kSamples * 2;
  const uint8_t kPayload[kPayloadBytes] = {0};  // Same payload in all packets.
  for (size_t frame = 0; frame < kNumFrames; ++frame) {
    RTPHeader header;
    header.sequenceNumber = rtc::checked_cast<uint16_t>(frame);
    header.timestamp = rtc::checked_cast<uint32_t>(frame * kSamples);
    header.ssrc = 0x1234;     // Just an arbitrary SSRC.
    header.payloadType = 94;  // PCM16b WB codec.
    header.markerBit = 0;
    ASSERT_EQ(0, neteq_->InsertPacket(header, kPayload, 0));
  }

  // Pull out all data.
  for (size_t frame = 0; frame < kNumFrames; ++frame) {
    bool muted;
    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
  }

  // Since all frames are dumped into NetEQ at once, but pulled out with 10 ms
  // spacing (per definition), we expect the delay to increase with 10 ms for
  // each packet. Thus, we are calculating the statistics for a series from 10
  // to 300, in steps of 10 ms.
  NetEqNetworkStatistics first_read;
  EXPECT_EQ(0, neteq_->NetworkStatistics(&first_read));
  EXPECT_EQ(155, first_read.mean_waiting_time_ms);
  EXPECT_EQ(155, first_read.median_waiting_time_ms);
  EXPECT_EQ(10, first_read.min_waiting_time_ms);
  EXPECT_EQ(300, first_read.max_waiting_time_ms);

  // Check statistics again and make sure it's been reset.
  NetEqNetworkStatistics second_read;
  EXPECT_EQ(0, neteq_->NetworkStatistics(&second_read));
  EXPECT_EQ(-1, second_read.mean_waiting_time_ms);
  EXPECT_EQ(-1, second_read.median_waiting_time_ms);
  EXPECT_EQ(-1, second_read.min_waiting_time_ms);
  EXPECT_EQ(-1, second_read.max_waiting_time_ms);
}
622 
// Feeds packets slightly faster than audio is pulled and checks the reported
// clock drift.
TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimeNegative) {
  const int kNumFrames = 3000;  // Needed for convergence.
  const size_t kSamples = 10 * 16;
  const size_t kPayloadBytes = kSamples * 2;
  for (int frame_index = 0; frame_index < kNumFrames;) {
    // Insert one packet each time, except every 10th time where we insert two
    // packets at once. This will create a negative clock-drift of approx. 10%.
    const int packets_this_round = (frame_index % 10 == 0) ? 2 : 1;
    for (int n = 0; n < packets_this_round; ++n, ++frame_index) {
      uint8_t payload[kPayloadBytes] = {0};
      RTPHeader header;
      PopulateRtpInfo(frame_index, frame_index * kSamples, &header);
      ASSERT_EQ(0, neteq_->InsertPacket(header, payload, 0));
    }

    // Pull out data once.
    bool muted;
    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
  }

  NetEqNetworkStatistics drift_stats;
  ASSERT_EQ(0, neteq_->NetworkStatistics(&drift_stats));
  EXPECT_EQ(-103192, drift_stats.clockdrift_ppm);
}
650 
// Feeds packets slightly slower than audio is pulled and checks the reported
// clock drift.
TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimePositive) {
  const int kNumFrames = 5000;  // Needed for convergence.
  const size_t kSamples = 10 * 16;
  const size_t kPayloadBytes = kSamples * 2;
  int frame_index = 0;
  for (int round = 0; round < kNumFrames; ++round) {
    // Insert one packet each time, except every 10th time where we don't insert
    // any packet. This will create a positive clock-drift of approx. 11%.
    const bool skip_packet = (round % 10 == 9);
    if (!skip_packet) {
      uint8_t payload[kPayloadBytes] = {0};
      RTPHeader header;
      PopulateRtpInfo(frame_index, frame_index * kSamples, &header);
      ASSERT_EQ(0, neteq_->InsertPacket(header, payload, 0));
      ++frame_index;
    }

    // Pull out data once.
    bool muted;
    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
  }

  NetEqNetworkStatistics drift_stats;
  ASSERT_EQ(0, neteq_->NetworkStatistics(&drift_stats));
  EXPECT_EQ(110953, drift_stats.clockdrift_ppm);
}
678 
LongCngWithClockDrift(double drift_factor,double network_freeze_ms,bool pull_audio_during_freeze,int delay_tolerance_ms,int max_time_to_speech_ms)679 void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor,
680                                               double network_freeze_ms,
681                                               bool pull_audio_during_freeze,
682                                               int delay_tolerance_ms,
683                                               int max_time_to_speech_ms) {
684   uint16_t seq_no = 0;
685   uint32_t timestamp = 0;
686   const int kFrameSizeMs = 30;
687   const size_t kSamples = kFrameSizeMs * 16;
688   const size_t kPayloadBytes = kSamples * 2;
689   double next_input_time_ms = 0.0;
690   double t_ms;
691   bool muted;
692 
693   // Insert speech for 5 seconds.
694   const int kSpeechDurationMs = 5000;
695   for (t_ms = 0; t_ms < kSpeechDurationMs; t_ms += 10) {
696     // Each turn in this for loop is 10 ms.
697     while (next_input_time_ms <= t_ms) {
698       // Insert one 30 ms speech frame.
699       uint8_t payload[kPayloadBytes] = {0};
700       RTPHeader rtp_info;
701       PopulateRtpInfo(seq_no, timestamp, &rtp_info);
702       ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
703       ++seq_no;
704       timestamp += kSamples;
705       next_input_time_ms += static_cast<double>(kFrameSizeMs) * drift_factor;
706     }
707     // Pull out data once.
708     ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
709     ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
710   }
711 
712   EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
713   rtc::Optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp();
714   ASSERT_TRUE(playout_timestamp);
715   int32_t delay_before = timestamp - *playout_timestamp;
716 
717   // Insert CNG for 1 minute (= 60000 ms).
718   const int kCngPeriodMs = 100;
719   const int kCngPeriodSamples = kCngPeriodMs * 16;  // Period in 16 kHz samples.
720   const int kCngDurationMs = 60000;
721   for (; t_ms < kSpeechDurationMs + kCngDurationMs; t_ms += 10) {
722     // Each turn in this for loop is 10 ms.
723     while (next_input_time_ms <= t_ms) {
724       // Insert one CNG frame each 100 ms.
725       uint8_t payload[kPayloadBytes];
726       size_t payload_len;
727       RTPHeader rtp_info;
728       PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
729       ASSERT_EQ(0, neteq_->InsertPacket(
730                        rtp_info,
731                        rtc::ArrayView<const uint8_t>(payload, payload_len), 0));
732       ++seq_no;
733       timestamp += kCngPeriodSamples;
734       next_input_time_ms += static_cast<double>(kCngPeriodMs) * drift_factor;
735     }
736     // Pull out data once.
737     ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
738     ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
739   }
740 
741   EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
742 
743   if (network_freeze_ms > 0) {
744     // First keep pulling audio for |network_freeze_ms| without inserting
745     // any data, then insert CNG data corresponding to |network_freeze_ms|
746     // without pulling any output audio.
747     const double loop_end_time = t_ms + network_freeze_ms;
748     for (; t_ms < loop_end_time; t_ms += 10) {
749       // Pull out data once.
750       ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
751       ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
752       EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
753     }
754     bool pull_once = pull_audio_during_freeze;
755     // If |pull_once| is true, GetAudio will be called once half-way through
756     // the network recovery period.
757     double pull_time_ms = (t_ms + next_input_time_ms) / 2;
758     while (next_input_time_ms <= t_ms) {
759       if (pull_once && next_input_time_ms >= pull_time_ms) {
760         pull_once = false;
761         // Pull out data once.
762         ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
763         ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
764         EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
765         t_ms += 10;
766       }
767       // Insert one CNG frame each 100 ms.
768       uint8_t payload[kPayloadBytes];
769       size_t payload_len;
770       RTPHeader rtp_info;
771       PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
772       ASSERT_EQ(0, neteq_->InsertPacket(
773                        rtp_info,
774                        rtc::ArrayView<const uint8_t>(payload, payload_len), 0));
775       ++seq_no;
776       timestamp += kCngPeriodSamples;
777       next_input_time_ms += kCngPeriodMs * drift_factor;
778     }
779   }
780 
781   // Insert speech again until output type is speech.
782   double speech_restart_time_ms = t_ms;
783   while (out_frame_.speech_type_ != AudioFrame::kNormalSpeech) {
784     // Each turn in this for loop is 10 ms.
785     while (next_input_time_ms <= t_ms) {
786       // Insert one 30 ms speech frame.
787       uint8_t payload[kPayloadBytes] = {0};
788       RTPHeader rtp_info;
789       PopulateRtpInfo(seq_no, timestamp, &rtp_info);
790       ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
791       ++seq_no;
792       timestamp += kSamples;
793       next_input_time_ms += kFrameSizeMs * drift_factor;
794     }
795     // Pull out data once.
796     ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
797     ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
798     // Increase clock.
799     t_ms += 10;
800   }
801 
802   // Check that the speech starts again within reasonable time.
803   double time_until_speech_returns_ms = t_ms - speech_restart_time_ms;
804   EXPECT_LT(time_until_speech_returns_ms, max_time_to_speech_ms);
805   playout_timestamp = neteq_->GetPlayoutTimestamp();
806   ASSERT_TRUE(playout_timestamp);
807   int32_t delay_after = timestamp - *playout_timestamp;
808   // Compare delay before and after, and make sure it differs less than 20 ms.
809   EXPECT_LE(delay_after, delay_before + delay_tolerance_ms * 16);
810   EXPECT_GE(delay_after, delay_before - delay_tolerance_ms * 16);
811 }
812 
// Long CNG period with a clock drift of -25 ms / s (sender faster than
// receiver) and no network freeze.
TEST_F(NetEqDecodingTest, LongCngWithNegativeClockDrift) {
  const double drift = 1000.0 / (1000.0 + 25.0);
  LongCngWithClockDrift(drift,
                        0.0,    // Network freeze time in ms (none).
                        false,  // No extra GetAudio during freeze recovery.
                        20,     // Delay tolerance in ms.
                        100);   // Max time until speech returns, in ms.
}
826 
// Long CNG period with a clock drift of +25 ms / s (sender slower than
// receiver) and no network freeze.
TEST_F(NetEqDecodingTest, LongCngWithPositiveClockDrift) {
  const double drift = 1000.0 / (1000.0 - 25.0);
  LongCngWithClockDrift(drift,
                        0.0,    // Network freeze time in ms (none).
                        false,  // No extra GetAudio during freeze recovery.
                        20,     // Delay tolerance in ms.
                        100);   // Max time until speech returns, in ms.
}
840 
// Long CNG period with a clock drift of -25 ms / s (sender faster than
// receiver), followed by a 5 second network freeze. Uses wider limits than
// the tests without a freeze.
TEST_F(NetEqDecodingTest, LongCngWithNegativeClockDriftNetworkFreeze) {
  const double drift = 1000.0 / (1000.0 + 25.0);
  LongCngWithClockDrift(drift,
                        5000.0,  // Network freeze time in ms.
                        false,   // No extra GetAudio during freeze recovery.
                        50,      // Delay tolerance in ms.
                        200);    // Max time until speech returns, in ms.
}
854 
// Long CNG period with a clock drift of +25 ms / s (sender slower than
// receiver), followed by a 5 second network freeze.
TEST_F(NetEqDecodingTest, LongCngWithPositiveClockDriftNetworkFreeze) {
  const double drift = 1000.0 / (1000.0 - 25.0);
  LongCngWithClockDrift(drift,
                        5000.0,  // Network freeze time in ms.
                        false,   // No extra GetAudio during freeze recovery.
                        20,      // Delay tolerance in ms.
                        100);    // Max time until speech returns, in ms.
}
868 
// Long CNG period with a clock drift of +25 ms / s (sender slower than
// receiver), followed by a 5 second network freeze during whose recovery one
// extra GetAudio call is requested.
TEST_F(NetEqDecodingTest, LongCngWithPositiveClockDriftNetworkFreezeExtraPull) {
  const double drift = 1000.0 / (1000.0 - 25.0);
  LongCngWithClockDrift(drift,
                        5000.0,  // Network freeze time in ms.
                        true,    // Pull audio once during freeze recovery.
                        20,      // Delay tolerance in ms.
                        100);    // Max time until speech returns, in ms.
}
882 
// Long CNG period with no clock drift and no network freeze. Uses the
// tightest limits of the LongCng tests.
TEST_F(NetEqDecodingTest, LongCngWithoutClockDrift) {
  const double no_drift = 1.0;
  LongCngWithClockDrift(no_drift,
                        0.0,    // Network freeze time in ms (none).
                        false,  // No extra GetAudio during freeze recovery.
                        10,     // Delay tolerance in ms.
                        50);    // Max time until speech returns, in ms.
}
895 
// Inserting a packet with a payload type that has no registered decoder is
// expected to fail.
TEST_F(NetEqDecodingTest, UnknownPayloadType) {
  RTPHeader header;
  PopulateRtpInfo(0, 0, &header);
  header.payloadType = 1;  // Not registered as a decoder.

  uint8_t zero_payload[100] = {0};
  EXPECT_EQ(NetEq::kFail, neteq_->InsertPacket(header, zero_payload, 0));
}
904 
905 #if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)
906 #define MAYBE_DecoderError DecoderError
907 #else
908 #define MAYBE_DecoderError DISABLED_DecoderError
909 #endif
910 
TEST_F(NetEqDecodingTest,MAYBE_DecoderError)911 TEST_F(NetEqDecodingTest, MAYBE_DecoderError) {
912   const size_t kPayloadBytes = 100;
913   uint8_t payload[kPayloadBytes] = {0};
914   RTPHeader rtp_info;
915   PopulateRtpInfo(0, 0, &rtp_info);
916   rtp_info.payloadType = 103;  // iSAC, but the payload is invalid.
917   EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
918   // Set all of |out_data_| to 1, and verify that it was set to 0 by the call
919   // to GetAudio.
920   int16_t* out_frame_data = out_frame_.mutable_data();
921   for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; ++i) {
922     out_frame_data[i] = 1;
923   }
924   bool muted;
925   EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(&out_frame_, &muted));
926   ASSERT_FALSE(muted);
927 
928   // Verify that the first 160 samples are set to 0.
929   static const int kExpectedOutputLength = 160;  // 10 ms at 16 kHz sample rate.
930   const int16_t* const_out_frame_data = out_frame_.data();
931   for (int i = 0; i < kExpectedOutputLength; ++i) {
932     std::ostringstream ss;
933     ss << "i = " << i;
934     SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
935     EXPECT_EQ(0, const_out_frame_data[i]);
936   }
937 }
938 
// Calls GetAudio before any packet has been inserted and checks that the
// first output block is zeroed and that the output sample rate still equals
// the configured one.
TEST_F(NetEqDecodingTest, GetAudioBeforeInsertPacket) {
  // Pre-fill all of |out_frame_| with 1, so that samples set to 0 by the call
  // to GetAudio can be told apart from untouched ones.
  int16_t* const writable = out_frame_.mutable_data();
  std::fill(writable, writable + AudioFrame::kMaxDataSizeSamples,
            static_cast<int16_t>(1));

  bool muted;
  EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
  ASSERT_FALSE(muted);

  // Verify that the first block of samples (10 ms at the initial sample rate)
  // is set to 0.
  const int kNumSamplesToCheck = kInitSampleRateHz / 100;
  const int16_t* samples = out_frame_.data();
  for (int i = 0; i != kNumSamplesToCheck; ++i) {
    std::ostringstream trace;
    trace << "i = " << i;
    SCOPED_TRACE(trace.str());  // Print out the sample index on failure.
    EXPECT_EQ(0, samples[i]);
  }

  // Verify that the sample rate did not change from the initial configuration.
  EXPECT_EQ(config_.sample_rate_hz, neteq_->last_output_sample_rate_hz());
}
962 
963 class NetEqBgnTest : public NetEqDecodingTest {
964  protected:
965   virtual void TestCondition(double sum_squared_noise,
966                              bool should_be_faded) = 0;
967 
CheckBgn(int sampling_rate_hz)968   void CheckBgn(int sampling_rate_hz) {
969     size_t expected_samples_per_channel = 0;
970     uint8_t payload_type = 0xFF;  // Invalid.
971     if (sampling_rate_hz == 8000) {
972       expected_samples_per_channel = kBlockSize8kHz;
973       payload_type = 93;  // PCM 16, 8 kHz.
974     } else if (sampling_rate_hz == 16000) {
975       expected_samples_per_channel = kBlockSize16kHz;
976       payload_type = 94;  // PCM 16, 16 kHZ.
977     } else if (sampling_rate_hz == 32000) {
978       expected_samples_per_channel = kBlockSize32kHz;
979       payload_type = 95;  // PCM 16, 32 kHz.
980     } else {
981       ASSERT_TRUE(false);  // Unsupported test case.
982     }
983 
984     AudioFrame output;
985     test::AudioLoop input;
986     // We are using the same 32 kHz input file for all tests, regardless of
987     // |sampling_rate_hz|. The output may sound weird, but the test is still
988     // valid.
989     ASSERT_TRUE(input.Init(
990         webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
991         10 * sampling_rate_hz,  // Max 10 seconds loop length.
992         expected_samples_per_channel));
993 
994     // Payload of 10 ms of PCM16 32 kHz.
995     uint8_t payload[kBlockSize32kHz * sizeof(int16_t)];
996     RTPHeader rtp_info;
997     PopulateRtpInfo(0, 0, &rtp_info);
998     rtp_info.payloadType = payload_type;
999 
1000     uint32_t receive_timestamp = 0;
1001     bool muted;
1002     for (int n = 0; n < 10; ++n) {  // Insert few packets and get audio.
1003       auto block = input.GetNextBlock();
1004       ASSERT_EQ(expected_samples_per_channel, block.size());
1005       size_t enc_len_bytes =
1006           WebRtcPcm16b_Encode(block.data(), block.size(), payload);
1007       ASSERT_EQ(enc_len_bytes, expected_samples_per_channel * 2);
1008 
1009       ASSERT_EQ(0, neteq_->InsertPacket(
1010                        rtp_info,
1011                        rtc::ArrayView<const uint8_t>(payload, enc_len_bytes),
1012                        receive_timestamp));
1013       output.Reset();
1014       ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
1015       ASSERT_EQ(1u, output.num_channels_);
1016       ASSERT_EQ(expected_samples_per_channel, output.samples_per_channel_);
1017       ASSERT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);
1018 
1019       // Next packet.
1020       rtp_info.timestamp += rtc::checked_cast<uint32_t>(
1021           expected_samples_per_channel);
1022       rtp_info.sequenceNumber++;
1023       receive_timestamp += rtc::checked_cast<uint32_t>(
1024           expected_samples_per_channel);
1025     }
1026 
1027     output.Reset();
1028 
1029     // Get audio without inserting packets, expecting PLC and PLC-to-CNG. Pull
1030     // one frame without checking speech-type. This is the first frame pulled
1031     // without inserting any packet, and might not be labeled as PLC.
1032     ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
1033     ASSERT_EQ(1u, output.num_channels_);
1034     ASSERT_EQ(expected_samples_per_channel, output.samples_per_channel_);
1035 
1036     // To be able to test the fading of background noise we need at lease to
1037     // pull 611 frames.
1038     const int kFadingThreshold = 611;
1039 
1040     // Test several CNG-to-PLC packet for the expected behavior. The number 20
1041     // is arbitrary, but sufficiently large to test enough number of frames.
1042     const int kNumPlcToCngTestFrames = 20;
1043     bool plc_to_cng = false;
1044     for (int n = 0; n < kFadingThreshold + kNumPlcToCngTestFrames; ++n) {
1045       output.Reset();
1046       // Set to non-zero.
1047       memset(output.mutable_data(), 1, AudioFrame::kMaxDataSizeBytes);
1048       ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
1049       ASSERT_FALSE(muted);
1050       ASSERT_EQ(1u, output.num_channels_);
1051       ASSERT_EQ(expected_samples_per_channel, output.samples_per_channel_);
1052       if (output.speech_type_ == AudioFrame::kPLCCNG) {
1053         plc_to_cng = true;
1054         double sum_squared = 0;
1055         const int16_t* output_data = output.data();
1056         for (size_t k = 0;
1057              k < output.num_channels_ * output.samples_per_channel_; ++k)
1058           sum_squared += output_data[k] * output_data[k];
1059         TestCondition(sum_squared, n > kFadingThreshold);
1060       } else {
1061         EXPECT_EQ(AudioFrame::kPLC, output.speech_type_);
1062       }
1063     }
1064     EXPECT_TRUE(plc_to_cng);  // Just to be sure that PLC-to-CNG has occurred.
1065   }
1066 };
1067 
1068 class NetEqBgnTestOn : public NetEqBgnTest {
1069  protected:
NetEqBgnTestOn()1070   NetEqBgnTestOn() : NetEqBgnTest() {
1071     config_.background_noise_mode = NetEq::kBgnOn;
1072   }
1073 
TestCondition(double sum_squared_noise,bool)1074   void TestCondition(double sum_squared_noise, bool /*should_be_faded*/) {
1075     EXPECT_NE(0, sum_squared_noise);
1076   }
1077 };
1078 
1079 class NetEqBgnTestOff : public NetEqBgnTest {
1080  protected:
NetEqBgnTestOff()1081   NetEqBgnTestOff() : NetEqBgnTest() {
1082     config_.background_noise_mode = NetEq::kBgnOff;
1083   }
1084 
TestCondition(double sum_squared_noise,bool)1085   void TestCondition(double sum_squared_noise, bool /*should_be_faded*/) {
1086     EXPECT_EQ(0, sum_squared_noise);
1087   }
1088 };
1089 
1090 class NetEqBgnTestFade : public NetEqBgnTest {
1091  protected:
NetEqBgnTestFade()1092   NetEqBgnTestFade() : NetEqBgnTest() {
1093     config_.background_noise_mode = NetEq::kBgnFade;
1094   }
1095 
TestCondition(double sum_squared_noise,bool should_be_faded)1096   void TestCondition(double sum_squared_noise, bool should_be_faded) {
1097     if (should_be_faded)
1098       EXPECT_EQ(0, sum_squared_noise);
1099   }
1100 };
1101 
// Runs the BGN check at each supported sample rate with BGN mode "on".
TEST_F(NetEqBgnTestOn, RunTest) {
  const int kSampleRatesHz[] = {8000, 16000, 32000};
  for (int rate_hz : kSampleRatesHz) {
    CheckBgn(rate_hz);
  }
}
1107 
// Runs the BGN check at each supported sample rate with BGN mode "off".
TEST_F(NetEqBgnTestOff, RunTest) {
  const int kSampleRatesHz[] = {8000, 16000, 32000};
  for (int rate_hz : kSampleRatesHz) {
    CheckBgn(rate_hz);
  }
}
1113 
// Runs the BGN check at each supported sample rate with BGN mode "fade".
TEST_F(NetEqBgnTestFade, RunTest) {
  const int kSampleRatesHz[] = {8000, 16000, 32000};
  for (int rate_hz : kSampleRatesHz) {
    CheckBgn(rate_hz);
  }
}
1119 
WrapTest(uint16_t start_seq_no,uint32_t start_timestamp,const std::set<uint16_t> & drop_seq_numbers,bool expect_seq_no_wrap,bool expect_timestamp_wrap)1120 void NetEqDecodingTest::WrapTest(uint16_t start_seq_no,
1121                                  uint32_t start_timestamp,
1122                                  const std::set<uint16_t>& drop_seq_numbers,
1123                                  bool expect_seq_no_wrap,
1124                                  bool expect_timestamp_wrap) {
1125   uint16_t seq_no = start_seq_no;
1126   uint32_t timestamp = start_timestamp;
1127   const int kBlocksPerFrame = 3;  // Number of 10 ms blocks per frame.
1128   const int kFrameSizeMs = kBlocksPerFrame * kTimeStepMs;
1129   const int kSamples = kBlockSize16kHz * kBlocksPerFrame;
1130   const size_t kPayloadBytes = kSamples * sizeof(int16_t);
1131   double next_input_time_ms = 0.0;
1132   uint32_t receive_timestamp = 0;
1133 
1134   // Insert speech for 2 seconds.
1135   const int kSpeechDurationMs = 2000;
1136   int packets_inserted = 0;
1137   uint16_t last_seq_no;
1138   uint32_t last_timestamp;
1139   bool timestamp_wrapped = false;
1140   bool seq_no_wrapped = false;
1141   for (double t_ms = 0; t_ms < kSpeechDurationMs; t_ms += 10) {
1142     // Each turn in this for loop is 10 ms.
1143     while (next_input_time_ms <= t_ms) {
1144       // Insert one 30 ms speech frame.
1145       uint8_t payload[kPayloadBytes] = {0};
1146       RTPHeader rtp_info;
1147       PopulateRtpInfo(seq_no, timestamp, &rtp_info);
1148       if (drop_seq_numbers.find(seq_no) == drop_seq_numbers.end()) {
1149         // This sequence number was not in the set to drop. Insert it.
1150         ASSERT_EQ(0,
1151                   neteq_->InsertPacket(rtp_info, payload, receive_timestamp));
1152         ++packets_inserted;
1153       }
1154       NetEqNetworkStatistics network_stats;
1155       ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
1156 
1157       // Due to internal NetEq logic, preferred buffer-size is about 4 times the
1158       // packet size for first few packets. Therefore we refrain from checking
1159       // the criteria.
1160       if (packets_inserted > 4) {
1161         // Expect preferred and actual buffer size to be no more than 2 frames.
1162         EXPECT_LE(network_stats.preferred_buffer_size_ms, kFrameSizeMs * 2);
1163         EXPECT_LE(network_stats.current_buffer_size_ms, kFrameSizeMs * 2 +
1164                   algorithmic_delay_ms_);
1165       }
1166       last_seq_no = seq_no;
1167       last_timestamp = timestamp;
1168 
1169       ++seq_no;
1170       timestamp += kSamples;
1171       receive_timestamp += kSamples;
1172       next_input_time_ms += static_cast<double>(kFrameSizeMs);
1173 
1174       seq_no_wrapped |= seq_no < last_seq_no;
1175       timestamp_wrapped |= timestamp < last_timestamp;
1176     }
1177     // Pull out data once.
1178     AudioFrame output;
1179     bool muted;
1180     ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
1181     ASSERT_EQ(kBlockSize16kHz, output.samples_per_channel_);
1182     ASSERT_EQ(1u, output.num_channels_);
1183 
1184     // Expect delay (in samples) to be less than 2 packets.
1185     rtc::Optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp();
1186     ASSERT_TRUE(playout_timestamp);
1187     EXPECT_LE(timestamp - *playout_timestamp,
1188               static_cast<uint32_t>(kSamples * 2));
1189   }
1190   // Make sure we have actually tested wrap-around.
1191   ASSERT_EQ(expect_seq_no_wrap, seq_no_wrapped);
1192   ASSERT_EQ(expect_timestamp_wrap, timestamp_wrapped);
1193 }
1194 
// Starts with a sequence number that will soon wrap; no packets are dropped
// and the timestamp is not expected to wrap.
TEST_F(NetEqDecodingTest, SequenceNumberWrap) {
  WrapTest(0xFFFF - 10, 0, std::set<uint16_t>(), true, false);
}
1200 
// Starts with a sequence number that will soon wrap, and drops the two
// packets on either side of the wrap point (0xFFFF and 0x0).
TEST_F(NetEqDecodingTest, SequenceNumberWrapAndDrop) {
  std::set<uint16_t> dropped{0xFFFF, 0x0};
  WrapTest(0xFFFF - 10, 0, dropped, true, false);
}
1208 
// Starts with a timestamp that will soon wrap; no packets are dropped and the
// sequence number is not expected to wrap.
TEST_F(NetEqDecodingTest, TimestampWrap) {
  WrapTest(0, 0xFFFFFFFF - 3000, std::set<uint16_t>(), false, true);
}
1214 
// Starts with a timestamp and a sequence number that will wrap at the same
// time; no packets are dropped.
TEST_F(NetEqDecodingTest, TimestampAndSequenceNumberWrap) {
  WrapTest(0xFFFF - 10, 0xFFFFFFFF - 5000, std::set<uint16_t>(), true, true);
}
1221 
DuplicateCng()1222 void NetEqDecodingTest::DuplicateCng() {
1223   uint16_t seq_no = 0;
1224   uint32_t timestamp = 0;
1225   const int kFrameSizeMs = 10;
1226   const int kSampleRateKhz = 16;
1227   const int kSamples = kFrameSizeMs * kSampleRateKhz;
1228   const size_t kPayloadBytes = kSamples * 2;
1229 
1230   const int algorithmic_delay_samples = std::max(
1231       algorithmic_delay_ms_ * kSampleRateKhz, 5 * kSampleRateKhz / 8);
1232   // Insert three speech packets. Three are needed to get the frame length
1233   // correct.
1234   uint8_t payload[kPayloadBytes] = {0};
1235   RTPHeader rtp_info;
1236   bool muted;
1237   for (int i = 0; i < 3; ++i) {
1238     PopulateRtpInfo(seq_no, timestamp, &rtp_info);
1239     ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
1240     ++seq_no;
1241     timestamp += kSamples;
1242 
1243     // Pull audio once.
1244     ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
1245     ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
1246   }
1247   // Verify speech output.
1248   EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
1249 
1250   // Insert same CNG packet twice.
1251   const int kCngPeriodMs = 100;
1252   const int kCngPeriodSamples = kCngPeriodMs * kSampleRateKhz;
1253   size_t payload_len;
1254   PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
1255   // This is the first time this CNG packet is inserted.
1256   ASSERT_EQ(
1257       0, neteq_->InsertPacket(
1258              rtp_info, rtc::ArrayView<const uint8_t>(payload, payload_len), 0));
1259 
1260   // Pull audio once and make sure CNG is played.
1261   ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
1262   ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
1263   EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
1264   EXPECT_FALSE(
1265       neteq_->GetPlayoutTimestamp());  // Returns empty value during CNG.
1266   EXPECT_EQ(timestamp - algorithmic_delay_samples,
1267             out_frame_.timestamp_ + out_frame_.samples_per_channel_);
1268 
1269   // Insert the same CNG packet again. Note that at this point it is old, since
1270   // we have already decoded the first copy of it.
1271   ASSERT_EQ(
1272       0, neteq_->InsertPacket(
1273              rtp_info, rtc::ArrayView<const uint8_t>(payload, payload_len), 0));
1274 
1275   // Pull audio until we have played |kCngPeriodMs| of CNG. Start at 10 ms since
1276   // we have already pulled out CNG once.
1277   for (int cng_time_ms = 10; cng_time_ms < kCngPeriodMs; cng_time_ms += 10) {
1278     ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
1279     ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
1280     EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
1281     EXPECT_FALSE(
1282         neteq_->GetPlayoutTimestamp());  // Returns empty value during CNG.
1283     EXPECT_EQ(timestamp - algorithmic_delay_samples,
1284               out_frame_.timestamp_ + out_frame_.samples_per_channel_);
1285   }
1286 
1287   // Insert speech again.
1288   ++seq_no;
1289   timestamp += kCngPeriodSamples;
1290   PopulateRtpInfo(seq_no, timestamp, &rtp_info);
1291   ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
1292 
1293   // Pull audio once and verify that the output is speech again.
1294   ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
1295   ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
1296   EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
1297   rtc::Optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp();
1298   ASSERT_TRUE(playout_timestamp);
1299   EXPECT_EQ(timestamp + kSamples - algorithmic_delay_samples,
1300             *playout_timestamp);
1301 }
1302 
// Runs the duplicate-CNG scenario on the default fixture.
TEST_F(NetEqDecodingTest, DiscardDuplicateCng) {
  DuplicateCng();
}
1304 
// Starts a stream with a CNG packet instead of speech, checks that CNG is
// played, then inserts speech packets until the output timestamp has passed
// the first speech timestamp and checks that the output is normal speech.
TEST_F(NetEqDecodingTest, CngFirst) {
  const int kFrameSizeMs = 10;
  const int kSampleRateKhz = 16;
  const int kSamples = kFrameSizeMs * kSampleRateKhz;
  const int kPayloadBytes = kSamples * 2;
  const int kCngPeriodMs = 100;
  const int kCngPeriodSamples = kCngPeriodMs * kSampleRateKhz;

  uint16_t seq_no = 0;
  uint32_t timestamp = 0;
  uint8_t payload[kPayloadBytes] = {0};
  RTPHeader rtp_info;

  // The very first packet is CNG.
  size_t payload_len;
  PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
  const rtc::ArrayView<const uint8_t> cng_packet(payload, payload_len);
  ASSERT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_info, cng_packet, 0));
  ++seq_no;
  timestamp += kCngPeriodSamples;

  // Pull audio once and make sure CNG is played.
  bool muted;
  ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
  ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
  EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);

  // Insert speech packets, pulling audio once after each, until the output
  // timestamp is newer than the timestamp of the first speech packet. Give up
  // after 20 packets.
  const uint32_t first_speech_timestamp = timestamp;
  int timeout_counter = 0;
  for (;;) {
    ASSERT_LT(timeout_counter++, 20) << "Test timed out";
    PopulateRtpInfo(seq_no, timestamp, &rtp_info);
    ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
    ++seq_no;
    timestamp += kSamples;

    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
    if (IsNewerTimestamp(out_frame_.timestamp_, first_speech_timestamp)) {
      break;
    }
  }
  // Verify speech output.
  EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
}
1350 
1351 class NetEqDecodingTestWithMutedState : public NetEqDecodingTest {
1352  public:
NetEqDecodingTestWithMutedState()1353   NetEqDecodingTestWithMutedState() : NetEqDecodingTest() {
1354     config_.enable_muted_state = true;
1355   }
1356 
1357  protected:
1358   static constexpr size_t kSamples = 10 * 16;
1359   static constexpr size_t kPayloadBytes = kSamples * 2;
1360 
InsertPacket(uint32_t rtp_timestamp)1361   void InsertPacket(uint32_t rtp_timestamp) {
1362     uint8_t payload[kPayloadBytes] = {0};
1363     RTPHeader rtp_info;
1364     PopulateRtpInfo(0, rtp_timestamp, &rtp_info);
1365     EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
1366   }
1367 
InsertCngPacket(uint32_t rtp_timestamp)1368   void InsertCngPacket(uint32_t rtp_timestamp) {
1369     uint8_t payload[kPayloadBytes] = {0};
1370     RTPHeader rtp_info;
1371     size_t payload_len;
1372     PopulateCng(0, rtp_timestamp, &rtp_info, payload, &payload_len);
1373     EXPECT_EQ(
1374         NetEq::kOK,
1375         neteq_->InsertPacket(
1376             rtp_info, rtc::ArrayView<const uint8_t>(payload, payload_len), 0));
1377   }
1378 
GetAudioReturnMuted()1379   bool GetAudioReturnMuted() {
1380     bool muted;
1381     EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
1382     return muted;
1383   }
1384 
GetAudioUntilMuted()1385   void GetAudioUntilMuted() {
1386     while (!GetAudioReturnMuted()) {
1387       ASSERT_LT(counter_++, 1000) << "Test timed out";
1388     }
1389   }
1390 
GetAudioUntilNormal()1391   void GetAudioUntilNormal() {
1392     bool muted = false;
1393     while (out_frame_.speech_type_ != AudioFrame::kNormalSpeech) {
1394       EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
1395       ASSERT_LT(counter_++, 1000) << "Test timed out";
1396     }
1397     EXPECT_FALSE(muted);
1398   }
1399 
1400   int counter_ = 0;
1401 };
1402 
1403 // Verifies that NetEq goes in and out of muted state as expected.
TEST_F(NetEqDecodingTestWithMutedState, MutedState) {
  // Insert one speech packet; the first pull must not be muted.
  InsertPacket(0);
  EXPECT_FALSE(GetAudioReturnMuted());
  // Pull data until faded out.
  GetAudioUntilMuted();
  EXPECT_TRUE(out_frame_.muted());

  // Pull into a fresh frame that is pre-filled with a marker value. In muted
  // mode the samples must be left untouched, while the other frame parameters
  // should still be correct.
  const int16_t kMarker = 17;
  AudioFrame new_frame;
  int16_t* frame_data = new_frame.mutable_data();
  std::fill(frame_data, frame_data + AudioFrame::kMaxDataSizeSamples, kMarker);

  bool muted;
  EXPECT_EQ(0, neteq_->GetAudio(&new_frame, &muted));
  EXPECT_TRUE(muted);
  // NOTE(review): this re-checks |out_frame_|, which was not passed to the
  // GetAudio call above; possibly |new_frame| was intended. TODO confirm.
  EXPECT_TRUE(out_frame_.muted());
  for (size_t i = 0; i != AudioFrame::kMaxDataSizeSamples; ++i) {
    EXPECT_EQ(17, frame_data[i]);
  }
  EXPECT_EQ(out_frame_.timestamp_ + out_frame_.samples_per_channel_,
            new_frame.timestamp_);
  EXPECT_EQ(out_frame_.samples_per_channel_, new_frame.samples_per_channel_);
  EXPECT_EQ(out_frame_.sample_rate_hz_, new_frame.sample_rate_hz_);
  EXPECT_EQ(out_frame_.num_channels_, new_frame.num_channels_);
  EXPECT_EQ(out_frame_.speech_type_, new_frame.speech_type_);
  EXPECT_EQ(out_frame_.vad_activity_, new_frame.vad_activity_);

  // Insert new data, with a timestamp that is corrected for the time elapsed
  // since the last packet, and verify that normal operation resumes.
  InsertPacket(kSamples * counter_);
  GetAudioUntilNormal();
  EXPECT_FALSE(out_frame_.muted());

  NetEqNetworkStatistics stats;
  EXPECT_EQ(0, neteq_->NetworkStatistics(&stats));
  // NetEqNetworkStatistics::expand_rate tells the fraction of samples that were
  // concealment samples, in Q14 (16384 = 100%). The vast majority should be
  // concealment samples in this test.
  EXPECT_GT(stats.expand_rate, 14000);
  // And, it should be greater than the speech_expand_rate.
  EXPECT_GT(stats.expand_rate, stats.speech_expand_rate);
}
1450 
1451 // Verifies that NetEq goes out of muted state when given a delayed packet.
TEST_F(NetEqDecodingTestWithMutedState,MutedStateDelayedPacket)1452 TEST_F(NetEqDecodingTestWithMutedState, MutedStateDelayedPacket) {
1453   // Insert one speech packet.
1454   InsertPacket(0);
1455   // Pull out audio once and expect it not to be muted.
1456   EXPECT_FALSE(GetAudioReturnMuted());
1457   // Pull data until faded out.
1458   GetAudioUntilMuted();
1459   // Insert new data. Timestamp is only corrected for the half of the time
1460   // elapsed since the last packet. That is, the new packet is delayed. Verify
1461   // that normal operation resumes.
1462   InsertPacket(kSamples * counter_ / 2);
1463   GetAudioUntilNormal();
1464 }
1465 
1466 // Verifies that NetEq goes out of muted state when given a future packet.
TEST_F(NetEqDecodingTestWithMutedState,MutedStateFuturePacket)1467 TEST_F(NetEqDecodingTestWithMutedState, MutedStateFuturePacket) {
1468   // Insert one speech packet.
1469   InsertPacket(0);
1470   // Pull out audio once and expect it not to be muted.
1471   EXPECT_FALSE(GetAudioReturnMuted());
1472   // Pull data until faded out.
1473   GetAudioUntilMuted();
1474   // Insert new data. Timestamp is over-corrected for the time elapsed since the
1475   // last packet. That is, the new packet is too early. Verify that normal
1476   // operation resumes.
1477   InsertPacket(kSamples * counter_ * 2);
1478   GetAudioUntilNormal();
1479 }
1480 
1481 // Verifies that NetEq goes out of muted state when given an old packet.
TEST_F(NetEqDecodingTestWithMutedState,MutedStateOldPacket)1482 TEST_F(NetEqDecodingTestWithMutedState, MutedStateOldPacket) {
1483   // Insert one speech packet.
1484   InsertPacket(0);
1485   // Pull out audio once and expect it not to be muted.
1486   EXPECT_FALSE(GetAudioReturnMuted());
1487   // Pull data until faded out.
1488   GetAudioUntilMuted();
1489 
1490   EXPECT_NE(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
1491   // Insert packet which is older than the first packet.
1492   InsertPacket(kSamples * (counter_ - 1000));
1493   EXPECT_FALSE(GetAudioReturnMuted());
1494   EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
1495 }
1496 
1497 // Verifies that NetEq doesn't enter muted state when CNG mode is active and the
1498 // packet stream is suspended for a long time.
TEST_F(NetEqDecodingTestWithMutedState,DoNotMuteExtendedCngWithoutPackets)1499 TEST_F(NetEqDecodingTestWithMutedState, DoNotMuteExtendedCngWithoutPackets) {
1500   // Insert one CNG packet.
1501   InsertCngPacket(0);
1502 
1503   // Pull 10 seconds of audio (10 ms audio generated per lap).
1504   for (int i = 0; i < 1000; ++i) {
1505     bool muted;
1506     EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
1507     ASSERT_FALSE(muted);
1508   }
1509   EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
1510 }
1511 
1512 // Verifies that NetEq goes back to normal after a long CNG period with the
1513 // packet stream suspended.
TEST_F(NetEqDecodingTestWithMutedState,RecoverAfterExtendedCngWithoutPackets)1514 TEST_F(NetEqDecodingTestWithMutedState, RecoverAfterExtendedCngWithoutPackets) {
1515   // Insert one CNG packet.
1516   InsertCngPacket(0);
1517 
1518   // Pull 10 seconds of audio (10 ms audio generated per lap).
1519   for (int i = 0; i < 1000; ++i) {
1520     bool muted;
1521     EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
1522   }
1523 
1524   // Insert new data. Timestamp is corrected for the time elapsed since the last
1525   // packet. Verify that normal operation resumes.
1526   InsertPacket(kSamples * counter_);
1527   GetAudioUntilNormal();
1528 }
1529 
1530 class NetEqDecodingTestTwoInstances : public NetEqDecodingTest {
1531  public:
NetEqDecodingTestTwoInstances()1532   NetEqDecodingTestTwoInstances() : NetEqDecodingTest() {}
1533 
SetUp()1534   void SetUp() override {
1535     NetEqDecodingTest::SetUp();
1536     config2_ = config_;
1537   }
1538 
CreateSecondInstance()1539   void CreateSecondInstance() {
1540     neteq2_.reset(NetEq::Create(config2_, CreateBuiltinAudioDecoderFactory()));
1541     ASSERT_TRUE(neteq2_);
1542     LoadDecoders(neteq2_.get());
1543   }
1544 
1545  protected:
1546   std::unique_ptr<NetEq> neteq2_;
1547   NetEq::Config config2_;
1548 };
1549 
1550 namespace {
AudioFramesEqualExceptData(const AudioFrame & a,const AudioFrame & b)1551 ::testing::AssertionResult AudioFramesEqualExceptData(const AudioFrame& a,
1552                                                       const AudioFrame& b) {
1553   if (a.timestamp_ != b.timestamp_)
1554     return ::testing::AssertionFailure() << "timestamp_ diff (" << a.timestamp_
1555                                          << " != " << b.timestamp_ << ")";
1556   if (a.sample_rate_hz_ != b.sample_rate_hz_)
1557     return ::testing::AssertionFailure() << "sample_rate_hz_ diff ("
1558                                          << a.sample_rate_hz_
1559                                          << " != " << b.sample_rate_hz_ << ")";
1560   if (a.samples_per_channel_ != b.samples_per_channel_)
1561     return ::testing::AssertionFailure()
1562            << "samples_per_channel_ diff (" << a.samples_per_channel_
1563            << " != " << b.samples_per_channel_ << ")";
1564   if (a.num_channels_ != b.num_channels_)
1565     return ::testing::AssertionFailure() << "num_channels_ diff ("
1566                                          << a.num_channels_
1567                                          << " != " << b.num_channels_ << ")";
1568   if (a.speech_type_ != b.speech_type_)
1569     return ::testing::AssertionFailure() << "speech_type_ diff ("
1570                                          << a.speech_type_
1571                                          << " != " << b.speech_type_ << ")";
1572   if (a.vad_activity_ != b.vad_activity_)
1573     return ::testing::AssertionFailure() << "vad_activity_ diff ("
1574                                          << a.vad_activity_
1575                                          << " != " << b.vad_activity_ << ")";
1576   return ::testing::AssertionSuccess();
1577 }
1578 
AudioFramesEqual(const AudioFrame & a,const AudioFrame & b)1579 ::testing::AssertionResult AudioFramesEqual(const AudioFrame& a,
1580                                             const AudioFrame& b) {
1581   ::testing::AssertionResult res = AudioFramesEqualExceptData(a, b);
1582   if (!res)
1583     return res;
1584   if (memcmp(
1585       a.data(), b.data(),
1586       a.samples_per_channel_ * a.num_channels_ * sizeof(*a.data())) != 0) {
1587     return ::testing::AssertionFailure() << "data_ diff";
1588   }
1589   return ::testing::AssertionSuccess();
1590 }
1591 
1592 }  // namespace
1593 
// Runs two NetEq instances side by side, one with muted state disabled
// (neteq_) and one with it enabled (neteq2_), feeds them identical packets and
// checks that their output frames agree. While the second instance is muted
// only the frame metadata is compared; otherwise the samples are compared too.
TEST_F(NetEqDecodingTestTwoInstances, CompareMutedStateOnOff) {
  ASSERT_FALSE(config_.enable_muted_state);
  config2_.enable_muted_state = true;
  CreateSecondInstance();

  const size_t kSamples = 10 * 16;
  const size_t kPayloadBytes = kSamples * 2;
  // Frames pulled before new data is inserted; also the basis for the second
  // packet's timestamp.
  const int kNumFramesBeforeResume = 1000;
  // Upper bound on the pulls allowed while waiting for normal speech.
  const int kMaxPullsUntilNormal = 1000;

  // Both instances get the same initial speech packet.
  uint8_t payload[kPayloadBytes] = {0};
  RTPHeader rtp_info;
  PopulateRtpInfo(0, 0, &rtp_info);
  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
  EXPECT_EQ(0, neteq2_->InsertPacket(rtp_info, payload, 0));

  AudioFrame frame_mute_off, frame_mute_on;
  bool muted;

  // Pulls one frame from each instance and compares them. The first instance
  // must never report muted; |muted| ends up holding the second one's state.
  const auto pull_and_compare = [&]() {
    EXPECT_EQ(0, neteq_->GetAudio(&frame_mute_off, &muted));
    EXPECT_FALSE(muted);
    EXPECT_EQ(0, neteq2_->GetAudio(&frame_mute_on, &muted));
    if (muted) {
      EXPECT_TRUE(AudioFramesEqualExceptData(frame_mute_off, frame_mute_on));
    } else {
      EXPECT_TRUE(AudioFramesEqual(frame_mute_off, frame_mute_on));
    }
  };

  for (int i = 0; i < kNumFramesBeforeResume; ++i) {
    std::ostringstream trace;
    trace << "i = " << i;
    SCOPED_TRACE(trace.str());  // Report the iteration on failure.
    pull_and_compare();
  }
  EXPECT_TRUE(muted);

  // Insert new data into both instances. The timestamp is corrected for the
  // time elapsed since the last packet.
  PopulateRtpInfo(0, kSamples * kNumFramesBeforeResume, &rtp_info);
  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
  EXPECT_EQ(0, neteq2_->InsertPacket(rtp_info, payload, 0));

  // Pull until the first instance reports normal speech, with a pull limit so
  // that a stuck NetEq fails the test instead of hanging it.
  int counter = 0;
  while (frame_mute_off.speech_type_ != AudioFrame::kNormalSpeech) {
    ASSERT_LT(counter++, kMaxPullsUntilNormal) << "Test timed out";
    std::ostringstream trace;
    trace << "counter = " << counter;
    SCOPED_TRACE(trace.str());  // Report the iteration on failure.
    pull_and_compare();
  }
  EXPECT_FALSE(muted);
}
1648 
// With no packet ever inserted, LastDecodedTimestamps() must be empty both
// before and after a GetAudio() call.
TEST_F(NetEqDecodingTest, LastDecodedTimestampsEmpty) {
  EXPECT_TRUE(neteq_->LastDecodedTimestamps().empty());

  // One pull with nothing to decode.
  bool muted;
  AudioFrame frame;
  ASSERT_EQ(0, neteq_->GetAudio(&frame, &muted));

  EXPECT_TRUE(neteq_->LastDecodedTimestamps().empty());
}
1659 
// After decoding a single packet, LastDecodedTimestamps() must contain just
// that packet's RTP timestamp, and must be empty again after the next pull.
TEST_F(NetEqDecodingTest, LastDecodedTimestampsOneDecoded) {
  // One 10 ms packet of PCM16b WB data (the PopulateRtpInfo default).
  constexpr size_t kPayloadSamples = 16 * 10;
  constexpr size_t kPayloadBytes = 2 * kPayloadSamples;
  constexpr uint32_t kRtpTimestamp = 0x1234;
  uint8_t payload[kPayloadBytes] = {0};

  RTPHeader rtp_info;
  PopulateRtpInfo(0, kRtpTimestamp, &rtp_info);
  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));

  // The first pull decodes the packet.
  bool muted;
  AudioFrame frame;
  ASSERT_EQ(0, neteq_->GetAudio(&frame, &muted));

  const std::vector<uint32_t> expected_timestamps = {kRtpTimestamp};
  EXPECT_EQ(expected_timestamps, neteq_->LastDecodedTimestamps());

  // The second pull has nothing left to decode.
  ASSERT_EQ(0, neteq_->GetAudio(&frame, &muted));
  EXPECT_TRUE(neteq_->LastDecodedTimestamps().empty());
}
1684 
// When one GetAudio() call decodes two packets, LastDecodedTimestamps() must
// list both RTP timestamps in decoding order.
TEST_F(NetEqDecodingTest, LastDecodedTimestampsTwoDecoded) {
  // Two packets of PCM16b WB data (the PopulateRtpInfo default), 5 ms each, so
  // that NetEq must decode both in the same GetAudio call.
  constexpr size_t kPayloadSamples = 16 * 5;
  constexpr size_t kPayloadBytes = 2 * kPayloadSamples;
  constexpr uint32_t kRtpTimestamp1 = 0x1234;
  constexpr uint32_t kRtpTimestamp2 = kRtpTimestamp1 + kPayloadSamples;
  uint8_t payload[kPayloadBytes] = {0};

  RTPHeader rtp_info;
  PopulateRtpInfo(0, kRtpTimestamp1, &rtp_info);
  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
  PopulateRtpInfo(1, kRtpTimestamp2, &rtp_info);
  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));

  // A single pull.
  bool muted;
  AudioFrame frame;
  ASSERT_EQ(0, neteq_->GetAudio(&frame, &muted));

  const std::vector<uint32_t> expected_timestamps = {kRtpTimestamp1,
                                                     kRtpTimestamp2};
  EXPECT_EQ(expected_timestamps, neteq_->LastDecodedTimestamps());
}
1709 
// Checks the lifetime concealment_events counter. The test alternates bursts
// of ten delivered 10 ms packets with gaps of 1, 2, ..., kNumConcealmentEvents
// lost packets, and expects each gap to count as exactly one concealment
// event regardless of its length.
TEST_F(NetEqDecodingTest, TestConcealmentEvents) {
  const int kNumConcealmentEvents = 19;
  const size_t kSamples = 10 * 16;
  const size_t kPayloadBytes = kSamples * 2;
  int seq_no = 0;
  RTPHeader rtp_info;
  rtp_info.ssrc = 0x1234;     // Just an arbitrary SSRC.
  rtp_info.payloadType = 94;  // PCM16b WB codec.
  rtp_info.markerBit = 0;
  const uint8_t payload[kPayloadBytes] = {0};
  bool muted;

  for (int i = 0; i < kNumConcealmentEvents; i++) {
    // Insert some packets of 10 ms size, pulling one frame per packet. The
    // return values are checked (non-fatally) so that a failing call is
    // reported where it happens instead of only showing up as a wrong
    // statistic at the end.
    for (int j = 0; j < 10; j++) {
      rtp_info.sequenceNumber = seq_no++;
      rtp_info.timestamp = rtp_info.sequenceNumber * kSamples;
      EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
      EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    }

    // Lose a number of packets: skip their sequence numbers but keep pulling
    // audio. The gap grows by one packet per outer iteration.
    const int num_lost = 1 + i;
    for (int j = 0; j < num_lost; j++) {
      seq_no++;
      EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    }
  }

  // Check number of concealment events.
  NetEqLifetimeStatistics stats = neteq_->GetLifetimeStatistics();
  EXPECT_EQ(kNumConcealmentEvents, static_cast<int>(stats.concealment_events));
}
1743 
1744 // Test that the jitter buffer delay stat is computed correctly.
TestJitterBufferDelay(bool apply_packet_loss)1745 void NetEqDecodingTestFaxMode::TestJitterBufferDelay(bool apply_packet_loss) {
1746   const int kNumPackets = 10;
1747   const int kDelayInNumPackets = 2;
1748   const int kPacketLenMs = 10;  // All packets are of 10 ms size.
1749   const size_t kSamples = kPacketLenMs * 16;
1750   const size_t kPayloadBytes = kSamples * 2;
1751   RTPHeader rtp_info;
1752   rtp_info.ssrc = 0x1234;     // Just an arbitrary SSRC.
1753   rtp_info.payloadType = 94;  // PCM16b WB codec.
1754   rtp_info.markerBit = 0;
1755   const uint8_t payload[kPayloadBytes] = {0};
1756   bool muted;
1757   int packets_sent = 0;
1758   int packets_received = 0;
1759   int expected_delay = 0;
1760   while (packets_received < kNumPackets) {
1761     // Insert packet.
1762     if (packets_sent < kNumPackets) {
1763       rtp_info.sequenceNumber = packets_sent++;
1764       rtp_info.timestamp = rtp_info.sequenceNumber * kSamples;
1765       neteq_->InsertPacket(rtp_info, payload, 0);
1766     }
1767 
1768     // Get packet.
1769     if (packets_sent > kDelayInNumPackets) {
1770       neteq_->GetAudio(&out_frame_, &muted);
1771       packets_received++;
1772 
1773       // The delay reported by the jitter buffer never exceeds
1774       // the number of samples previously fetched with GetAudio
1775       // (hence the min()).
1776       int packets_delay = std::min(packets_received, kDelayInNumPackets + 1);
1777 
1778       // The increase of the expected delay is the product of
1779       // the current delay of the jitter buffer in ms * the
1780       // number of samples that are sent for play out.
1781       int current_delay_ms = packets_delay * kPacketLenMs;
1782       expected_delay += current_delay_ms * kSamples;
1783     }
1784   }
1785 
1786   if (apply_packet_loss) {
1787     // Extra call to GetAudio to cause concealment.
1788     neteq_->GetAudio(&out_frame_, &muted);
1789   }
1790 
1791   // Check jitter buffer delay.
1792   NetEqLifetimeStatistics stats = neteq_->GetLifetimeStatistics();
1793   EXPECT_EQ(expected_delay, static_cast<int>(stats.jitter_buffer_delay_ms));
1794 }
1795 
// Jitter buffer delay statistic without the extra GetAudio() call that
// provokes concealment.
TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithoutLoss) {
  const bool kApplyPacketLoss = false;
  TestJitterBufferDelay(kApplyPacketLoss);
}
1799 
// Jitter buffer delay statistic with one extra GetAudio() call that provokes
// concealment.
TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithLoss) {
  const bool kApplyPacketLoss = true;
  TestJitterBufferDelay(kApplyPacketLoss);
}
1803 
1804 }  // namespace webrtc
1805