1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_coding/neteq/include/neteq.h"
12
13 #include <math.h>
14 #include <stdlib.h>
15 #include <string.h> // memset
16
17 #include <algorithm>
18 #include <memory>
19 #include <set>
20 #include <string>
21 #include <vector>
22
23 #include "api/audio_codecs/builtin_audio_decoder_factory.h"
24 #include "common_types.h" // NOLINT(build/include)
25 #include "modules/audio_coding/codecs/pcm16b/pcm16b.h"
26 #include "modules/audio_coding/neteq/tools/audio_loop.h"
27 #include "modules/audio_coding/neteq/tools/rtp_file_source.h"
28 #include "modules/include/module_common_types.h"
29 #include "rtc_base/flags.h"
30 #include "rtc_base/ignore_wundef.h"
31 #include "rtc_base/numerics/safe_conversions.h"
32 #include "rtc_base/protobuf_utils.h"
33 #include "rtc_base/sha1digest.h"
34 #include "rtc_base/stringencode.h"
35 #include "test/field_trial.h"
36 #include "test/gtest.h"
37 #include "test/testsupport/fileutils.h"
38 #include "typedefs.h" // NOLINT(build/include)
39
40 #ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT
41 RTC_PUSH_IGNORING_WUNDEF()
42 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD
43 #include "external/webrtc/webrtc/modules/audio_coding/neteq/neteq_unittest.pb.h"
44 #else
45 #include "modules/audio_coding/neteq/neteq_unittest.pb.h"
46 #endif
47 RTC_POP_IGNORING_WUNDEF()
48 #endif
49
50 DEFINE_bool(gen_ref, false, "Generate reference files.");
51
52 namespace webrtc {
53
54 namespace {
55
// Picks the reference checksum that applies to the platform this binary was
// compiled for. Android and Windows each have separate 32-bit and 64-bit
// references; every other platform uses |checksum_general|.
// The result is a reference to one of the arguments, so it has to be consumed
// (e.g. copied into a std::string) while those arguments are still alive.
const std::string& PlatformChecksum(const std::string& checksum_general,
                                    const std::string& checksum_android_32,
                                    const std::string& checksum_android_64,
                                    const std::string& checksum_win_32,
                                    const std::string& checksum_win_64) {
#if defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_64_BITS)
  return checksum_android_64;
#elif defined(WEBRTC_ANDROID)
  return checksum_android_32;
#elif defined(WEBRTC_WIN) && defined(WEBRTC_ARCH_64_BITS)
  return checksum_win_64;
#elif defined(WEBRTC_WIN)
  return checksum_win_32;
#else
  return checksum_general;
#endif
}
77
78 #ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT
Convert(const webrtc::NetEqNetworkStatistics & stats_raw,webrtc::neteq_unittest::NetEqNetworkStatistics * stats)79 void Convert(const webrtc::NetEqNetworkStatistics& stats_raw,
80 webrtc::neteq_unittest::NetEqNetworkStatistics* stats) {
81 stats->set_current_buffer_size_ms(stats_raw.current_buffer_size_ms);
82 stats->set_preferred_buffer_size_ms(stats_raw.preferred_buffer_size_ms);
83 stats->set_jitter_peaks_found(stats_raw.jitter_peaks_found);
84 stats->set_packet_loss_rate(stats_raw.packet_loss_rate);
85 stats->set_expand_rate(stats_raw.expand_rate);
86 stats->set_speech_expand_rate(stats_raw.speech_expand_rate);
87 stats->set_preemptive_rate(stats_raw.preemptive_rate);
88 stats->set_accelerate_rate(stats_raw.accelerate_rate);
89 stats->set_secondary_decoded_rate(stats_raw.secondary_decoded_rate);
90 stats->set_secondary_discarded_rate(stats_raw.secondary_discarded_rate);
91 stats->set_clockdrift_ppm(stats_raw.clockdrift_ppm);
92 stats->set_added_zero_samples(stats_raw.added_zero_samples);
93 stats->set_mean_waiting_time_ms(stats_raw.mean_waiting_time_ms);
94 stats->set_median_waiting_time_ms(stats_raw.median_waiting_time_ms);
95 stats->set_min_waiting_time_ms(stats_raw.min_waiting_time_ms);
96 stats->set_max_waiting_time_ms(stats_raw.max_waiting_time_ms);
97 }
98
Convert(const webrtc::RtcpStatistics & stats_raw,webrtc::neteq_unittest::RtcpStatistics * stats)99 void Convert(const webrtc::RtcpStatistics& stats_raw,
100 webrtc::neteq_unittest::RtcpStatistics* stats) {
101 stats->set_fraction_lost(stats_raw.fraction_lost);
102 stats->set_cumulative_lost(stats_raw.packets_lost);
103 stats->set_extended_max_sequence_number(
104 stats_raw.extended_highest_sequence_number);
105 stats->set_jitter(stats_raw.jitter);
106 }
107
AddMessage(FILE * file,rtc::MessageDigest * digest,const std::string & message)108 void AddMessage(FILE* file, rtc::MessageDigest* digest,
109 const std::string& message) {
110 int32_t size = message.length();
111 if (file)
112 ASSERT_EQ(1u, fwrite(&size, sizeof(size), 1, file));
113 digest->Update(&size, sizeof(size));
114
115 if (file)
116 ASSERT_EQ(static_cast<size_t>(size),
117 fwrite(message.data(), sizeof(char), size, file));
118 digest->Update(message.data(), sizeof(char) * size);
119 }
120
121 #endif // WEBRTC_NETEQ_UNITTEST_BITEXACT
122
LoadDecoders(webrtc::NetEq * neteq)123 void LoadDecoders(webrtc::NetEq* neteq) {
124 ASSERT_EQ(true,
125 neteq->RegisterPayloadType(0, SdpAudioFormat("pcmu", 8000, 1)));
126 // Use non-SdpAudioFormat argument when registering PCMa, so that we get test
127 // coverage for that as well.
128 ASSERT_EQ(0, neteq->RegisterPayloadType(webrtc::NetEqDecoder::kDecoderPCMa,
129 "pcma", 8));
130 #ifdef WEBRTC_CODEC_ILBC
131 ASSERT_EQ(true,
132 neteq->RegisterPayloadType(102, SdpAudioFormat("ilbc", 8000, 1)));
133 #endif
134 #if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)
135 ASSERT_EQ(true,
136 neteq->RegisterPayloadType(103, SdpAudioFormat("isac", 16000, 1)));
137 #endif
138 #ifdef WEBRTC_CODEC_ISAC
139 ASSERT_EQ(true,
140 neteq->RegisterPayloadType(104, SdpAudioFormat("isac", 32000, 1)));
141 #endif
142 #ifdef WEBRTC_CODEC_OPUS
143 ASSERT_EQ(true,
144 neteq->RegisterPayloadType(
145 111, SdpAudioFormat("opus", 48000, 2, {{"stereo", "0"}})));
146 #endif
147 ASSERT_EQ(true,
148 neteq->RegisterPayloadType(93, SdpAudioFormat("L16", 8000, 1)));
149 ASSERT_EQ(true,
150 neteq->RegisterPayloadType(94, SdpAudioFormat("L16", 16000, 1)));
151 ASSERT_EQ(true,
152 neteq->RegisterPayloadType(95, SdpAudioFormat("L16", 32000, 1)));
153 ASSERT_EQ(true,
154 neteq->RegisterPayloadType(13, SdpAudioFormat("cn", 8000, 1)));
155 ASSERT_EQ(true,
156 neteq->RegisterPayloadType(98, SdpAudioFormat("cn", 16000, 1)));
157 }
158 } // namespace
159
160 class ResultSink {
161 public:
162 explicit ResultSink(const std::string& output_file);
163 ~ResultSink();
164
165 template<typename T> void AddResult(const T* test_results, size_t length);
166
167 void AddResult(const NetEqNetworkStatistics& stats);
168 void AddResult(const RtcpStatistics& stats);
169
170 void VerifyChecksum(const std::string& ref_check_sum);
171
172 private:
173 FILE* output_fp_;
174 std::unique_ptr<rtc::MessageDigest> digest_;
175 };
176
ResultSink(const std::string & output_file)177 ResultSink::ResultSink(const std::string &output_file)
178 : output_fp_(nullptr),
179 digest_(new rtc::Sha1Digest()) {
180 if (!output_file.empty()) {
181 output_fp_ = fopen(output_file.c_str(), "wb");
182 EXPECT_TRUE(output_fp_ != NULL);
183 }
184 }
185
~ResultSink()186 ResultSink::~ResultSink() {
187 if (output_fp_)
188 fclose(output_fp_);
189 }
190
191 template<typename T>
AddResult(const T * test_results,size_t length)192 void ResultSink::AddResult(const T* test_results, size_t length) {
193 if (output_fp_) {
194 ASSERT_EQ(length, fwrite(test_results, sizeof(T), length, output_fp_));
195 }
196 digest_->Update(test_results, sizeof(T) * length);
197 }
198
AddResult(const NetEqNetworkStatistics & stats_raw)199 void ResultSink::AddResult(const NetEqNetworkStatistics& stats_raw) {
200 #ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT
201 neteq_unittest::NetEqNetworkStatistics stats;
202 Convert(stats_raw, &stats);
203
204 ProtoString stats_string;
205 ASSERT_TRUE(stats.SerializeToString(&stats_string));
206 AddMessage(output_fp_, digest_.get(), stats_string);
207 #else
208 FAIL() << "Writing to reference file requires Proto Buffer.";
209 #endif // WEBRTC_NETEQ_UNITTEST_BITEXACT
210 }
211
AddResult(const RtcpStatistics & stats_raw)212 void ResultSink::AddResult(const RtcpStatistics& stats_raw) {
213 #ifdef WEBRTC_NETEQ_UNITTEST_BITEXACT
214 neteq_unittest::RtcpStatistics stats;
215 Convert(stats_raw, &stats);
216
217 ProtoString stats_string;
218 ASSERT_TRUE(stats.SerializeToString(&stats_string));
219 AddMessage(output_fp_, digest_.get(), stats_string);
220 #else
221 FAIL() << "Writing to reference file requires Proto Buffer.";
222 #endif // WEBRTC_NETEQ_UNITTEST_BITEXACT
223 }
224
VerifyChecksum(const std::string & checksum)225 void ResultSink::VerifyChecksum(const std::string& checksum) {
226 std::vector<char> buffer;
227 buffer.resize(digest_->Size());
228 digest_->Finish(&buffer[0], buffer.size());
229 const std::string result = rtc::hex_encode(&buffer[0], digest_->Size());
230 EXPECT_EQ(checksum, result);
231 }
232
233 class NetEqDecodingTest : public ::testing::Test {
234 protected:
235 // NetEQ must be polled for data once every 10 ms. Thus, neither of the
236 // constants below can be changed.
237 static const int kTimeStepMs = 10;
238 static const size_t kBlockSize8kHz = kTimeStepMs * 8;
239 static const size_t kBlockSize16kHz = kTimeStepMs * 16;
240 static const size_t kBlockSize32kHz = kTimeStepMs * 32;
241 static const size_t kBlockSize48kHz = kTimeStepMs * 48;
242 static const int kInitSampleRateHz = 8000;
243
244 NetEqDecodingTest();
245 virtual void SetUp();
246 virtual void TearDown();
247 void SelectDecoders(NetEqDecoder* used_codec);
248 void OpenInputFile(const std::string &rtp_file);
249 void Process();
250
251 void DecodeAndCompare(const std::string& rtp_file,
252 const std::string& output_checksum,
253 const std::string& network_stats_checksum,
254 const std::string& rtcp_stats_checksum,
255 bool gen_ref);
256
257 static void PopulateRtpInfo(int frame_index,
258 int timestamp,
259 RTPHeader* rtp_info);
260 static void PopulateCng(int frame_index,
261 int timestamp,
262 RTPHeader* rtp_info,
263 uint8_t* payload,
264 size_t* payload_len);
265
266 void WrapTest(uint16_t start_seq_no, uint32_t start_timestamp,
267 const std::set<uint16_t>& drop_seq_numbers,
268 bool expect_seq_no_wrap, bool expect_timestamp_wrap);
269
270 void LongCngWithClockDrift(double drift_factor,
271 double network_freeze_ms,
272 bool pull_audio_during_freeze,
273 int delay_tolerance_ms,
274 int max_time_to_speech_ms);
275
276 void DuplicateCng();
277
278 NetEq* neteq_;
279 NetEq::Config config_;
280 std::unique_ptr<test::RtpFileSource> rtp_source_;
281 std::unique_ptr<test::Packet> packet_;
282 unsigned int sim_clock_;
283 AudioFrame out_frame_;
284 int output_sample_rate_;
285 int algorithmic_delay_ms_;
286 };
287
288 // Allocating the static const so that it can be passed by reference.
289 const int NetEqDecodingTest::kTimeStepMs;
290 const size_t NetEqDecodingTest::kBlockSize8kHz;
291 const size_t NetEqDecodingTest::kBlockSize16kHz;
292 const size_t NetEqDecodingTest::kBlockSize32kHz;
293 const int NetEqDecodingTest::kInitSampleRateHz;
294
NetEqDecodingTest()295 NetEqDecodingTest::NetEqDecodingTest()
296 : neteq_(NULL),
297 config_(),
298 sim_clock_(0),
299 output_sample_rate_(kInitSampleRateHz),
300 algorithmic_delay_ms_(0) {
301 config_.sample_rate_hz = kInitSampleRateHz;
302 }
303
SetUp()304 void NetEqDecodingTest::SetUp() {
305 neteq_ = NetEq::Create(config_, CreateBuiltinAudioDecoderFactory());
306 NetEqNetworkStatistics stat;
307 ASSERT_EQ(0, neteq_->NetworkStatistics(&stat));
308 algorithmic_delay_ms_ = stat.current_buffer_size_ms;
309 ASSERT_TRUE(neteq_);
310 LoadDecoders(neteq_);
311 }
312
TearDown()313 void NetEqDecodingTest::TearDown() {
314 delete neteq_;
315 }
316
OpenInputFile(const std::string & rtp_file)317 void NetEqDecodingTest::OpenInputFile(const std::string &rtp_file) {
318 rtp_source_.reset(test::RtpFileSource::Create(rtp_file));
319 }
320
Process()321 void NetEqDecodingTest::Process() {
322 // Check if time to receive.
323 while (packet_ && sim_clock_ >= packet_->time_ms()) {
324 if (packet_->payload_length_bytes() > 0) {
325 #ifndef WEBRTC_CODEC_ISAC
326 // Ignore payload type 104 (iSAC-swb) if ISAC is not supported.
327 if (packet_->header().payloadType != 104)
328 #endif
329 ASSERT_EQ(0,
330 neteq_->InsertPacket(
331 packet_->header(),
332 rtc::ArrayView<const uint8_t>(
333 packet_->payload(), packet_->payload_length_bytes()),
334 static_cast<uint32_t>(packet_->time_ms() *
335 (output_sample_rate_ / 1000))));
336 }
337 // Get next packet.
338 packet_ = rtp_source_->NextPacket();
339 }
340
341 // Get audio from NetEq.
342 bool muted;
343 ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
344 ASSERT_FALSE(muted);
345 ASSERT_TRUE((out_frame_.samples_per_channel_ == kBlockSize8kHz) ||
346 (out_frame_.samples_per_channel_ == kBlockSize16kHz) ||
347 (out_frame_.samples_per_channel_ == kBlockSize32kHz) ||
348 (out_frame_.samples_per_channel_ == kBlockSize48kHz));
349 output_sample_rate_ = out_frame_.sample_rate_hz_;
350 EXPECT_EQ(output_sample_rate_, neteq_->last_output_sample_rate_hz());
351
352 // Increase time.
353 sim_clock_ += kTimeStepMs;
354 }
355
DecodeAndCompare(const std::string & rtp_file,const std::string & output_checksum,const std::string & network_stats_checksum,const std::string & rtcp_stats_checksum,bool gen_ref)356 void NetEqDecodingTest::DecodeAndCompare(
357 const std::string& rtp_file,
358 const std::string& output_checksum,
359 const std::string& network_stats_checksum,
360 const std::string& rtcp_stats_checksum,
361 bool gen_ref) {
362 OpenInputFile(rtp_file);
363
364 std::string ref_out_file =
365 gen_ref ? webrtc::test::OutputPath() + "neteq_universal_ref.pcm" : "";
366 ResultSink output(ref_out_file);
367
368 std::string stat_out_file =
369 gen_ref ? webrtc::test::OutputPath() + "neteq_network_stats.dat" : "";
370 ResultSink network_stats(stat_out_file);
371
372 std::string rtcp_out_file =
373 gen_ref ? webrtc::test::OutputPath() + "neteq_rtcp_stats.dat" : "";
374 ResultSink rtcp_stats(rtcp_out_file);
375
376 packet_ = rtp_source_->NextPacket();
377 int i = 0;
378 uint64_t last_concealed_samples = 0;
379 uint64_t last_total_samples_received = 0;
380 while (packet_) {
381 std::ostringstream ss;
382 ss << "Lap number " << i++ << " in DecodeAndCompare while loop";
383 SCOPED_TRACE(ss.str()); // Print out the parameter values on failure.
384 ASSERT_NO_FATAL_FAILURE(Process());
385 ASSERT_NO_FATAL_FAILURE(output.AddResult(
386 out_frame_.data(), out_frame_.samples_per_channel_));
387
388 // Query the network statistics API once per second
389 if (sim_clock_ % 1000 == 0) {
390 // Process NetworkStatistics.
391 NetEqNetworkStatistics current_network_stats;
392 ASSERT_EQ(0, neteq_->NetworkStatistics(¤t_network_stats));
393 ASSERT_NO_FATAL_FAILURE(network_stats.AddResult(current_network_stats));
394
395 // Compare with CurrentDelay, which should be identical.
396 EXPECT_EQ(current_network_stats.current_buffer_size_ms,
397 neteq_->CurrentDelayMs());
398
399 // Verify that liftime stats and network stats report similar loss
400 // concealment rates.
401 auto lifetime_stats = neteq_->GetLifetimeStatistics();
402 const uint64_t delta_concealed_samples =
403 lifetime_stats.concealed_samples - last_concealed_samples;
404 last_concealed_samples = lifetime_stats.concealed_samples;
405 const uint64_t delta_total_samples_received =
406 lifetime_stats.total_samples_received - last_total_samples_received;
407 last_total_samples_received = lifetime_stats.total_samples_received;
408 // The tolerance is 1% but expressed in Q14.
409 EXPECT_NEAR(
410 (delta_concealed_samples << 14) / delta_total_samples_received,
411 current_network_stats.expand_rate, (2 << 14) / 100.0);
412
413 // Process RTCPstat.
414 RtcpStatistics current_rtcp_stats;
415 neteq_->GetRtcpStatistics(¤t_rtcp_stats);
416 ASSERT_NO_FATAL_FAILURE(rtcp_stats.AddResult(current_rtcp_stats));
417 }
418 }
419
420 SCOPED_TRACE("Check output audio.");
421 output.VerifyChecksum(output_checksum);
422 SCOPED_TRACE("Check network stats.");
423 network_stats.VerifyChecksum(network_stats_checksum);
424 SCOPED_TRACE("Check rtcp stats.");
425 rtcp_stats.VerifyChecksum(rtcp_stats_checksum);
426 }
427
PopulateRtpInfo(int frame_index,int timestamp,RTPHeader * rtp_info)428 void NetEqDecodingTest::PopulateRtpInfo(int frame_index,
429 int timestamp,
430 RTPHeader* rtp_info) {
431 rtp_info->sequenceNumber = frame_index;
432 rtp_info->timestamp = timestamp;
433 rtp_info->ssrc = 0x1234; // Just an arbitrary SSRC.
434 rtp_info->payloadType = 94; // PCM16b WB codec.
435 rtp_info->markerBit = 0;
436 }
437
PopulateCng(int frame_index,int timestamp,RTPHeader * rtp_info,uint8_t * payload,size_t * payload_len)438 void NetEqDecodingTest::PopulateCng(int frame_index,
439 int timestamp,
440 RTPHeader* rtp_info,
441 uint8_t* payload,
442 size_t* payload_len) {
443 rtp_info->sequenceNumber = frame_index;
444 rtp_info->timestamp = timestamp;
445 rtp_info->ssrc = 0x1234; // Just an arbitrary SSRC.
446 rtp_info->payloadType = 98; // WB CNG.
447 rtp_info->markerBit = 0;
448 payload[0] = 64; // Noise level -64 dBov, quite arbitrarily chosen.
449 *payload_len = 1; // Only noise level, no spectral parameters.
450 }
451
452 #if !defined(WEBRTC_IOS) && defined(WEBRTC_NETEQ_UNITTEST_BITEXACT) && \
453 (defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)) && \
454 defined(WEBRTC_CODEC_ILBC) && !defined(WEBRTC_ARCH_ARM64)
455 #define MAYBE_TestBitExactness TestBitExactness
456 #else
457 #define MAYBE_TestBitExactness DISABLED_TestBitExactness
458 #endif
TEST_F(NetEqDecodingTest,MAYBE_TestBitExactness)459 TEST_F(NetEqDecodingTest, MAYBE_TestBitExactness) {
460 const std::string input_rtp_file =
461 webrtc::test::ResourcePath("audio_coding/neteq_universal_new", "rtp");
462
463 const std::string output_checksum = PlatformChecksum(
464 "09fa7646e2ad032a0b156177b95f09012430f81f",
465 "1c64eb8b55ce8878676c6a1e6ddd78f48de0668b",
466 "not used",
467 "09fa7646e2ad032a0b156177b95f09012430f81f",
468 "759fef89a5de52bd17e733dc255c671ce86be909");
469
470 const std::string network_stats_checksum =
471 PlatformChecksum("5b4262ca328e5f066af5d34f3380521583dd20de",
472 "80235b6d727281203acb63b98f9a9e85d95f7ec0",
473 "not used",
474 "5b4262ca328e5f066af5d34f3380521583dd20de",
475 "5b4262ca328e5f066af5d34f3380521583dd20de");
476
477 const std::string rtcp_stats_checksum = PlatformChecksum(
478 "b8880bf9fed2487efbddcb8d94b9937a29ae521d",
479 "f3f7b3d3e71d7e635240b5373b57df6a7e4ce9d4",
480 "not used",
481 "b8880bf9fed2487efbddcb8d94b9937a29ae521d",
482 "b8880bf9fed2487efbddcb8d94b9937a29ae521d");
483
484 DecodeAndCompare(input_rtp_file,
485 output_checksum,
486 network_stats_checksum,
487 rtcp_stats_checksum,
488 FLAG_gen_ref);
489 }
490
491 #if !defined(WEBRTC_IOS) && \
492 defined(WEBRTC_NETEQ_UNITTEST_BITEXACT) && \
493 defined(WEBRTC_CODEC_OPUS)
494 #define MAYBE_TestOpusBitExactness TestOpusBitExactness
495 #else
496 #define MAYBE_TestOpusBitExactness DISABLED_TestOpusBitExactness
497 #endif
TEST_F(NetEqDecodingTest,MAYBE_TestOpusBitExactness)498 TEST_F(NetEqDecodingTest, MAYBE_TestOpusBitExactness) {
499 const std::string input_rtp_file =
500 webrtc::test::ResourcePath("audio_coding/neteq_opus", "rtp");
501
502 const std::string output_checksum = PlatformChecksum(
503 "7ea28d7edf9395f4ac8e8d8dd3a9e5c620b1bf48",
504 "5b1e691ab1c4465c742d6d944bc71e3b1c0e4c0e",
505 "b096114dd8c233eaf2b0ce9802ac95af13933772",
506 "7ea28d7edf9395f4ac8e8d8dd3a9e5c620b1bf48",
507 "7ea28d7edf9395f4ac8e8d8dd3a9e5c620b1bf48");
508
509 const std::string network_stats_checksum =
510 PlatformChecksum("9e72233c78baf685e500dd6c94212b30a4c5f27d",
511 "9a37270e4242fbd31e80bb47dc5e7ab82cf2d557",
512 "4f1e9734bc80a290faaf9d611efcb8d7802dbc4f",
513 "9e72233c78baf685e500dd6c94212b30a4c5f27d",
514 "9e72233c78baf685e500dd6c94212b30a4c5f27d");
515
516 const std::string rtcp_stats_checksum = PlatformChecksum(
517 "e37c797e3de6a64dda88c9ade7a013d022a2e1e0",
518 "e37c797e3de6a64dda88c9ade7a013d022a2e1e0",
519 "e37c797e3de6a64dda88c9ade7a013d022a2e1e0",
520 "e37c797e3de6a64dda88c9ade7a013d022a2e1e0",
521 "e37c797e3de6a64dda88c9ade7a013d022a2e1e0");
522
523 DecodeAndCompare(input_rtp_file,
524 output_checksum,
525 network_stats_checksum,
526 rtcp_stats_checksum,
527 FLAG_gen_ref);
528 }
529
530 // This test fixture is identical to NetEqDecodingTest, except that it enables
531 // the WebRTC-NetEqOpusDtxDelayFix field trial.
532 // TODO(bugs.webrtc.org/8488): When the field trial is over and the feature is
533 // default enabled, remove this fixture class and let the
534 // TestOpusDtxBitExactness test build directly on NetEqDecodingTest.
535 class NetEqDecodingTestWithOpusDtxFieldTrial : public NetEqDecodingTest {
536 public:
NetEqDecodingTestWithOpusDtxFieldTrial()537 NetEqDecodingTestWithOpusDtxFieldTrial()
538 : override_field_trials_("WebRTC-NetEqOpusDtxDelayFix/Enabled/") {}
539
540 private:
541 test::ScopedFieldTrials override_field_trials_;
542 };
543
544 #if !defined(WEBRTC_IOS) && \
545 defined(WEBRTC_NETEQ_UNITTEST_BITEXACT) && \
546 defined(WEBRTC_CODEC_OPUS)
547 #define MAYBE_TestOpusDtxBitExactness TestOpusDtxBitExactness
548 #else
549 #define MAYBE_TestOpusDtxBitExactness DISABLED_TestOpusDtxBitExactness
550 #endif
TEST_F(NetEqDecodingTestWithOpusDtxFieldTrial,MAYBE_TestOpusDtxBitExactness)551 TEST_F(NetEqDecodingTestWithOpusDtxFieldTrial, MAYBE_TestOpusDtxBitExactness) {
552 const std::string input_rtp_file =
553 webrtc::test::ResourcePath("audio_coding/neteq_opus_dtx", "rtp");
554
555 const std::string output_checksum =
556 PlatformChecksum("713af6c92881f5aab1285765ee6680da9d1c06ce",
557 "3ec991b96872123f1554c03c543ca5d518431e46",
558 "da9f9a2d94e0c2d67342fad4965d7b91cda50b25",
559 "713af6c92881f5aab1285765ee6680da9d1c06ce",
560 "713af6c92881f5aab1285765ee6680da9d1c06ce");
561
562 const std::string network_stats_checksum =
563 "bab58dc587d956f326056d7340c96eb9d2d3cc21";
564
565 const std::string rtcp_stats_checksum =
566 "ac27a7f305efb58b39bf123dccee25dee5758e63";
567
568 DecodeAndCompare(input_rtp_file, output_checksum, network_stats_checksum,
569 rtcp_stats_checksum, FLAG_gen_ref);
570 }
571
572 // Use fax mode to avoid time-scaling. This is to simplify the testing of
573 // packet waiting times in the packet buffer.
574 class NetEqDecodingTestFaxMode : public NetEqDecodingTest {
575 protected:
NetEqDecodingTestFaxMode()576 NetEqDecodingTestFaxMode() : NetEqDecodingTest() {
577 config_.playout_mode = kPlayoutFax;
578 }
579 void TestJitterBufferDelay(bool apply_packet_loss);
580 };
581
// Checks the waiting-time statistics for packets that all arrive at once but
// are played out one per 10 ms, and checks that reading the statistics resets
// them.
TEST_F(NetEqDecodingTestFaxMode, TestFrameWaitingTimeStatistics) {
  // Insert 30 dummy packets at once. Each packet contains 10 ms 16 kHz audio.
  const size_t kNumFrames = 30;
  const size_t kSamples = 10 * 16;
  const size_t kPayloadBytes = kSamples * 2;
  for (size_t frame = 0; frame < kNumFrames; ++frame) {
    const uint8_t payload[kPayloadBytes] = {0};
    RTPHeader rtp_info;
    rtp_info.sequenceNumber = rtc::checked_cast<uint16_t>(frame);
    rtp_info.timestamp = rtc::checked_cast<uint32_t>(frame * kSamples);
    rtp_info.ssrc = 0x1234;     // Just an arbitrary SSRC.
    rtp_info.payloadType = 94;  // PCM16b WB codec.
    rtp_info.markerBit = 0;
    ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
  }

  // Pull out all data, one 10 ms block per inserted packet.
  for (size_t remaining = kNumFrames; remaining > 0; --remaining) {
    bool muted;
    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
  }

  NetEqNetworkStatistics stats;
  EXPECT_EQ(0, neteq_->NetworkStatistics(&stats));
  // Since all frames are dumped into NetEQ at once, but pulled out with 10 ms
  // spacing (per definition), we expect the delay to increase with 10 ms for
  // each packet. Thus, we are calculating the statistics for a series from 10
  // to 300, in steps of 10 ms.
  EXPECT_EQ(155, stats.mean_waiting_time_ms);
  EXPECT_EQ(155, stats.median_waiting_time_ms);
  EXPECT_EQ(10, stats.min_waiting_time_ms);
  EXPECT_EQ(300, stats.max_waiting_time_ms);

  // A second query is expected to report -1 for every waiting-time value,
  // showing that the first query reset the statistics.
  EXPECT_EQ(0, neteq_->NetworkStatistics(&stats));
  EXPECT_EQ(-1, stats.mean_waiting_time_ms);
  EXPECT_EQ(-1, stats.median_waiting_time_ms);
  EXPECT_EQ(-1, stats.min_waiting_time_ms);
  EXPECT_EQ(-1, stats.max_waiting_time_ms);
}
622
// Delivers packets slightly faster than they are played out and checks the
// clock drift that NetEq reports as a result.
TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimeNegative) {
  const int kNumFrames = 3000;  // Needed for convergence.
  const size_t kSamples = 10 * 16;
  const size_t kPayloadBytes = kSamples * 2;
  int frame_index = 0;
  while (frame_index < kNumFrames) {
    // Insert one packet each time, except every 10th time where we insert two
    // packets at once. This will create a negative clock-drift of approx. 10%.
    const int num_packets = (frame_index % 10 == 0) ? 2 : 1;
    for (int remaining = num_packets; remaining > 0; --remaining) {
      uint8_t payload[kPayloadBytes] = {0};
      RTPHeader rtp_info;
      PopulateRtpInfo(frame_index, frame_index * kSamples, &rtp_info);
      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
      ++frame_index;
    }

    // Pull out data once per turn, i.e. once per simulated 10 ms.
    bool muted;
    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
  }

  NetEqNetworkStatistics network_stats;
  ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
  EXPECT_EQ(-103192, network_stats.clockdrift_ppm);
}
650
// Delivers packets slightly slower than they are played out and checks the
// clock drift that NetEq reports as a result.
TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimePositive) {
  const int kNumFrames = 5000;  // Needed for convergence.
  const size_t kSamples = 10 * 16;
  const size_t kPayloadBytes = kSamples * 2;
  int frame_index = 0;
  for (int turn = 0; turn < kNumFrames; ++turn) {
    // Insert one packet each time, except every 10th time where we don't insert
    // any packet. This will create a positive clock-drift of approx. 11%.
    const int num_packets = (turn % 10 == 9) ? 0 : 1;
    for (int remaining = num_packets; remaining > 0; --remaining) {
      uint8_t payload[kPayloadBytes] = {0};
      RTPHeader rtp_info;
      PopulateRtpInfo(frame_index, frame_index * kSamples, &rtp_info);
      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
      ++frame_index;
    }

    // Pull out data once per turn, i.e. once per simulated 10 ms.
    bool muted;
    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
  }

  NetEqNetworkStatistics network_stats;
  ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
  EXPECT_EQ(110953, network_stats.clockdrift_ppm);
}
678
// Simulates 5 seconds of speech followed by 60 seconds of comfort noise (CNG),
// an optional network freeze, and finally speech again, while packet arrival
// times are scaled by a clock drift.
//   drift_factor: multiplier applied to the nominal interval between packet
//       arrivals; a value below 1 makes packets arrive faster than nominal.
//   network_freeze_ms: if greater than 0, audio is pulled for this long
//       without inserting packets, after which the missed CNG packets are
//       inserted in one burst.
//   pull_audio_during_freeze: if true, GetAudio is called once in the middle
//       of that burst.
//   delay_tolerance_ms: allowed difference, in ms, between the delay measured
//       before the CNG period and after speech has resumed.
//   max_time_to_speech_ms: upper bound on the time it takes for the output to
//       become normal speech again.
void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor,
                                              double network_freeze_ms,
                                              bool pull_audio_during_freeze,
                                              int delay_tolerance_ms,
                                              int max_time_to_speech_ms) {
  uint16_t seq_no = 0;
  uint32_t timestamp = 0;
  const int kFrameSizeMs = 30;
  const size_t kSamples = kFrameSizeMs * 16;
  const size_t kPayloadBytes = kSamples * 2;
  // Time at which the next packet is due to arrive, on the same time line as
  // |t_ms|.
  double next_input_time_ms = 0.0;
  // Simulated time; advanced by 10 ms for every block pulled from NetEq.
  double t_ms;
  bool muted;

  // Insert speech for 5 seconds.
  const int kSpeechDurationMs = 5000;
  for (t_ms = 0; t_ms < kSpeechDurationMs; t_ms += 10) {
    // Each turn in this for loop is 10 ms.
    while (next_input_time_ms <= t_ms) {
      // Insert one 30 ms speech frame.
      uint8_t payload[kPayloadBytes] = {0};
      RTPHeader rtp_info;
      PopulateRtpInfo(seq_no, timestamp, &rtp_info);
      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
      ++seq_no;
      timestamp += kSamples;
      next_input_time_ms += static_cast<double>(kFrameSizeMs) * drift_factor;
    }
    // Pull out data once.
    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
  }

  EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
  rtc::Optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp();
  ASSERT_TRUE(playout_timestamp);
  // Distance, in samples, between the time stamp of the next packet to be
  // inserted and the current playout time stamp.
  int32_t delay_before = timestamp - *playout_timestamp;

  // Insert CNG for 1 minute (= 60000 ms).
  const int kCngPeriodMs = 100;
  const int kCngPeriodSamples = kCngPeriodMs * 16;  // Period in 16 kHz samples.
  const int kCngDurationMs = 60000;
  for (; t_ms < kSpeechDurationMs + kCngDurationMs; t_ms += 10) {
    // Each turn in this for loop is 10 ms.
    while (next_input_time_ms <= t_ms) {
      // Insert one CNG frame each 100 ms.
      uint8_t payload[kPayloadBytes];
      size_t payload_len;
      RTPHeader rtp_info;
      PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
      ASSERT_EQ(0, neteq_->InsertPacket(
                       rtp_info,
                       rtc::ArrayView<const uint8_t>(payload, payload_len), 0));
      ++seq_no;
      timestamp += kCngPeriodSamples;
      next_input_time_ms += static_cast<double>(kCngPeriodMs) * drift_factor;
    }
    // Pull out data once.
    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
  }

  EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);

  if (network_freeze_ms > 0) {
    // First keep pulling audio for |network_freeze_ms| without inserting
    // any data, then insert CNG data corresponding to |network_freeze_ms|
    // without pulling any output audio.
    const double loop_end_time = t_ms + network_freeze_ms;
    for (; t_ms < loop_end_time; t_ms += 10) {
      // Pull out data once.
      ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
      ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
      EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
    }
    bool pull_once = pull_audio_during_freeze;
    // If |pull_once| is true, GetAudio will be called once half-way through
    // the network recovery period.
    double pull_time_ms = (t_ms + next_input_time_ms) / 2;
    // Catch up: insert the CNG packets that came due during the freeze.
    while (next_input_time_ms <= t_ms) {
      if (pull_once && next_input_time_ms >= pull_time_ms) {
        pull_once = false;
        // Pull out data once.
        ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
        ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
        EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
        t_ms += 10;
      }
      // Insert one CNG frame each 100 ms.
      uint8_t payload[kPayloadBytes];
      size_t payload_len;
      RTPHeader rtp_info;
      PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
      ASSERT_EQ(0, neteq_->InsertPacket(
                       rtp_info,
                       rtc::ArrayView<const uint8_t>(payload, payload_len), 0));
      ++seq_no;
      timestamp += kCngPeriodSamples;
      next_input_time_ms += kCngPeriodMs * drift_factor;
    }
  }

  // Insert speech again until output type is speech.
  double speech_restart_time_ms = t_ms;
  while (out_frame_.speech_type_ != AudioFrame::kNormalSpeech) {
    // Each turn in this while loop is 10 ms.
    while (next_input_time_ms <= t_ms) {
      // Insert one 30 ms speech frame.
      uint8_t payload[kPayloadBytes] = {0};
      RTPHeader rtp_info;
      PopulateRtpInfo(seq_no, timestamp, &rtp_info);
      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
      ++seq_no;
      timestamp += kSamples;
      next_input_time_ms += kFrameSizeMs * drift_factor;
    }
    // Pull out data once.
    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
    // Increase clock.
    t_ms += 10;
  }

  // Check that the speech starts again within reasonable time.
  double time_until_speech_returns_ms = t_ms - speech_restart_time_ms;
  EXPECT_LT(time_until_speech_returns_ms, max_time_to_speech_ms);
  playout_timestamp = neteq_->GetPlayoutTimestamp();
  ASSERT_TRUE(playout_timestamp);
  int32_t delay_after = timestamp - *playout_timestamp;
  // Compare delay before and after, and make sure they differ by at most
  // |delay_tolerance_ms|. The factor 16 converts ms to samples at 16 kHz.
  EXPECT_LE(delay_after, delay_before + delay_tolerance_ms * 16);
  EXPECT_GE(delay_after, delay_before - delay_tolerance_ms * 16);
}
812
// Long CNG period with the sender clock running fast and no network freeze.
TEST_F(NetEqDecodingTest, LongCngWithNegativeClockDrift) {
  // A factor below 1.0 shrinks the interval between inserted packets, i.e. a
  // clock drift of -25 ms / s (sender faster than receiver).
  constexpr double kClockDriftFactor = 1000.0 / (1000.0 + 25.0);
  constexpr double kFreezeDurationMs = 0.0;
  constexpr bool kPullOnceWhileRecovering = false;
  constexpr int kAllowedDelayDeviationMs = 20;
  constexpr int kSpeechReturnDeadlineMs = 100;
  LongCngWithClockDrift(kClockDriftFactor, kFreezeDurationMs,
                        kPullOnceWhileRecovering, kAllowedDelayDeviationMs,
                        kSpeechReturnDeadlineMs);
}
826
// Long CNG period with the sender clock running slow and no network freeze.
TEST_F(NetEqDecodingTest, LongCngWithPositiveClockDrift) {
  // A factor above 1.0 stretches the interval between inserted packets, i.e. a
  // clock drift of +25 ms / s (sender slower than receiver).
  constexpr double kClockDriftFactor = 1000.0 / (1000.0 - 25.0);
  constexpr double kFreezeDurationMs = 0.0;
  constexpr bool kPullOnceWhileRecovering = false;
  constexpr int kAllowedDelayDeviationMs = 20;
  constexpr int kSpeechReturnDeadlineMs = 100;
  LongCngWithClockDrift(kClockDriftFactor, kFreezeDurationMs,
                        kPullOnceWhileRecovering, kAllowedDelayDeviationMs,
                        kSpeechReturnDeadlineMs);
}
840
// Long CNG period with a fast sender clock plus a 5 second network freeze.
// This variant uses wider tolerances (50 ms delay, 200 ms to speech) than the
// other drift tests.
TEST_F(NetEqDecodingTest, LongCngWithNegativeClockDriftNetworkFreeze) {
  // Clock drift of -25 ms / s (sender faster than receiver).
  constexpr double kClockDriftFactor = 1000.0 / (1000.0 + 25.0);
  constexpr double kFreezeDurationMs = 5000.0;
  constexpr bool kPullOnceWhileRecovering = false;
  constexpr int kAllowedDelayDeviationMs = 50;
  constexpr int kSpeechReturnDeadlineMs = 200;
  LongCngWithClockDrift(kClockDriftFactor, kFreezeDurationMs,
                        kPullOnceWhileRecovering, kAllowedDelayDeviationMs,
                        kSpeechReturnDeadlineMs);
}
854
// Long CNG period with a slow sender clock plus a 5 second network freeze.
TEST_F(NetEqDecodingTest, LongCngWithPositiveClockDriftNetworkFreeze) {
  // Clock drift of +25 ms / s (sender slower than receiver).
  constexpr double kClockDriftFactor = 1000.0 / (1000.0 - 25.0);
  constexpr double kFreezeDurationMs = 5000.0;
  constexpr bool kPullOnceWhileRecovering = false;
  constexpr int kAllowedDelayDeviationMs = 20;
  constexpr int kSpeechReturnDeadlineMs = 100;
  LongCngWithClockDrift(kClockDriftFactor, kFreezeDurationMs,
                        kPullOnceWhileRecovering, kAllowedDelayDeviationMs,
                        kSpeechReturnDeadlineMs);
}
868
// Same as the positive-drift network-freeze test, except that audio is also
// pulled once while the network is recovering from the freeze.
TEST_F(NetEqDecodingTest, LongCngWithPositiveClockDriftNetworkFreezeExtraPull) {
  // Clock drift of +25 ms / s (sender slower than receiver).
  constexpr double kClockDriftFactor = 1000.0 / (1000.0 - 25.0);
  constexpr double kFreezeDurationMs = 5000.0;
  constexpr bool kPullOnceWhileRecovering = true;
  constexpr int kAllowedDelayDeviationMs = 20;
  constexpr int kSpeechReturnDeadlineMs = 100;
  LongCngWithClockDrift(kClockDriftFactor, kFreezeDurationMs,
                        kPullOnceWhileRecovering, kAllowedDelayDeviationMs,
                        kSpeechReturnDeadlineMs);
}
882
// Baseline long-CNG scenario: no clock drift and no network freeze, checked
// against the tightest tolerances of the LongCng* tests.
TEST_F(NetEqDecodingTest, LongCngWithoutClockDrift) {
  constexpr double kClockDriftFactor = 1.0;  // No drift.
  constexpr double kFreezeDurationMs = 0.0;
  constexpr bool kPullOnceWhileRecovering = false;
  constexpr int kAllowedDelayDeviationMs = 10;
  constexpr int kSpeechReturnDeadlineMs = 50;
  LongCngWithClockDrift(kClockDriftFactor, kFreezeDurationMs,
                        kPullOnceWhileRecovering, kAllowedDelayDeviationMs,
                        kSpeechReturnDeadlineMs);
}
895
// Inserting a packet whose payload type has no registered decoder must be
// rejected with NetEq::kFail.
TEST_F(NetEqDecodingTest, UnknownPayloadType) {
  const size_t kNumPayloadBytes = 100;
  uint8_t zero_payload[kNumPayloadBytes] = {};

  RTPHeader header;
  PopulateRtpInfo(0, 0, &header);
  // Overwrite the payload type with one that is not registered as a decoder.
  header.payloadType = 1;

  EXPECT_EQ(NetEq::kFail, neteq_->InsertPacket(header, zero_payload, 0));
}
904
905 #if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)
906 #define MAYBE_DecoderError DecoderError
907 #else
908 #define MAYBE_DecoderError DISABLED_DecoderError
909 #endif
910
TEST_F(NetEqDecodingTest,MAYBE_DecoderError)911 TEST_F(NetEqDecodingTest, MAYBE_DecoderError) {
912 const size_t kPayloadBytes = 100;
913 uint8_t payload[kPayloadBytes] = {0};
914 RTPHeader rtp_info;
915 PopulateRtpInfo(0, 0, &rtp_info);
916 rtp_info.payloadType = 103; // iSAC, but the payload is invalid.
917 EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
918 // Set all of |out_data_| to 1, and verify that it was set to 0 by the call
919 // to GetAudio.
920 int16_t* out_frame_data = out_frame_.mutable_data();
921 for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; ++i) {
922 out_frame_data[i] = 1;
923 }
924 bool muted;
925 EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(&out_frame_, &muted));
926 ASSERT_FALSE(muted);
927
928 // Verify that the first 160 samples are set to 0.
929 static const int kExpectedOutputLength = 160; // 10 ms at 16 kHz sample rate.
930 const int16_t* const_out_frame_data = out_frame_.data();
931 for (int i = 0; i < kExpectedOutputLength; ++i) {
932 std::ostringstream ss;
933 ss << "i = " << i;
934 SCOPED_TRACE(ss.str()); // Print out the parameter values on failure.
935 EXPECT_EQ(0, const_out_frame_data[i]);
936 }
937 }
938
// Pulling audio before any packet has been inserted must succeed, yield one
// block of zeros and leave the output sample rate at its configured value.
TEST_F(NetEqDecodingTest, GetAudioBeforeInsertPacket) {
  // Pre-fill the whole output buffer with ones so that zeros written by
  // GetAudio can be told apart from untouched memory.
  int16_t* const writable = out_frame_.mutable_data();
  std::fill(writable, writable + AudioFrame::kMaxDataSizeSamples,
            static_cast<int16_t>(1));

  bool muted;
  EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
  ASSERT_FALSE(muted);

  // Verify that the first block of samples (10 ms at the initial sample rate)
  // is set to 0.
  static const int kExpectedOutputLength = kInitSampleRateHz / 100;
  const int16_t* const samples = out_frame_.data();
  for (int i = 0; i < kExpectedOutputLength; ++i) {
    // Report the failing sample index.
    SCOPED_TRACE("i = " + std::to_string(i));
    EXPECT_EQ(0, samples[i]);
  }

  // Verify that the sample rate did not change from the initial configuration.
  EXPECT_EQ(config_.sample_rate_hz, neteq_->last_output_sample_rate_hz());
}
962
963 class NetEqBgnTest : public NetEqDecodingTest {
964 protected:
965 virtual void TestCondition(double sum_squared_noise,
966 bool should_be_faded) = 0;
967
CheckBgn(int sampling_rate_hz)968 void CheckBgn(int sampling_rate_hz) {
969 size_t expected_samples_per_channel = 0;
970 uint8_t payload_type = 0xFF; // Invalid.
971 if (sampling_rate_hz == 8000) {
972 expected_samples_per_channel = kBlockSize8kHz;
973 payload_type = 93; // PCM 16, 8 kHz.
974 } else if (sampling_rate_hz == 16000) {
975 expected_samples_per_channel = kBlockSize16kHz;
976 payload_type = 94; // PCM 16, 16 kHZ.
977 } else if (sampling_rate_hz == 32000) {
978 expected_samples_per_channel = kBlockSize32kHz;
979 payload_type = 95; // PCM 16, 32 kHz.
980 } else {
981 ASSERT_TRUE(false); // Unsupported test case.
982 }
983
984 AudioFrame output;
985 test::AudioLoop input;
986 // We are using the same 32 kHz input file for all tests, regardless of
987 // |sampling_rate_hz|. The output may sound weird, but the test is still
988 // valid.
989 ASSERT_TRUE(input.Init(
990 webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
991 10 * sampling_rate_hz, // Max 10 seconds loop length.
992 expected_samples_per_channel));
993
994 // Payload of 10 ms of PCM16 32 kHz.
995 uint8_t payload[kBlockSize32kHz * sizeof(int16_t)];
996 RTPHeader rtp_info;
997 PopulateRtpInfo(0, 0, &rtp_info);
998 rtp_info.payloadType = payload_type;
999
1000 uint32_t receive_timestamp = 0;
1001 bool muted;
1002 for (int n = 0; n < 10; ++n) { // Insert few packets and get audio.
1003 auto block = input.GetNextBlock();
1004 ASSERT_EQ(expected_samples_per_channel, block.size());
1005 size_t enc_len_bytes =
1006 WebRtcPcm16b_Encode(block.data(), block.size(), payload);
1007 ASSERT_EQ(enc_len_bytes, expected_samples_per_channel * 2);
1008
1009 ASSERT_EQ(0, neteq_->InsertPacket(
1010 rtp_info,
1011 rtc::ArrayView<const uint8_t>(payload, enc_len_bytes),
1012 receive_timestamp));
1013 output.Reset();
1014 ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
1015 ASSERT_EQ(1u, output.num_channels_);
1016 ASSERT_EQ(expected_samples_per_channel, output.samples_per_channel_);
1017 ASSERT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);
1018
1019 // Next packet.
1020 rtp_info.timestamp += rtc::checked_cast<uint32_t>(
1021 expected_samples_per_channel);
1022 rtp_info.sequenceNumber++;
1023 receive_timestamp += rtc::checked_cast<uint32_t>(
1024 expected_samples_per_channel);
1025 }
1026
1027 output.Reset();
1028
1029 // Get audio without inserting packets, expecting PLC and PLC-to-CNG. Pull
1030 // one frame without checking speech-type. This is the first frame pulled
1031 // without inserting any packet, and might not be labeled as PLC.
1032 ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
1033 ASSERT_EQ(1u, output.num_channels_);
1034 ASSERT_EQ(expected_samples_per_channel, output.samples_per_channel_);
1035
1036 // To be able to test the fading of background noise we need at lease to
1037 // pull 611 frames.
1038 const int kFadingThreshold = 611;
1039
1040 // Test several CNG-to-PLC packet for the expected behavior. The number 20
1041 // is arbitrary, but sufficiently large to test enough number of frames.
1042 const int kNumPlcToCngTestFrames = 20;
1043 bool plc_to_cng = false;
1044 for (int n = 0; n < kFadingThreshold + kNumPlcToCngTestFrames; ++n) {
1045 output.Reset();
1046 // Set to non-zero.
1047 memset(output.mutable_data(), 1, AudioFrame::kMaxDataSizeBytes);
1048 ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
1049 ASSERT_FALSE(muted);
1050 ASSERT_EQ(1u, output.num_channels_);
1051 ASSERT_EQ(expected_samples_per_channel, output.samples_per_channel_);
1052 if (output.speech_type_ == AudioFrame::kPLCCNG) {
1053 plc_to_cng = true;
1054 double sum_squared = 0;
1055 const int16_t* output_data = output.data();
1056 for (size_t k = 0;
1057 k < output.num_channels_ * output.samples_per_channel_; ++k)
1058 sum_squared += output_data[k] * output_data[k];
1059 TestCondition(sum_squared, n > kFadingThreshold);
1060 } else {
1061 EXPECT_EQ(AudioFrame::kPLC, output.speech_type_);
1062 }
1063 }
1064 EXPECT_TRUE(plc_to_cng); // Just to be sure that PLC-to-CNG has occurred.
1065 }
1066 };
1067
1068 class NetEqBgnTestOn : public NetEqBgnTest {
1069 protected:
NetEqBgnTestOn()1070 NetEqBgnTestOn() : NetEqBgnTest() {
1071 config_.background_noise_mode = NetEq::kBgnOn;
1072 }
1073
TestCondition(double sum_squared_noise,bool)1074 void TestCondition(double sum_squared_noise, bool /*should_be_faded*/) {
1075 EXPECT_NE(0, sum_squared_noise);
1076 }
1077 };
1078
1079 class NetEqBgnTestOff : public NetEqBgnTest {
1080 protected:
NetEqBgnTestOff()1081 NetEqBgnTestOff() : NetEqBgnTest() {
1082 config_.background_noise_mode = NetEq::kBgnOff;
1083 }
1084
TestCondition(double sum_squared_noise,bool)1085 void TestCondition(double sum_squared_noise, bool /*should_be_faded*/) {
1086 EXPECT_EQ(0, sum_squared_noise);
1087 }
1088 };
1089
1090 class NetEqBgnTestFade : public NetEqBgnTest {
1091 protected:
NetEqBgnTestFade()1092 NetEqBgnTestFade() : NetEqBgnTest() {
1093 config_.background_noise_mode = NetEq::kBgnFade;
1094 }
1095
TestCondition(double sum_squared_noise,bool should_be_faded)1096 void TestCondition(double sum_squared_noise, bool should_be_faded) {
1097 if (should_be_faded)
1098 EXPECT_EQ(0, sum_squared_noise);
1099 }
1100 };
1101
// Runs the background-noise check at each supported sample rate.
TEST_F(NetEqBgnTestOn, RunTest) {
  for (int rate_hz : {8000, 16000, 32000}) {
    CheckBgn(rate_hz);
  }
}
1107
// Runs the background-noise check at each supported sample rate.
TEST_F(NetEqBgnTestOff, RunTest) {
  for (int rate_hz : {8000, 16000, 32000}) {
    CheckBgn(rate_hz);
  }
}
1113
// Runs the background-noise check at each supported sample rate.
TEST_F(NetEqBgnTestFade, RunTest) {
  for (int rate_hz : {8000, 16000, 32000}) {
    CheckBgn(rate_hz);
  }
}
1119
WrapTest(uint16_t start_seq_no,uint32_t start_timestamp,const std::set<uint16_t> & drop_seq_numbers,bool expect_seq_no_wrap,bool expect_timestamp_wrap)1120 void NetEqDecodingTest::WrapTest(uint16_t start_seq_no,
1121 uint32_t start_timestamp,
1122 const std::set<uint16_t>& drop_seq_numbers,
1123 bool expect_seq_no_wrap,
1124 bool expect_timestamp_wrap) {
1125 uint16_t seq_no = start_seq_no;
1126 uint32_t timestamp = start_timestamp;
1127 const int kBlocksPerFrame = 3; // Number of 10 ms blocks per frame.
1128 const int kFrameSizeMs = kBlocksPerFrame * kTimeStepMs;
1129 const int kSamples = kBlockSize16kHz * kBlocksPerFrame;
1130 const size_t kPayloadBytes = kSamples * sizeof(int16_t);
1131 double next_input_time_ms = 0.0;
1132 uint32_t receive_timestamp = 0;
1133
1134 // Insert speech for 2 seconds.
1135 const int kSpeechDurationMs = 2000;
1136 int packets_inserted = 0;
1137 uint16_t last_seq_no;
1138 uint32_t last_timestamp;
1139 bool timestamp_wrapped = false;
1140 bool seq_no_wrapped = false;
1141 for (double t_ms = 0; t_ms < kSpeechDurationMs; t_ms += 10) {
1142 // Each turn in this for loop is 10 ms.
1143 while (next_input_time_ms <= t_ms) {
1144 // Insert one 30 ms speech frame.
1145 uint8_t payload[kPayloadBytes] = {0};
1146 RTPHeader rtp_info;
1147 PopulateRtpInfo(seq_no, timestamp, &rtp_info);
1148 if (drop_seq_numbers.find(seq_no) == drop_seq_numbers.end()) {
1149 // This sequence number was not in the set to drop. Insert it.
1150 ASSERT_EQ(0,
1151 neteq_->InsertPacket(rtp_info, payload, receive_timestamp));
1152 ++packets_inserted;
1153 }
1154 NetEqNetworkStatistics network_stats;
1155 ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
1156
1157 // Due to internal NetEq logic, preferred buffer-size is about 4 times the
1158 // packet size for first few packets. Therefore we refrain from checking
1159 // the criteria.
1160 if (packets_inserted > 4) {
1161 // Expect preferred and actual buffer size to be no more than 2 frames.
1162 EXPECT_LE(network_stats.preferred_buffer_size_ms, kFrameSizeMs * 2);
1163 EXPECT_LE(network_stats.current_buffer_size_ms, kFrameSizeMs * 2 +
1164 algorithmic_delay_ms_);
1165 }
1166 last_seq_no = seq_no;
1167 last_timestamp = timestamp;
1168
1169 ++seq_no;
1170 timestamp += kSamples;
1171 receive_timestamp += kSamples;
1172 next_input_time_ms += static_cast<double>(kFrameSizeMs);
1173
1174 seq_no_wrapped |= seq_no < last_seq_no;
1175 timestamp_wrapped |= timestamp < last_timestamp;
1176 }
1177 // Pull out data once.
1178 AudioFrame output;
1179 bool muted;
1180 ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
1181 ASSERT_EQ(kBlockSize16kHz, output.samples_per_channel_);
1182 ASSERT_EQ(1u, output.num_channels_);
1183
1184 // Expect delay (in samples) to be less than 2 packets.
1185 rtc::Optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp();
1186 ASSERT_TRUE(playout_timestamp);
1187 EXPECT_LE(timestamp - *playout_timestamp,
1188 static_cast<uint32_t>(kSamples * 2));
1189 }
1190 // Make sure we have actually tested wrap-around.
1191 ASSERT_EQ(expect_seq_no_wrap, seq_no_wrapped);
1192 ASSERT_EQ(expect_timestamp_wrap, timestamp_wrapped);
1193 }
1194
// The sequence number starts 10 below 0xFFFF, so it wraps during the run; the
// timestamp starts at 0 and must not wrap.
TEST_F(NetEqDecodingTest, SequenceNumberWrap) {
  std::set<uint16_t> nothing_dropped;  // Don't drop any packets.
  const bool kExpectSeqNoWrap = true;
  const bool kExpectTimestampWrap = false;
  WrapTest(0xFFFF - 10, 0, nothing_dropped, kExpectSeqNoWrap,
           kExpectTimestampWrap);
}
1200
// Like SequenceNumberWrap, but the two packets on either side of the wrap
// point (sequence numbers 0xFFFF and 0x0) are never inserted.
TEST_F(NetEqDecodingTest, SequenceNumberWrapAndDrop) {
  const std::set<uint16_t> dropped_at_wrap = {0xFFFF, 0x0};
  const bool kExpectSeqNoWrap = true;
  const bool kExpectTimestampWrap = false;
  // Start with a sequence number that will soon wrap.
  WrapTest(0xFFFF - 10, 0, dropped_at_wrap, kExpectSeqNoWrap,
           kExpectTimestampWrap);
}
1208
// The timestamp starts 3000 below 0xFFFFFFFF, so it wraps during the run; the
// sequence number starts at 0 and must not wrap.
TEST_F(NetEqDecodingTest, TimestampWrap) {
  std::set<uint16_t> nothing_dropped;
  const bool kExpectSeqNoWrap = false;
  const bool kExpectTimestampWrap = true;
  WrapTest(0, 0xFFFFFFFF - 3000, nothing_dropped, kExpectSeqNoWrap,
           kExpectTimestampWrap);
}
1214
// Start with a timestamp and a sequence number that will wrap at the same
// time; both wrap-arounds must be observed.
TEST_F(NetEqDecodingTest, TimestampAndSequenceNumberWrap) {
  std::set<uint16_t> nothing_dropped;
  const bool kExpectSeqNoWrap = true;
  const bool kExpectTimestampWrap = true;
  WrapTest(0xFFFF - 10, 0xFFFFFFFF - 5000, nothing_dropped, kExpectSeqNoWrap,
           kExpectTimestampWrap);
}
1221
DuplicateCng()1222 void NetEqDecodingTest::DuplicateCng() {
1223 uint16_t seq_no = 0;
1224 uint32_t timestamp = 0;
1225 const int kFrameSizeMs = 10;
1226 const int kSampleRateKhz = 16;
1227 const int kSamples = kFrameSizeMs * kSampleRateKhz;
1228 const size_t kPayloadBytes = kSamples * 2;
1229
1230 const int algorithmic_delay_samples = std::max(
1231 algorithmic_delay_ms_ * kSampleRateKhz, 5 * kSampleRateKhz / 8);
1232 // Insert three speech packets. Three are needed to get the frame length
1233 // correct.
1234 uint8_t payload[kPayloadBytes] = {0};
1235 RTPHeader rtp_info;
1236 bool muted;
1237 for (int i = 0; i < 3; ++i) {
1238 PopulateRtpInfo(seq_no, timestamp, &rtp_info);
1239 ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
1240 ++seq_no;
1241 timestamp += kSamples;
1242
1243 // Pull audio once.
1244 ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
1245 ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
1246 }
1247 // Verify speech output.
1248 EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
1249
1250 // Insert same CNG packet twice.
1251 const int kCngPeriodMs = 100;
1252 const int kCngPeriodSamples = kCngPeriodMs * kSampleRateKhz;
1253 size_t payload_len;
1254 PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
1255 // This is the first time this CNG packet is inserted.
1256 ASSERT_EQ(
1257 0, neteq_->InsertPacket(
1258 rtp_info, rtc::ArrayView<const uint8_t>(payload, payload_len), 0));
1259
1260 // Pull audio once and make sure CNG is played.
1261 ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
1262 ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
1263 EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
1264 EXPECT_FALSE(
1265 neteq_->GetPlayoutTimestamp()); // Returns empty value during CNG.
1266 EXPECT_EQ(timestamp - algorithmic_delay_samples,
1267 out_frame_.timestamp_ + out_frame_.samples_per_channel_);
1268
1269 // Insert the same CNG packet again. Note that at this point it is old, since
1270 // we have already decoded the first copy of it.
1271 ASSERT_EQ(
1272 0, neteq_->InsertPacket(
1273 rtp_info, rtc::ArrayView<const uint8_t>(payload, payload_len), 0));
1274
1275 // Pull audio until we have played |kCngPeriodMs| of CNG. Start at 10 ms since
1276 // we have already pulled out CNG once.
1277 for (int cng_time_ms = 10; cng_time_ms < kCngPeriodMs; cng_time_ms += 10) {
1278 ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
1279 ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
1280 EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
1281 EXPECT_FALSE(
1282 neteq_->GetPlayoutTimestamp()); // Returns empty value during CNG.
1283 EXPECT_EQ(timestamp - algorithmic_delay_samples,
1284 out_frame_.timestamp_ + out_frame_.samples_per_channel_);
1285 }
1286
1287 // Insert speech again.
1288 ++seq_no;
1289 timestamp += kCngPeriodSamples;
1290 PopulateRtpInfo(seq_no, timestamp, &rtp_info);
1291 ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
1292
1293 // Pull audio once and verify that the output is speech again.
1294 ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
1295 ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
1296 EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
1297 rtc::Optional<uint32_t> playout_timestamp = neteq_->GetPlayoutTimestamp();
1298 ASSERT_TRUE(playout_timestamp);
1299 EXPECT_EQ(timestamp + kSamples - algorithmic_delay_samples,
1300 *playout_timestamp);
1301 }
1302
// Runs the shared duplicate-CNG scenario on the default fixture.
TEST_F(NetEqDecodingTest, DiscardDuplicateCng) {
  DuplicateCng();
}
1304
// Starts the stream with a CNG packet, then feeds speech packets until the
// output timestamp has passed the first speech timestamp, and verifies that
// the output switches from CNG to normal speech.
TEST_F(NetEqDecodingTest, CngFirst) {
  const int kFrameSizeMs = 10;
  const int kSampleRateKhz = 16;
  const int kSamples = kFrameSizeMs * kSampleRateKhz;
  const int kPayloadBytes = kSamples * 2;
  const int kCngPeriodMs = 100;
  const int kCngPeriodSamples = kCngPeriodMs * kSampleRateKhz;
  const int kMaxSpeechPackets = 20;

  uint16_t seq_no = 0;
  uint32_t timestamp = 0;
  uint8_t payload[kPayloadBytes] = {0};
  RTPHeader rtp_info;
  size_t payload_len;

  // The very first packet is CNG.
  PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
  ASSERT_EQ(NetEq::kOK,
            neteq_->InsertPacket(
                rtp_info,
                rtc::ArrayView<const uint8_t>(payload, payload_len), 0));
  ++seq_no;
  timestamp += kCngPeriodSamples;

  // Pull audio once and make sure CNG is played.
  bool muted;
  ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
  ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);
  EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);

  // Insert speech packets, pulling audio once after each, until the output
  // timestamp is newer than the timestamp of the first speech packet. Give up
  // after |kMaxSpeechPackets| packets.
  const uint32_t first_speech_timestamp = timestamp;
  int packets_tried = 0;
  for (;;) {
    ASSERT_LT(packets_tried++, kMaxSpeechPackets) << "Test timed out";
    PopulateRtpInfo(seq_no, timestamp, &rtp_info);
    ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
    ++seq_no;
    timestamp += kSamples;

    ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_EQ(kBlockSize16kHz, out_frame_.samples_per_channel_);

    if (IsNewerTimestamp(out_frame_.timestamp_, first_speech_timestamp)) {
      break;
    }
  }
  // Verify speech output.
  EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
}
1350
1351 class NetEqDecodingTestWithMutedState : public NetEqDecodingTest {
1352 public:
NetEqDecodingTestWithMutedState()1353 NetEqDecodingTestWithMutedState() : NetEqDecodingTest() {
1354 config_.enable_muted_state = true;
1355 }
1356
1357 protected:
1358 static constexpr size_t kSamples = 10 * 16;
1359 static constexpr size_t kPayloadBytes = kSamples * 2;
1360
InsertPacket(uint32_t rtp_timestamp)1361 void InsertPacket(uint32_t rtp_timestamp) {
1362 uint8_t payload[kPayloadBytes] = {0};
1363 RTPHeader rtp_info;
1364 PopulateRtpInfo(0, rtp_timestamp, &rtp_info);
1365 EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
1366 }
1367
InsertCngPacket(uint32_t rtp_timestamp)1368 void InsertCngPacket(uint32_t rtp_timestamp) {
1369 uint8_t payload[kPayloadBytes] = {0};
1370 RTPHeader rtp_info;
1371 size_t payload_len;
1372 PopulateCng(0, rtp_timestamp, &rtp_info, payload, &payload_len);
1373 EXPECT_EQ(
1374 NetEq::kOK,
1375 neteq_->InsertPacket(
1376 rtp_info, rtc::ArrayView<const uint8_t>(payload, payload_len), 0));
1377 }
1378
GetAudioReturnMuted()1379 bool GetAudioReturnMuted() {
1380 bool muted;
1381 EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
1382 return muted;
1383 }
1384
GetAudioUntilMuted()1385 void GetAudioUntilMuted() {
1386 while (!GetAudioReturnMuted()) {
1387 ASSERT_LT(counter_++, 1000) << "Test timed out";
1388 }
1389 }
1390
GetAudioUntilNormal()1391 void GetAudioUntilNormal() {
1392 bool muted = false;
1393 while (out_frame_.speech_type_ != AudioFrame::kNormalSpeech) {
1394 EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
1395 ASSERT_LT(counter_++, 1000) << "Test timed out";
1396 }
1397 EXPECT_FALSE(muted);
1398 }
1399
1400 int counter_ = 0;
1401 };
1402
1403 // Verifies that NetEq goes in and out of muted state as expected.
TEST_F(NetEqDecodingTestWithMutedState, MutedState) {
  // One speech packet; the first pull must not be muted, then keep pulling
  // until the output has faded out into muted state.
  InsertPacket(0);
  EXPECT_FALSE(GetAudioReturnMuted());
  GetAudioUntilMuted();
  EXPECT_TRUE(out_frame_.muted());

  // Verify that output audio is not written during muted mode: fill a fresh
  // frame with a sentinel value and check that it survives GetAudio. Other
  // parameters should be correct, though.
  const int16_t kSentinel = 17;
  AudioFrame new_frame;
  int16_t* const frame_data = new_frame.mutable_data();
  std::fill(frame_data, frame_data + AudioFrame::kMaxDataSizeSamples,
            kSentinel);

  bool muted;
  EXPECT_EQ(0, neteq_->GetAudio(&new_frame, &muted));
  EXPECT_TRUE(muted);
  EXPECT_TRUE(out_frame_.muted());
  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; ++i) {
    EXPECT_EQ(kSentinel, frame_data[i]);
  }

  // The new frame follows directly after the previous one and shares all of
  // its other properties.
  EXPECT_EQ(out_frame_.timestamp_ + out_frame_.samples_per_channel_,
            new_frame.timestamp_);
  EXPECT_EQ(out_frame_.samples_per_channel_, new_frame.samples_per_channel_);
  EXPECT_EQ(out_frame_.sample_rate_hz_, new_frame.sample_rate_hz_);
  EXPECT_EQ(out_frame_.num_channels_, new_frame.num_channels_);
  EXPECT_EQ(out_frame_.speech_type_, new_frame.speech_type_);
  EXPECT_EQ(out_frame_.vad_activity_, new_frame.vad_activity_);

  // Insert new data. Timestamp is corrected for the time elapsed since the last
  // packet. Verify that normal operation resumes.
  InsertPacket(kSamples * counter_);
  GetAudioUntilNormal();
  EXPECT_FALSE(out_frame_.muted());

  NetEqNetworkStatistics stats;
  EXPECT_EQ(0, neteq_->NetworkStatistics(&stats));
  // NetEqNetworkStatistics::expand_rate tells the fraction of samples that were
  // concealment samples, in Q14 (16384 = 100%). The vast majority should be
  // concealment samples in this test.
  EXPECT_GT(stats.expand_rate, 14000);
  // And, it should be greater than the speech_expand_rate.
  EXPECT_GT(stats.expand_rate, stats.speech_expand_rate);
}
1450
1451 // Verifies that NetEq goes out of muted state when given a delayed packet.
TEST_F(NetEqDecodingTestWithMutedState, MutedStateDelayedPacket) {
  // One speech packet; the first pull must not be muted, then keep pulling
  // until the output has faded out into muted state.
  InsertPacket(0);
  EXPECT_FALSE(GetAudioReturnMuted());
  GetAudioUntilMuted();

  // The new packet's timestamp accounts for only half of the time elapsed
  // since the last packet, i.e. the packet is delayed. Normal operation must
  // resume anyway.
  const uint32_t delayed_timestamp = kSamples * counter_ / 2;
  InsertPacket(delayed_timestamp);
  GetAudioUntilNormal();
}
1465
1466 // Verifies that NetEq goes out of muted state when given a future packet.
TEST_F(NetEqDecodingTestWithMutedState, MutedStateFuturePacket) {
  // One speech packet; the first pull must not be muted, then keep pulling
  // until the output has faded out into muted state.
  InsertPacket(0);
  EXPECT_FALSE(GetAudioReturnMuted());
  GetAudioUntilMuted();

  // The new packet's timestamp over-corrects for the time elapsed since the
  // last packet (twice the elapsed time), i.e. the packet is too early. Normal
  // operation must resume anyway.
  const uint32_t early_timestamp = kSamples * counter_ * 2;
  InsertPacket(early_timestamp);
  GetAudioUntilNormal();
}
1480
1481 // Verifies that NetEq goes out of muted state when given an old packet.
TEST_F(NetEqDecodingTestWithMutedState, MutedStateOldPacket) {
  // One speech packet; the first pull must not be muted, then keep pulling
  // until the output has faded out into muted state.
  InsertPacket(0);
  EXPECT_FALSE(GetAudioReturnMuted());
  GetAudioUntilMuted();
  EXPECT_NE(AudioFrame::kNormalSpeech, out_frame_.speech_type_);

  // Insert a packet which is older than the first packet: its timestamp lies
  // 1000 frames before the current position. The very next pull must be
  // unmuted normal speech.
  const uint32_t old_timestamp = kSamples * (counter_ - 1000);
  InsertPacket(old_timestamp);
  EXPECT_FALSE(GetAudioReturnMuted());
  EXPECT_EQ(AudioFrame::kNormalSpeech, out_frame_.speech_type_);
}
1496
1497 // Verifies that NetEq doesn't enter muted state when CNG mode is active and the
1498 // packet stream is suspended for a long time.
TEST_F(NetEqDecodingTestWithMutedState, DoNotMuteExtendedCngWithoutPackets) {
  // A single CNG packet starts the CNG period.
  InsertCngPacket(0);

  // Pull 10 seconds of audio (10 ms audio generated per lap). No frame may be
  // reported as muted.
  const int kNumPulls = 1000;
  for (int pull = 0; pull < kNumPulls; ++pull) {
    bool muted;
    EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    ASSERT_FALSE(muted);
  }
  // The last frame must still be comfort noise.
  EXPECT_EQ(AudioFrame::kCNG, out_frame_.speech_type_);
}
1511
1512 // Verifies that NetEq goes back to normal after a long CNG period with the
1513 // packet stream suspended.
TEST_F(NetEqDecodingTestWithMutedState, RecoverAfterExtendedCngWithoutPackets) {
  // A single CNG packet starts the CNG period.
  InsertCngPacket(0);

  // Pull 10 seconds of audio (10 ms audio generated per lap).
  const int kNumPulls = 1000;
  for (int pull = 0; pull < kNumPulls; ++pull) {
    bool muted;
    EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
  }

  // Insert new data and verify that normal operation resumes.
  // NOTE(review): |counter_| has not been incremented at this point (the loop
  // above does not touch it), so the inserted timestamp is 0 rather than one
  // corrected for the elapsed time -- confirm that this is intended.
  InsertPacket(kSamples * counter_);
  GetAudioUntilNormal();
}
1529
1530 class NetEqDecodingTestTwoInstances : public NetEqDecodingTest {
1531 public:
NetEqDecodingTestTwoInstances()1532 NetEqDecodingTestTwoInstances() : NetEqDecodingTest() {}
1533
SetUp()1534 void SetUp() override {
1535 NetEqDecodingTest::SetUp();
1536 config2_ = config_;
1537 }
1538
CreateSecondInstance()1539 void CreateSecondInstance() {
1540 neteq2_.reset(NetEq::Create(config2_, CreateBuiltinAudioDecoderFactory()));
1541 ASSERT_TRUE(neteq2_);
1542 LoadDecoders(neteq2_.get());
1543 }
1544
1545 protected:
1546 std::unique_ptr<NetEq> neteq2_;
1547 NetEq::Config config2_;
1548 };
1549
1550 namespace {
AudioFramesEqualExceptData(const AudioFrame & a,const AudioFrame & b)1551 ::testing::AssertionResult AudioFramesEqualExceptData(const AudioFrame& a,
1552 const AudioFrame& b) {
1553 if (a.timestamp_ != b.timestamp_)
1554 return ::testing::AssertionFailure() << "timestamp_ diff (" << a.timestamp_
1555 << " != " << b.timestamp_ << ")";
1556 if (a.sample_rate_hz_ != b.sample_rate_hz_)
1557 return ::testing::AssertionFailure() << "sample_rate_hz_ diff ("
1558 << a.sample_rate_hz_
1559 << " != " << b.sample_rate_hz_ << ")";
1560 if (a.samples_per_channel_ != b.samples_per_channel_)
1561 return ::testing::AssertionFailure()
1562 << "samples_per_channel_ diff (" << a.samples_per_channel_
1563 << " != " << b.samples_per_channel_ << ")";
1564 if (a.num_channels_ != b.num_channels_)
1565 return ::testing::AssertionFailure() << "num_channels_ diff ("
1566 << a.num_channels_
1567 << " != " << b.num_channels_ << ")";
1568 if (a.speech_type_ != b.speech_type_)
1569 return ::testing::AssertionFailure() << "speech_type_ diff ("
1570 << a.speech_type_
1571 << " != " << b.speech_type_ << ")";
1572 if (a.vad_activity_ != b.vad_activity_)
1573 return ::testing::AssertionFailure() << "vad_activity_ diff ("
1574 << a.vad_activity_
1575 << " != " << b.vad_activity_ << ")";
1576 return ::testing::AssertionSuccess();
1577 }
1578
AudioFramesEqual(const AudioFrame & a,const AudioFrame & b)1579 ::testing::AssertionResult AudioFramesEqual(const AudioFrame& a,
1580 const AudioFrame& b) {
1581 ::testing::AssertionResult res = AudioFramesEqualExceptData(a, b);
1582 if (!res)
1583 return res;
1584 if (memcmp(
1585 a.data(), b.data(),
1586 a.samples_per_channel_ * a.num_channels_ * sizeof(*a.data())) != 0) {
1587 return ::testing::AssertionFailure() << "data_ diff";
1588 }
1589 return ::testing::AssertionSuccess();
1590 }
1591
1592 } // namespace
1593
// Runs two NetEq instances on identical input: the first with muted state
// disabled (asserted below), the second with it enabled. On every pull the
// two output frames must agree; when the second instance reports muted, only
// the frame metadata is compared, otherwise the sample data must match too.
// The second instance is expected to be muted after the first 1000 pulls and
// unmuted again once a new packet has brought back normal speech.
TEST_F(NetEqDecodingTestTwoInstances, CompareMutedStateOnOff) {
  ASSERT_FALSE(config_.enable_muted_state);
  config2_.enable_muted_state = true;
  CreateSecondInstance();

  // Feed the same single speech packet to both instances.
  const size_t kSamples = 10 * 16;
  const size_t kPayloadBytes = kSamples * 2;
  uint8_t payload[kPayloadBytes] = {0};
  RTPHeader rtp_info;
  PopulateRtpInfo(0, 0, &rtp_info);
  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
  EXPECT_EQ(0, neteq2_->InsertPacket(rtp_info, payload, 0));

  AudioFrame out_frame1, out_frame2;
  bool muted;

  // Pulls one frame from each instance and compares them. After the call,
  // |muted| holds the muted flag reported by the second instance.
  auto pull_and_compare = [&]() {
    EXPECT_EQ(0, neteq_->GetAudio(&out_frame1, &muted));
    // The first instance has muted state disabled and must never mute.
    EXPECT_FALSE(muted);
    EXPECT_EQ(0, neteq2_->GetAudio(&out_frame2, &muted));
    if (muted) {
      EXPECT_TRUE(AudioFramesEqualExceptData(out_frame1, out_frame2));
    } else {
      EXPECT_TRUE(AudioFramesEqual(out_frame1, out_frame2));
    }
  };

  for (int i = 0; i < 1000; ++i) {
    std::ostringstream trace;
    trace << "i = " << i;
    SCOPED_TRACE(trace.str());  // Print out the loop iterator on failure.
    pull_and_compare();
  }
  EXPECT_TRUE(muted);

  // Insert new data. Timestamp is corrected for the time elapsed since the
  // last packet.
  PopulateRtpInfo(0, kSamples * 1000, &rtp_info);
  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
  EXPECT_EQ(0, neteq2_->InsertPacket(rtp_info, payload, 0));

  // Keep pulling until the first instance outputs normal speech again, giving
  // up after 1000 attempts.
  for (int counter = 0;
       out_frame1.speech_type_ != AudioFrame::kNormalSpeech;) {
    ASSERT_LT(counter++, 1000) << "Test timed out";
    std::ostringstream trace;
    trace << "counter = " << counter;
    SCOPED_TRACE(trace.str());  // Print out the loop iterator on failure.
    pull_and_compare();
  }
  EXPECT_FALSE(muted);
}
1648
// Checks that LastDecodedTimestamps() is empty both before and after a
// GetAudio call when this test has not inserted any packet.
TEST_F(NetEqDecodingTest, LastDecodedTimestampsEmpty) {
  // Before any audio has been pulled.
  const std::vector<uint32_t> before_pull = neteq_->LastDecodedTimestamps();
  EXPECT_TRUE(before_pull.empty());

  // Pull out data once; the call itself must succeed.
  bool muted;
  AudioFrame output;
  ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));

  // Still nothing reported as decoded.
  const std::vector<uint32_t> after_pull = neteq_->LastDecodedTimestamps();
  EXPECT_TRUE(after_pull.empty());
}
1659
// Inserts a single 10 ms packet and checks that LastDecodedTimestamps()
// reports exactly that packet's RTP timestamp after the first GetAudio call,
// and reports nothing after a second GetAudio call.
TEST_F(NetEqDecodingTest, LastDecodedTimestampsOneDecoded) {
  // One packet with PCM16b WB data (this is what PopulateRtpInfo does by
  // default): 16 samples per ms, 10 ms, 2 bytes per sample.
  constexpr uint32_t kRtpTimestamp = 0x1234;
  constexpr size_t kPayloadSamples = 16 * 10;
  constexpr size_t kPayloadBytes = 2 * kPayloadSamples;
  uint8_t payload[kPayloadBytes] = {0};

  RTPHeader rtp_info;
  PopulateRtpInfo(0, kRtpTimestamp, &rtp_info);
  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));

  // First pull: the inserted packet is the only thing decoded.
  bool muted;
  AudioFrame output;
  ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
  const std::vector<uint32_t> expected_timestamps = {kRtpTimestamp};
  EXPECT_EQ(expected_timestamps, neteq_->LastDecodedTimestamps());

  // Second pull: nothing decoded, so the list is empty.
  ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
  const std::vector<uint32_t> after_second_pull =
      neteq_->LastDecodedTimestamps();
  EXPECT_TRUE(after_second_pull.empty());
}
1684
// Inserts two consecutive 5 ms packets and checks that a single GetAudio call
// reports both RTP timestamps, in insertion order, via
// LastDecodedTimestamps().
TEST_F(NetEqDecodingTest, LastDecodedTimestampsTwoDecoded) {
  // Two packets with PCM16b WB data (this is what PopulateRtpInfo does by
  // default). Each is 5 ms long so that NetEq must decode them both in the
  // same GetAudio call.
  constexpr size_t kPayloadSamples = 16 * 5;
  constexpr size_t kPayloadBytes = 2 * kPayloadSamples;
  constexpr uint32_t kRtpTimestamp1 = 0x1234;
  constexpr uint32_t kRtpTimestamp2 = kRtpTimestamp1 + kPayloadSamples;
  uint8_t payload[kPayloadBytes] = {0};

  RTPHeader rtp_info;

  // First packet: sequence number 0.
  PopulateRtpInfo(0, kRtpTimestamp1, &rtp_info);
  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));

  // Second packet: sequence number 1, timestamp advanced by one payload.
  PopulateRtpInfo(1, kRtpTimestamp2, &rtp_info);
  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));

  // Pull out data once.
  bool muted;
  AudioFrame output;
  ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));

  const std::vector<uint32_t> expected_timestamps = {kRtpTimestamp1,
                                                     kRtpTimestamp2};
  EXPECT_EQ(expected_timestamps, neteq_->LastDecodedTimestamps());
}
1709
// Checks the concealment_events lifetime statistic. The test alternates
// between inserting/pulling 10 packets and pulling audio without inserting
// anything (a simulated loss burst). Burst number i (0-based) skips i + 1
// sequence numbers, so the bursts have different lengths; the statistic is
// expected to equal the number of bursts, kNumConcealmentEvents.
TEST_F(NetEqDecodingTest, TestConcealmentEvents) {
  const int kNumConcealmentEvents = 19;
  const size_t kSamples = 10 * 16;
  const size_t kPayloadBytes = kSamples * 2;
  int seq_no = 0;
  RTPHeader rtp_info;
  rtp_info.ssrc = 0x1234;     // Just an arbitrary SSRC.
  rtp_info.payloadType = 94;  // PCM16b WB codec.
  rtp_info.markerBit = 0;
  const uint8_t payload[kPayloadBytes] = {0};
  bool muted = false;

  for (int i = 0; i < kNumConcealmentEvents; i++) {
    // Insert some packets of 10 ms size, pulling one frame per packet.
    // A failed insert or pull would invalidate the statistic checked at the
    // end, so stop right away with a precise failure instead.
    for (int j = 0; j < 10; j++) {
      rtp_info.sequenceNumber = seq_no++;
      rtp_info.timestamp = rtp_info.sequenceNumber * kSamples;
      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
      ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    }

    // Lose a number of packets: skip their sequence numbers but keep pulling
    // audio.
    const int num_lost = 1 + i;
    for (int j = 0; j < num_lost; j++) {
      seq_no++;
      ASSERT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
    }
  }

  // Check number of concealment events.
  NetEqLifetimeStatistics stats = neteq_->GetLifetimeStatistics();
  EXPECT_EQ(kNumConcealmentEvents, static_cast<int>(stats.concealment_events));
}
1743
1744 // Test that the jitter buffer delay stat is computed correctly.
TestJitterBufferDelay(bool apply_packet_loss)1745 void NetEqDecodingTestFaxMode::TestJitterBufferDelay(bool apply_packet_loss) {
1746 const int kNumPackets = 10;
1747 const int kDelayInNumPackets = 2;
1748 const int kPacketLenMs = 10; // All packets are of 10 ms size.
1749 const size_t kSamples = kPacketLenMs * 16;
1750 const size_t kPayloadBytes = kSamples * 2;
1751 RTPHeader rtp_info;
1752 rtp_info.ssrc = 0x1234; // Just an arbitrary SSRC.
1753 rtp_info.payloadType = 94; // PCM16b WB codec.
1754 rtp_info.markerBit = 0;
1755 const uint8_t payload[kPayloadBytes] = {0};
1756 bool muted;
1757 int packets_sent = 0;
1758 int packets_received = 0;
1759 int expected_delay = 0;
1760 while (packets_received < kNumPackets) {
1761 // Insert packet.
1762 if (packets_sent < kNumPackets) {
1763 rtp_info.sequenceNumber = packets_sent++;
1764 rtp_info.timestamp = rtp_info.sequenceNumber * kSamples;
1765 neteq_->InsertPacket(rtp_info, payload, 0);
1766 }
1767
1768 // Get packet.
1769 if (packets_sent > kDelayInNumPackets) {
1770 neteq_->GetAudio(&out_frame_, &muted);
1771 packets_received++;
1772
1773 // The delay reported by the jitter buffer never exceeds
1774 // the number of samples previously fetched with GetAudio
1775 // (hence the min()).
1776 int packets_delay = std::min(packets_received, kDelayInNumPackets + 1);
1777
1778 // The increase of the expected delay is the product of
1779 // the current delay of the jitter buffer in ms * the
1780 // number of samples that are sent for play out.
1781 int current_delay_ms = packets_delay * kPacketLenMs;
1782 expected_delay += current_delay_ms * kSamples;
1783 }
1784 }
1785
1786 if (apply_packet_loss) {
1787 // Extra call to GetAudio to cause concealment.
1788 neteq_->GetAudio(&out_frame_, &muted);
1789 }
1790
1791 // Check jitter buffer delay.
1792 NetEqLifetimeStatistics stats = neteq_->GetLifetimeStatistics();
1793 EXPECT_EQ(expected_delay, static_cast<int>(stats.jitter_buffer_delay_ms));
1794 }
1795
TEST_F(NetEqDecodingTestFaxMode,TestJitterBufferDelayWithoutLoss)1796 TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithoutLoss) {
1797 TestJitterBufferDelay(false);
1798 }
1799
TEST_F(NetEqDecodingTestFaxMode,TestJitterBufferDelayWithLoss)1800 TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithLoss) {
1801 TestJitterBufferDelay(true);
1802 }
1803
1804 } // namespace webrtc
1805