1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "webrtc/modules/audio_coding/neteq/neteq_impl.h"
12
13 #include <assert.h>
14 #include <memory.h> // memset
15
16 #include <algorithm>
17
18 #include "webrtc/base/checks.h"
19 #include "webrtc/base/logging.h"
20 #include "webrtc/base/safe_conversions.h"
21 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
22 #include "webrtc/modules/audio_coding/codecs/audio_decoder.h"
23 #include "webrtc/modules/audio_coding/neteq/accelerate.h"
24 #include "webrtc/modules/audio_coding/neteq/background_noise.h"
25 #include "webrtc/modules/audio_coding/neteq/buffer_level_filter.h"
26 #include "webrtc/modules/audio_coding/neteq/comfort_noise.h"
27 #include "webrtc/modules/audio_coding/neteq/decision_logic.h"
28 #include "webrtc/modules/audio_coding/neteq/decoder_database.h"
29 #include "webrtc/modules/audio_coding/neteq/defines.h"
30 #include "webrtc/modules/audio_coding/neteq/delay_manager.h"
31 #include "webrtc/modules/audio_coding/neteq/delay_peak_detector.h"
32 #include "webrtc/modules/audio_coding/neteq/dtmf_buffer.h"
33 #include "webrtc/modules/audio_coding/neteq/dtmf_tone_generator.h"
34 #include "webrtc/modules/audio_coding/neteq/expand.h"
35 #include "webrtc/modules/audio_coding/neteq/merge.h"
36 #include "webrtc/modules/audio_coding/neteq/nack.h"
37 #include "webrtc/modules/audio_coding/neteq/normal.h"
38 #include "webrtc/modules/audio_coding/neteq/packet_buffer.h"
39 #include "webrtc/modules/audio_coding/neteq/packet.h"
40 #include "webrtc/modules/audio_coding/neteq/payload_splitter.h"
41 #include "webrtc/modules/audio_coding/neteq/post_decode_vad.h"
42 #include "webrtc/modules/audio_coding/neteq/preemptive_expand.h"
43 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
44 #include "webrtc/modules/audio_coding/neteq/timestamp_scaler.h"
45 #include "webrtc/modules/include/module_common_types.h"
46 #include "webrtc/system_wrappers/include/critical_section_wrapper.h"
47
48 // Modify the code to obtain backwards bit-exactness. Once bit-exactness is no
49 // longer required, this #define should be removed (and the code that it
50 // enables).
51 #define LEGACY_BITEXACT
52
53 namespace webrtc {
54
NetEqImpl(const NetEq::Config & config,BufferLevelFilter * buffer_level_filter,DecoderDatabase * decoder_database,DelayManager * delay_manager,DelayPeakDetector * delay_peak_detector,DtmfBuffer * dtmf_buffer,DtmfToneGenerator * dtmf_tone_generator,PacketBuffer * packet_buffer,PayloadSplitter * payload_splitter,TimestampScaler * timestamp_scaler,AccelerateFactory * accelerate_factory,ExpandFactory * expand_factory,PreemptiveExpandFactory * preemptive_expand_factory,bool create_components)55 NetEqImpl::NetEqImpl(const NetEq::Config& config,
56 BufferLevelFilter* buffer_level_filter,
57 DecoderDatabase* decoder_database,
58 DelayManager* delay_manager,
59 DelayPeakDetector* delay_peak_detector,
60 DtmfBuffer* dtmf_buffer,
61 DtmfToneGenerator* dtmf_tone_generator,
62 PacketBuffer* packet_buffer,
63 PayloadSplitter* payload_splitter,
64 TimestampScaler* timestamp_scaler,
65 AccelerateFactory* accelerate_factory,
66 ExpandFactory* expand_factory,
67 PreemptiveExpandFactory* preemptive_expand_factory,
68 bool create_components)
69 : crit_sect_(CriticalSectionWrapper::CreateCriticalSection()),
70 buffer_level_filter_(buffer_level_filter),
71 decoder_database_(decoder_database),
72 delay_manager_(delay_manager),
73 delay_peak_detector_(delay_peak_detector),
74 dtmf_buffer_(dtmf_buffer),
75 dtmf_tone_generator_(dtmf_tone_generator),
76 packet_buffer_(packet_buffer),
77 payload_splitter_(payload_splitter),
78 timestamp_scaler_(timestamp_scaler),
79 vad_(new PostDecodeVad()),
80 expand_factory_(expand_factory),
81 accelerate_factory_(accelerate_factory),
82 preemptive_expand_factory_(preemptive_expand_factory),
83 last_mode_(kModeNormal),
84 decoded_buffer_length_(kMaxFrameSize),
85 decoded_buffer_(new int16_t[decoded_buffer_length_]),
86 playout_timestamp_(0),
87 new_codec_(false),
88 timestamp_(0),
89 reset_decoder_(false),
90 current_rtp_payload_type_(0xFF), // Invalid RTP payload type.
91 current_cng_rtp_payload_type_(0xFF), // Invalid RTP payload type.
92 ssrc_(0),
93 first_packet_(true),
94 error_code_(0),
95 decoder_error_code_(0),
96 background_noise_mode_(config.background_noise_mode),
97 playout_mode_(config.playout_mode),
98 enable_fast_accelerate_(config.enable_fast_accelerate),
99 nack_enabled_(false) {
100 LOG(LS_INFO) << "NetEq config: " << config.ToString();
101 int fs = config.sample_rate_hz;
102 if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) {
103 LOG(LS_ERROR) << "Sample rate " << fs << " Hz not supported. " <<
104 "Changing to 8000 Hz.";
105 fs = 8000;
106 }
107 fs_hz_ = fs;
108 fs_mult_ = fs / 8000;
109 output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_);
110 decoder_frame_length_ = 3 * output_size_samples_;
111 WebRtcSpl_Init();
112 if (create_components) {
113 SetSampleRateAndChannels(fs, 1); // Default is 1 channel.
114 }
115 RTC_DCHECK(!vad_->enabled());
116 if (config.enable_post_decode_vad) {
117 vad_->Enable();
118 }
119 }
120
121 NetEqImpl::~NetEqImpl() = default;
122
InsertPacket(const WebRtcRTPHeader & rtp_header,const uint8_t * payload,size_t length_bytes,uint32_t receive_timestamp)123 int NetEqImpl::InsertPacket(const WebRtcRTPHeader& rtp_header,
124 const uint8_t* payload,
125 size_t length_bytes,
126 uint32_t receive_timestamp) {
127 CriticalSectionScoped lock(crit_sect_.get());
128 LOG(LS_VERBOSE) << "InsertPacket: ts=" << rtp_header.header.timestamp <<
129 ", sn=" << rtp_header.header.sequenceNumber <<
130 ", pt=" << static_cast<int>(rtp_header.header.payloadType) <<
131 ", ssrc=" << rtp_header.header.ssrc <<
132 ", len=" << length_bytes;
133 int error = InsertPacketInternal(rtp_header, payload, length_bytes,
134 receive_timestamp, false);
135 if (error != 0) {
136 error_code_ = error;
137 return kFail;
138 }
139 return kOK;
140 }
141
InsertSyncPacket(const WebRtcRTPHeader & rtp_header,uint32_t receive_timestamp)142 int NetEqImpl::InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
143 uint32_t receive_timestamp) {
144 CriticalSectionScoped lock(crit_sect_.get());
145 LOG(LS_VERBOSE) << "InsertPacket-Sync: ts="
146 << rtp_header.header.timestamp <<
147 ", sn=" << rtp_header.header.sequenceNumber <<
148 ", pt=" << static_cast<int>(rtp_header.header.payloadType) <<
149 ", ssrc=" << rtp_header.header.ssrc;
150
151 const uint8_t kSyncPayload[] = { 's', 'y', 'n', 'c' };
152 int error = InsertPacketInternal(
153 rtp_header, kSyncPayload, sizeof(kSyncPayload), receive_timestamp, true);
154
155 if (error != 0) {
156 error_code_ = error;
157 return kFail;
158 }
159 return kOK;
160 }
161
GetAudio(size_t max_length,int16_t * output_audio,size_t * samples_per_channel,int * num_channels,NetEqOutputType * type)162 int NetEqImpl::GetAudio(size_t max_length, int16_t* output_audio,
163 size_t* samples_per_channel, int* num_channels,
164 NetEqOutputType* type) {
165 CriticalSectionScoped lock(crit_sect_.get());
166 LOG(LS_VERBOSE) << "GetAudio";
167 int error = GetAudioInternal(max_length, output_audio, samples_per_channel,
168 num_channels);
169 LOG(LS_VERBOSE) << "Produced " << *samples_per_channel <<
170 " samples/channel for " << *num_channels << " channel(s)";
171 if (error != 0) {
172 error_code_ = error;
173 return kFail;
174 }
175 if (type) {
176 *type = LastOutputType();
177 }
178 return kOK;
179 }
180
RegisterPayloadType(NetEqDecoder codec,uint8_t rtp_payload_type)181 int NetEqImpl::RegisterPayloadType(NetEqDecoder codec,
182 uint8_t rtp_payload_type) {
183 CriticalSectionScoped lock(crit_sect_.get());
184 LOG(LS_VERBOSE) << "RegisterPayloadType "
185 << static_cast<int>(rtp_payload_type) << " "
186 << static_cast<int>(codec);
187 int ret = decoder_database_->RegisterPayload(rtp_payload_type, codec);
188 if (ret != DecoderDatabase::kOK) {
189 switch (ret) {
190 case DecoderDatabase::kInvalidRtpPayloadType:
191 error_code_ = kInvalidRtpPayloadType;
192 break;
193 case DecoderDatabase::kCodecNotSupported:
194 error_code_ = kCodecNotSupported;
195 break;
196 case DecoderDatabase::kDecoderExists:
197 error_code_ = kDecoderExists;
198 break;
199 default:
200 error_code_ = kOtherError;
201 }
202 return kFail;
203 }
204 return kOK;
205 }
206
RegisterExternalDecoder(AudioDecoder * decoder,NetEqDecoder codec,uint8_t rtp_payload_type,int sample_rate_hz)207 int NetEqImpl::RegisterExternalDecoder(AudioDecoder* decoder,
208 NetEqDecoder codec,
209 uint8_t rtp_payload_type,
210 int sample_rate_hz) {
211 CriticalSectionScoped lock(crit_sect_.get());
212 LOG(LS_VERBOSE) << "RegisterExternalDecoder "
213 << static_cast<int>(rtp_payload_type) << " "
214 << static_cast<int>(codec);
215 if (!decoder) {
216 LOG(LS_ERROR) << "Cannot register external decoder with NULL pointer";
217 assert(false);
218 return kFail;
219 }
220 int ret = decoder_database_->InsertExternal(rtp_payload_type, codec,
221 sample_rate_hz, decoder);
222 if (ret != DecoderDatabase::kOK) {
223 switch (ret) {
224 case DecoderDatabase::kInvalidRtpPayloadType:
225 error_code_ = kInvalidRtpPayloadType;
226 break;
227 case DecoderDatabase::kCodecNotSupported:
228 error_code_ = kCodecNotSupported;
229 break;
230 case DecoderDatabase::kDecoderExists:
231 error_code_ = kDecoderExists;
232 break;
233 case DecoderDatabase::kInvalidSampleRate:
234 error_code_ = kInvalidSampleRate;
235 break;
236 case DecoderDatabase::kInvalidPointer:
237 error_code_ = kInvalidPointer;
238 break;
239 default:
240 error_code_ = kOtherError;
241 }
242 return kFail;
243 }
244 return kOK;
245 }
246
RemovePayloadType(uint8_t rtp_payload_type)247 int NetEqImpl::RemovePayloadType(uint8_t rtp_payload_type) {
248 CriticalSectionScoped lock(crit_sect_.get());
249 int ret = decoder_database_->Remove(rtp_payload_type);
250 if (ret == DecoderDatabase::kOK) {
251 return kOK;
252 } else if (ret == DecoderDatabase::kDecoderNotFound) {
253 error_code_ = kDecoderNotFound;
254 } else {
255 error_code_ = kOtherError;
256 }
257 return kFail;
258 }
259
SetMinimumDelay(int delay_ms)260 bool NetEqImpl::SetMinimumDelay(int delay_ms) {
261 CriticalSectionScoped lock(crit_sect_.get());
262 if (delay_ms >= 0 && delay_ms < 10000) {
263 assert(delay_manager_.get());
264 return delay_manager_->SetMinimumDelay(delay_ms);
265 }
266 return false;
267 }
268
SetMaximumDelay(int delay_ms)269 bool NetEqImpl::SetMaximumDelay(int delay_ms) {
270 CriticalSectionScoped lock(crit_sect_.get());
271 if (delay_ms >= 0 && delay_ms < 10000) {
272 assert(delay_manager_.get());
273 return delay_manager_->SetMaximumDelay(delay_ms);
274 }
275 return false;
276 }
277
LeastRequiredDelayMs() const278 int NetEqImpl::LeastRequiredDelayMs() const {
279 CriticalSectionScoped lock(crit_sect_.get());
280 assert(delay_manager_.get());
281 return delay_manager_->least_required_delay_ms();
282 }
283
SetTargetDelay()284 int NetEqImpl::SetTargetDelay() {
285 return kNotImplemented;
286 }
287
TargetDelay()288 int NetEqImpl::TargetDelay() {
289 return kNotImplemented;
290 }
291
CurrentDelayMs() const292 int NetEqImpl::CurrentDelayMs() const {
293 CriticalSectionScoped lock(crit_sect_.get());
294 if (fs_hz_ == 0)
295 return 0;
296 // Sum up the samples in the packet buffer with the future length of the sync
297 // buffer, and divide the sum by the sample rate.
298 const size_t delay_samples =
299 packet_buffer_->NumSamplesInBuffer(decoder_database_.get(),
300 decoder_frame_length_) +
301 sync_buffer_->FutureLength();
302 // The division below will truncate.
303 const int delay_ms =
304 static_cast<int>(delay_samples) / rtc::CheckedDivExact(fs_hz_, 1000);
305 return delay_ms;
306 }
307
308 // Deprecated.
309 // TODO(henrik.lundin) Delete.
SetPlayoutMode(NetEqPlayoutMode mode)310 void NetEqImpl::SetPlayoutMode(NetEqPlayoutMode mode) {
311 CriticalSectionScoped lock(crit_sect_.get());
312 if (mode != playout_mode_) {
313 playout_mode_ = mode;
314 CreateDecisionLogic();
315 }
316 }
317
318 // Deprecated.
319 // TODO(henrik.lundin) Delete.
PlayoutMode() const320 NetEqPlayoutMode NetEqImpl::PlayoutMode() const {
321 CriticalSectionScoped lock(crit_sect_.get());
322 return playout_mode_;
323 }
324
NetworkStatistics(NetEqNetworkStatistics * stats)325 int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) {
326 CriticalSectionScoped lock(crit_sect_.get());
327 assert(decoder_database_.get());
328 const size_t total_samples_in_buffers =
329 packet_buffer_->NumSamplesInBuffer(decoder_database_.get(),
330 decoder_frame_length_) +
331 sync_buffer_->FutureLength();
332 assert(delay_manager_.get());
333 assert(decision_logic_.get());
334 stats_.GetNetworkStatistics(fs_hz_, total_samples_in_buffers,
335 decoder_frame_length_, *delay_manager_.get(),
336 *decision_logic_.get(), stats);
337 return 0;
338 }
339
GetRtcpStatistics(RtcpStatistics * stats)340 void NetEqImpl::GetRtcpStatistics(RtcpStatistics* stats) {
341 CriticalSectionScoped lock(crit_sect_.get());
342 if (stats) {
343 rtcp_.GetStatistics(false, stats);
344 }
345 }
346
GetRtcpStatisticsNoReset(RtcpStatistics * stats)347 void NetEqImpl::GetRtcpStatisticsNoReset(RtcpStatistics* stats) {
348 CriticalSectionScoped lock(crit_sect_.get());
349 if (stats) {
350 rtcp_.GetStatistics(true, stats);
351 }
352 }
353
EnableVad()354 void NetEqImpl::EnableVad() {
355 CriticalSectionScoped lock(crit_sect_.get());
356 assert(vad_.get());
357 vad_->Enable();
358 }
359
DisableVad()360 void NetEqImpl::DisableVad() {
361 CriticalSectionScoped lock(crit_sect_.get());
362 assert(vad_.get());
363 vad_->Disable();
364 }
365
GetPlayoutTimestamp(uint32_t * timestamp)366 bool NetEqImpl::GetPlayoutTimestamp(uint32_t* timestamp) {
367 CriticalSectionScoped lock(crit_sect_.get());
368 if (first_packet_) {
369 // We don't have a valid RTP timestamp until we have decoded our first
370 // RTP packet.
371 return false;
372 }
373 *timestamp = timestamp_scaler_->ToExternal(playout_timestamp_);
374 return true;
375 }
376
SetTargetNumberOfChannels()377 int NetEqImpl::SetTargetNumberOfChannels() {
378 return kNotImplemented;
379 }
380
SetTargetSampleRate()381 int NetEqImpl::SetTargetSampleRate() {
382 return kNotImplemented;
383 }
384
LastError() const385 int NetEqImpl::LastError() const {
386 CriticalSectionScoped lock(crit_sect_.get());
387 return error_code_;
388 }
389
LastDecoderError()390 int NetEqImpl::LastDecoderError() {
391 CriticalSectionScoped lock(crit_sect_.get());
392 return decoder_error_code_;
393 }
394
FlushBuffers()395 void NetEqImpl::FlushBuffers() {
396 CriticalSectionScoped lock(crit_sect_.get());
397 LOG(LS_VERBOSE) << "FlushBuffers";
398 packet_buffer_->Flush();
399 assert(sync_buffer_.get());
400 assert(expand_.get());
401 sync_buffer_->Flush();
402 sync_buffer_->set_next_index(sync_buffer_->next_index() -
403 expand_->overlap_length());
404 // Set to wait for new codec.
405 first_packet_ = true;
406 }
407
PacketBufferStatistics(int * current_num_packets,int * max_num_packets) const408 void NetEqImpl::PacketBufferStatistics(int* current_num_packets,
409 int* max_num_packets) const {
410 CriticalSectionScoped lock(crit_sect_.get());
411 packet_buffer_->BufferStat(current_num_packets, max_num_packets);
412 }
413
EnableNack(size_t max_nack_list_size)414 void NetEqImpl::EnableNack(size_t max_nack_list_size) {
415 CriticalSectionScoped lock(crit_sect_.get());
416 if (!nack_enabled_) {
417 const int kNackThresholdPackets = 2;
418 nack_.reset(Nack::Create(kNackThresholdPackets));
419 nack_enabled_ = true;
420 nack_->UpdateSampleRate(fs_hz_);
421 }
422 nack_->SetMaxNackListSize(max_nack_list_size);
423 }
424
DisableNack()425 void NetEqImpl::DisableNack() {
426 CriticalSectionScoped lock(crit_sect_.get());
427 nack_.reset();
428 nack_enabled_ = false;
429 }
430
GetNackList(int64_t round_trip_time_ms) const431 std::vector<uint16_t> NetEqImpl::GetNackList(int64_t round_trip_time_ms) const {
432 CriticalSectionScoped lock(crit_sect_.get());
433 if (!nack_enabled_) {
434 return std::vector<uint16_t>();
435 }
436 RTC_DCHECK(nack_.get());
437 return nack_->GetNackList(round_trip_time_ms);
438 }
439
sync_buffer_for_test() const440 const SyncBuffer* NetEqImpl::sync_buffer_for_test() const {
441 CriticalSectionScoped lock(crit_sect_.get());
442 return sync_buffer_.get();
443 }
444
445 // Methods below this line are private.
446
InsertPacketInternal(const WebRtcRTPHeader & rtp_header,const uint8_t * payload,size_t length_bytes,uint32_t receive_timestamp,bool is_sync_packet)447 int NetEqImpl::InsertPacketInternal(const WebRtcRTPHeader& rtp_header,
448 const uint8_t* payload,
449 size_t length_bytes,
450 uint32_t receive_timestamp,
451 bool is_sync_packet) {
452 if (!payload) {
453 LOG_F(LS_ERROR) << "payload == NULL";
454 return kInvalidPointer;
455 }
456 // Sanity checks for sync-packets.
457 if (is_sync_packet) {
458 if (decoder_database_->IsDtmf(rtp_header.header.payloadType) ||
459 decoder_database_->IsRed(rtp_header.header.payloadType) ||
460 decoder_database_->IsComfortNoise(rtp_header.header.payloadType)) {
461 LOG_F(LS_ERROR) << "Sync-packet with an unacceptable payload type "
462 << static_cast<int>(rtp_header.header.payloadType);
463 return kSyncPacketNotAccepted;
464 }
465 if (first_packet_ ||
466 rtp_header.header.payloadType != current_rtp_payload_type_ ||
467 rtp_header.header.ssrc != ssrc_) {
468 // Even if |current_rtp_payload_type_| is 0xFF, sync-packet isn't
469 // accepted.
470 LOG_F(LS_ERROR)
471 << "Changing codec, SSRC or first packet with sync-packet.";
472 return kSyncPacketNotAccepted;
473 }
474 }
475 PacketList packet_list;
476 RTPHeader main_header;
477 {
478 // Convert to Packet.
479 // Create |packet| within this separate scope, since it should not be used
480 // directly once it's been inserted in the packet list. This way, |packet|
481 // is not defined outside of this block.
482 Packet* packet = new Packet;
483 packet->header.markerBit = false;
484 packet->header.payloadType = rtp_header.header.payloadType;
485 packet->header.sequenceNumber = rtp_header.header.sequenceNumber;
486 packet->header.timestamp = rtp_header.header.timestamp;
487 packet->header.ssrc = rtp_header.header.ssrc;
488 packet->header.numCSRCs = 0;
489 packet->payload_length = length_bytes;
490 packet->primary = true;
491 packet->waiting_time = 0;
492 packet->payload = new uint8_t[packet->payload_length];
493 packet->sync_packet = is_sync_packet;
494 if (!packet->payload) {
495 LOG_F(LS_ERROR) << "Payload pointer is NULL.";
496 }
497 assert(payload); // Already checked above.
498 memcpy(packet->payload, payload, packet->payload_length);
499 // Insert packet in a packet list.
500 packet_list.push_back(packet);
501 // Save main payloads header for later.
502 memcpy(&main_header, &packet->header, sizeof(main_header));
503 }
504
505 bool update_sample_rate_and_channels = false;
506 // Reinitialize NetEq if it's needed (changed SSRC or first call).
507 if ((main_header.ssrc != ssrc_) || first_packet_) {
508 // Note: |first_packet_| will be cleared further down in this method, once
509 // the packet has been successfully inserted into the packet buffer.
510
511 rtcp_.Init(main_header.sequenceNumber);
512
513 // Flush the packet buffer and DTMF buffer.
514 packet_buffer_->Flush();
515 dtmf_buffer_->Flush();
516
517 // Store new SSRC.
518 ssrc_ = main_header.ssrc;
519
520 // Update audio buffer timestamp.
521 sync_buffer_->IncreaseEndTimestamp(main_header.timestamp - timestamp_);
522
523 // Update codecs.
524 timestamp_ = main_header.timestamp;
525 current_rtp_payload_type_ = main_header.payloadType;
526
527 // Reset timestamp scaling.
528 timestamp_scaler_->Reset();
529
530 // Trigger an update of sampling rate and the number of channels.
531 update_sample_rate_and_channels = true;
532 }
533
534 // Update RTCP statistics, only for regular packets.
535 if (!is_sync_packet)
536 rtcp_.Update(main_header, receive_timestamp);
537
538 // Check for RED payload type, and separate payloads into several packets.
539 if (decoder_database_->IsRed(main_header.payloadType)) {
540 assert(!is_sync_packet); // We had a sanity check for this.
541 if (payload_splitter_->SplitRed(&packet_list) != PayloadSplitter::kOK) {
542 PacketBuffer::DeleteAllPackets(&packet_list);
543 return kRedundancySplitError;
544 }
545 // Only accept a few RED payloads of the same type as the main data,
546 // DTMF events and CNG.
547 payload_splitter_->CheckRedPayloads(&packet_list, *decoder_database_);
548 // Update the stored main payload header since the main payload has now
549 // changed.
550 memcpy(&main_header, &packet_list.front()->header, sizeof(main_header));
551 }
552
553 // Check payload types.
554 if (decoder_database_->CheckPayloadTypes(packet_list) ==
555 DecoderDatabase::kDecoderNotFound) {
556 PacketBuffer::DeleteAllPackets(&packet_list);
557 return kUnknownRtpPayloadType;
558 }
559
560 // Scale timestamp to internal domain (only for some codecs).
561 timestamp_scaler_->ToInternal(&packet_list);
562
563 // Process DTMF payloads. Cycle through the list of packets, and pick out any
564 // DTMF payloads found.
565 PacketList::iterator it = packet_list.begin();
566 while (it != packet_list.end()) {
567 Packet* current_packet = (*it);
568 assert(current_packet);
569 assert(current_packet->payload);
570 if (decoder_database_->IsDtmf(current_packet->header.payloadType)) {
571 assert(!current_packet->sync_packet); // We had a sanity check for this.
572 DtmfEvent event;
573 int ret = DtmfBuffer::ParseEvent(
574 current_packet->header.timestamp,
575 current_packet->payload,
576 current_packet->payload_length,
577 &event);
578 if (ret != DtmfBuffer::kOK) {
579 PacketBuffer::DeleteAllPackets(&packet_list);
580 return kDtmfParsingError;
581 }
582 if (dtmf_buffer_->InsertEvent(event) != DtmfBuffer::kOK) {
583 PacketBuffer::DeleteAllPackets(&packet_list);
584 return kDtmfInsertError;
585 }
586 // TODO(hlundin): Let the destructor of Packet handle the payload.
587 delete [] current_packet->payload;
588 delete current_packet;
589 it = packet_list.erase(it);
590 } else {
591 ++it;
592 }
593 }
594
595 // Check for FEC in packets, and separate payloads into several packets.
596 int ret = payload_splitter_->SplitFec(&packet_list, decoder_database_.get());
597 if (ret != PayloadSplitter::kOK) {
598 PacketBuffer::DeleteAllPackets(&packet_list);
599 switch (ret) {
600 case PayloadSplitter::kUnknownPayloadType:
601 return kUnknownRtpPayloadType;
602 default:
603 return kOtherError;
604 }
605 }
606
607 // Split payloads into smaller chunks. This also verifies that all payloads
608 // are of a known payload type. SplitAudio() method is protected against
609 // sync-packets.
610 ret = payload_splitter_->SplitAudio(&packet_list, *decoder_database_);
611 if (ret != PayloadSplitter::kOK) {
612 PacketBuffer::DeleteAllPackets(&packet_list);
613 switch (ret) {
614 case PayloadSplitter::kUnknownPayloadType:
615 return kUnknownRtpPayloadType;
616 case PayloadSplitter::kFrameSplitError:
617 return kFrameSplitError;
618 default:
619 return kOtherError;
620 }
621 }
622
623 // Update bandwidth estimate, if the packet is not sync-packet.
624 if (!packet_list.empty() && !packet_list.front()->sync_packet) {
625 // The list can be empty here if we got nothing but DTMF payloads.
626 AudioDecoder* decoder =
627 decoder_database_->GetDecoder(main_header.payloadType);
628 assert(decoder); // Should always get a valid object, since we have
629 // already checked that the payload types are known.
630 decoder->IncomingPacket(packet_list.front()->payload,
631 packet_list.front()->payload_length,
632 packet_list.front()->header.sequenceNumber,
633 packet_list.front()->header.timestamp,
634 receive_timestamp);
635 }
636
637 if (nack_enabled_) {
638 RTC_DCHECK(nack_);
639 if (update_sample_rate_and_channels) {
640 nack_->Reset();
641 }
642 nack_->UpdateLastReceivedPacket(packet_list.front()->header.sequenceNumber,
643 packet_list.front()->header.timestamp);
644 }
645
646 // Insert packets in buffer.
647 const size_t buffer_length_before_insert =
648 packet_buffer_->NumPacketsInBuffer();
649 ret = packet_buffer_->InsertPacketList(
650 &packet_list,
651 *decoder_database_,
652 ¤t_rtp_payload_type_,
653 ¤t_cng_rtp_payload_type_);
654 if (ret == PacketBuffer::kFlushed) {
655 // Reset DSP timestamp etc. if packet buffer flushed.
656 new_codec_ = true;
657 update_sample_rate_and_channels = true;
658 } else if (ret != PacketBuffer::kOK) {
659 PacketBuffer::DeleteAllPackets(&packet_list);
660 return kOtherError;
661 }
662
663 if (first_packet_) {
664 first_packet_ = false;
665 // Update the codec on the next GetAudio call.
666 new_codec_ = true;
667 }
668
669 if (current_rtp_payload_type_ != 0xFF) {
670 const DecoderDatabase::DecoderInfo* dec_info =
671 decoder_database_->GetDecoderInfo(current_rtp_payload_type_);
672 if (!dec_info) {
673 assert(false); // Already checked that the payload type is known.
674 }
675 }
676
677 if (update_sample_rate_and_channels && !packet_buffer_->Empty()) {
678 // We do not use |current_rtp_payload_type_| to |set payload_type|, but
679 // get the next RTP header from |packet_buffer_| to obtain the payload type.
680 // The reason for it is the following corner case. If NetEq receives a
681 // CNG packet with a sample rate different than the current CNG then it
682 // flushes its buffer, assuming send codec must have been changed. However,
683 // payload type of the hypothetically new send codec is not known.
684 const RTPHeader* rtp_header = packet_buffer_->NextRtpHeader();
685 assert(rtp_header);
686 int payload_type = rtp_header->payloadType;
687 AudioDecoder* decoder = decoder_database_->GetDecoder(payload_type);
688 assert(decoder); // Payloads are already checked to be valid.
689 const DecoderDatabase::DecoderInfo* decoder_info =
690 decoder_database_->GetDecoderInfo(payload_type);
691 assert(decoder_info);
692 if (decoder_info->fs_hz != fs_hz_ ||
693 decoder->Channels() != algorithm_buffer_->Channels()) {
694 SetSampleRateAndChannels(decoder_info->fs_hz, decoder->Channels());
695 }
696 if (nack_enabled_) {
697 RTC_DCHECK(nack_);
698 // Update the sample rate even if the rate is not new, because of Reset().
699 nack_->UpdateSampleRate(fs_hz_);
700 }
701 }
702
703 // TODO(hlundin): Move this code to DelayManager class.
704 const DecoderDatabase::DecoderInfo* dec_info =
705 decoder_database_->GetDecoderInfo(main_header.payloadType);
706 assert(dec_info); // Already checked that the payload type is known.
707 delay_manager_->LastDecoderType(dec_info->codec_type);
708 if (delay_manager_->last_pack_cng_or_dtmf() == 0) {
709 // Calculate the total speech length carried in each packet.
710 const size_t buffer_length_after_insert =
711 packet_buffer_->NumPacketsInBuffer();
712
713 if (buffer_length_after_insert > buffer_length_before_insert) {
714 const size_t packet_length_samples =
715 (buffer_length_after_insert - buffer_length_before_insert) *
716 decoder_frame_length_;
717 if (packet_length_samples != decision_logic_->packet_length_samples()) {
718 decision_logic_->set_packet_length_samples(packet_length_samples);
719 delay_manager_->SetPacketAudioLength(
720 rtc::checked_cast<int>((1000 * packet_length_samples) / fs_hz_));
721 }
722 }
723
724 // Update statistics.
725 if ((int32_t) (main_header.timestamp - timestamp_) >= 0 &&
726 !new_codec_) {
727 // Only update statistics if incoming packet is not older than last played
728 // out packet, and if new codec flag is not set.
729 delay_manager_->Update(main_header.sequenceNumber, main_header.timestamp,
730 fs_hz_);
731 }
732 } else if (delay_manager_->last_pack_cng_or_dtmf() == -1) {
733 // This is first "normal" packet after CNG or DTMF.
734 // Reset packet time counter and measure time until next packet,
735 // but don't update statistics.
736 delay_manager_->set_last_pack_cng_or_dtmf(0);
737 delay_manager_->ResetPacketIatCount();
738 }
739 return 0;
740 }
741
GetAudioInternal(size_t max_length,int16_t * output,size_t * samples_per_channel,int * num_channels)742 int NetEqImpl::GetAudioInternal(size_t max_length,
743 int16_t* output,
744 size_t* samples_per_channel,
745 int* num_channels) {
746 PacketList packet_list;
747 DtmfEvent dtmf_event;
748 Operations operation;
749 bool play_dtmf;
750 int return_value = GetDecision(&operation, &packet_list, &dtmf_event,
751 &play_dtmf);
752 if (return_value != 0) {
753 last_mode_ = kModeError;
754 return return_value;
755 }
756 LOG(LS_VERBOSE) << "GetDecision returned operation=" << operation <<
757 " and " << packet_list.size() << " packet(s)";
758
759 AudioDecoder::SpeechType speech_type;
760 int length = 0;
761 int decode_return_value = Decode(&packet_list, &operation,
762 &length, &speech_type);
763
764 assert(vad_.get());
765 bool sid_frame_available =
766 (operation == kRfc3389Cng && !packet_list.empty());
767 vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type,
768 sid_frame_available, fs_hz_);
769
770 algorithm_buffer_->Clear();
771 switch (operation) {
772 case kNormal: {
773 DoNormal(decoded_buffer_.get(), length, speech_type, play_dtmf);
774 break;
775 }
776 case kMerge: {
777 DoMerge(decoded_buffer_.get(), length, speech_type, play_dtmf);
778 break;
779 }
780 case kExpand: {
781 return_value = DoExpand(play_dtmf);
782 break;
783 }
784 case kAccelerate:
785 case kFastAccelerate: {
786 const bool fast_accelerate =
787 enable_fast_accelerate_ && (operation == kFastAccelerate);
788 return_value = DoAccelerate(decoded_buffer_.get(), length, speech_type,
789 play_dtmf, fast_accelerate);
790 break;
791 }
792 case kPreemptiveExpand: {
793 return_value = DoPreemptiveExpand(decoded_buffer_.get(), length,
794 speech_type, play_dtmf);
795 break;
796 }
797 case kRfc3389Cng:
798 case kRfc3389CngNoPacket: {
799 return_value = DoRfc3389Cng(&packet_list, play_dtmf);
800 break;
801 }
802 case kCodecInternalCng: {
803 // This handles the case when there is no transmission and the decoder
804 // should produce internal comfort noise.
805 // TODO(hlundin): Write test for codec-internal CNG.
806 DoCodecInternalCng(decoded_buffer_.get(), length);
807 break;
808 }
809 case kDtmf: {
810 // TODO(hlundin): Write test for this.
811 return_value = DoDtmf(dtmf_event, &play_dtmf);
812 break;
813 }
814 case kAlternativePlc: {
815 // TODO(hlundin): Write test for this.
816 DoAlternativePlc(false);
817 break;
818 }
819 case kAlternativePlcIncreaseTimestamp: {
820 // TODO(hlundin): Write test for this.
821 DoAlternativePlc(true);
822 break;
823 }
824 case kAudioRepetitionIncreaseTimestamp: {
825 // TODO(hlundin): Write test for this.
826 sync_buffer_->IncreaseEndTimestamp(
827 static_cast<uint32_t>(output_size_samples_));
828 // Skipping break on purpose. Execution should move on into the
829 // next case.
830 FALLTHROUGH();
831 }
832 case kAudioRepetition: {
833 // TODO(hlundin): Write test for this.
834 // Copy last |output_size_samples_| from |sync_buffer_| to
835 // |algorithm_buffer|.
836 algorithm_buffer_->PushBackFromIndex(
837 *sync_buffer_, sync_buffer_->Size() - output_size_samples_);
838 expand_->Reset();
839 break;
840 }
841 case kUndefined: {
842 LOG(LS_ERROR) << "Invalid operation kUndefined.";
843 assert(false); // This should not happen.
844 last_mode_ = kModeError;
845 return kInvalidOperation;
846 }
847 } // End of switch.
848 if (return_value < 0) {
849 return return_value;
850 }
851
852 if (last_mode_ != kModeRfc3389Cng) {
853 comfort_noise_->Reset();
854 }
855
856 // Copy from |algorithm_buffer| to |sync_buffer_|.
857 sync_buffer_->PushBack(*algorithm_buffer_);
858
859 // Extract data from |sync_buffer_| to |output|.
860 size_t num_output_samples_per_channel = output_size_samples_;
861 size_t num_output_samples = output_size_samples_ * sync_buffer_->Channels();
862 if (num_output_samples > max_length) {
863 LOG(LS_WARNING) << "Output array is too short. " << max_length << " < " <<
864 output_size_samples_ << " * " << sync_buffer_->Channels();
865 num_output_samples = max_length;
866 num_output_samples_per_channel = max_length / sync_buffer_->Channels();
867 }
868 const size_t samples_from_sync =
869 sync_buffer_->GetNextAudioInterleaved(num_output_samples_per_channel,
870 output);
871 *num_channels = static_cast<int>(sync_buffer_->Channels());
872 LOG(LS_VERBOSE) << "Sync buffer (" << *num_channels << " channel(s)):" <<
873 " insert " << algorithm_buffer_->Size() << " samples, extract " <<
874 samples_from_sync << " samples";
875 if (sync_buffer_->FutureLength() < expand_->overlap_length()) {
876 // The sync buffer should always contain |overlap_length| samples, but now
877 // too many samples have been extracted. Reinstall the |overlap_length|
878 // lookahead by moving the index.
879 const size_t missing_lookahead_samples =
880 expand_->overlap_length() - sync_buffer_->FutureLength();
881 RTC_DCHECK_GE(sync_buffer_->next_index(), missing_lookahead_samples);
882 sync_buffer_->set_next_index(sync_buffer_->next_index() -
883 missing_lookahead_samples);
884 }
885 if (samples_from_sync != output_size_samples_) {
886 LOG(LS_ERROR) << "samples_from_sync (" << samples_from_sync
887 << ") != output_size_samples_ (" << output_size_samples_
888 << ")";
889 // TODO(minyue): treatment of under-run, filling zeros
890 memset(output, 0, num_output_samples * sizeof(int16_t));
891 *samples_per_channel = output_size_samples_;
892 return kSampleUnderrun;
893 }
894 *samples_per_channel = output_size_samples_;
895
896 // Should always have overlap samples left in the |sync_buffer_|.
897 RTC_DCHECK_GE(sync_buffer_->FutureLength(), expand_->overlap_length());
898
899 if (play_dtmf) {
900 return_value = DtmfOverdub(dtmf_event, sync_buffer_->Channels(), output);
901 }
902
903 // Update the background noise parameters if last operation wrote data
904 // straight from the decoder to the |sync_buffer_|. That is, none of the
905 // operations that modify the signal can be followed by a parameter update.
906 if ((last_mode_ == kModeNormal) ||
907 (last_mode_ == kModeAccelerateFail) ||
908 (last_mode_ == kModePreemptiveExpandFail) ||
909 (last_mode_ == kModeRfc3389Cng) ||
910 (last_mode_ == kModeCodecInternalCng)) {
911 background_noise_->Update(*sync_buffer_, *vad_.get());
912 }
913
914 if (operation == kDtmf) {
915 // DTMF data was written to the end of |sync_buffer_|.
916 // Update index to end of DTMF data in |sync_buffer_|.
917 sync_buffer_->set_dtmf_index(sync_buffer_->Size());
918 }
919
920 if (last_mode_ != kModeExpand) {
921 // If last operation was not expand, calculate the |playout_timestamp_| from
922 // the |sync_buffer_|. However, do not update the |playout_timestamp_| if it
923 // would be moved "backwards".
924 uint32_t temp_timestamp = sync_buffer_->end_timestamp() -
925 static_cast<uint32_t>(sync_buffer_->FutureLength());
926 if (static_cast<int32_t>(temp_timestamp - playout_timestamp_) > 0) {
927 playout_timestamp_ = temp_timestamp;
928 }
929 } else {
930 // Use dead reckoning to estimate the |playout_timestamp_|.
931 playout_timestamp_ += static_cast<uint32_t>(output_size_samples_);
932 }
933
934 if (decode_return_value) return decode_return_value;
935 return return_value;
936 }
937
GetDecision(Operations * operation,PacketList * packet_list,DtmfEvent * dtmf_event,bool * play_dtmf)938 int NetEqImpl::GetDecision(Operations* operation,
939 PacketList* packet_list,
940 DtmfEvent* dtmf_event,
941 bool* play_dtmf) {
942 // Initialize output variables.
943 *play_dtmf = false;
944 *operation = kUndefined;
945
946 // Increment time counters.
947 packet_buffer_->IncrementWaitingTimes();
948 stats_.IncreaseCounter(output_size_samples_, fs_hz_);
949
950 assert(sync_buffer_.get());
951 uint32_t end_timestamp = sync_buffer_->end_timestamp();
952 if (!new_codec_) {
953 const uint32_t five_seconds_samples = 5 * fs_hz_;
954 packet_buffer_->DiscardOldPackets(end_timestamp, five_seconds_samples);
955 }
956 const RTPHeader* header = packet_buffer_->NextRtpHeader();
957
958 if (decision_logic_->CngRfc3389On() || last_mode_ == kModeRfc3389Cng) {
959 // Because of timestamp peculiarities, we have to "manually" disallow using
960 // a CNG packet with the same timestamp as the one that was last played.
961 // This can happen when using redundancy and will cause the timing to shift.
962 while (header && decoder_database_->IsComfortNoise(header->payloadType) &&
963 (end_timestamp >= header->timestamp ||
964 end_timestamp + decision_logic_->generated_noise_samples() >
965 header->timestamp)) {
966 // Don't use this packet, discard it.
967 if (packet_buffer_->DiscardNextPacket() != PacketBuffer::kOK) {
968 assert(false); // Must be ok by design.
969 }
970 // Check buffer again.
971 if (!new_codec_) {
972 packet_buffer_->DiscardOldPackets(end_timestamp, 5 * fs_hz_);
973 }
974 header = packet_buffer_->NextRtpHeader();
975 }
976 }
977
978 assert(expand_.get());
979 const int samples_left = static_cast<int>(sync_buffer_->FutureLength() -
980 expand_->overlap_length());
981 if (last_mode_ == kModeAccelerateSuccess ||
982 last_mode_ == kModeAccelerateLowEnergy ||
983 last_mode_ == kModePreemptiveExpandSuccess ||
984 last_mode_ == kModePreemptiveExpandLowEnergy) {
985 // Subtract (samples_left + output_size_samples_) from sampleMemory.
986 decision_logic_->AddSampleMemory(
987 -(samples_left + rtc::checked_cast<int>(output_size_samples_)));
988 }
989
990 // Check if it is time to play a DTMF event.
991 if (dtmf_buffer_->GetEvent(
992 static_cast<uint32_t>(
993 end_timestamp + decision_logic_->generated_noise_samples()),
994 dtmf_event)) {
995 *play_dtmf = true;
996 }
997
998 // Get instruction.
999 assert(sync_buffer_.get());
1000 assert(expand_.get());
1001 *operation = decision_logic_->GetDecision(*sync_buffer_,
1002 *expand_,
1003 decoder_frame_length_,
1004 header,
1005 last_mode_,
1006 *play_dtmf,
1007 &reset_decoder_);
1008
1009 // Check if we already have enough samples in the |sync_buffer_|. If so,
1010 // change decision to normal, unless the decision was merge, accelerate, or
1011 // preemptive expand.
1012 if (samples_left >= rtc::checked_cast<int>(output_size_samples_) &&
1013 *operation != kMerge &&
1014 *operation != kAccelerate &&
1015 *operation != kFastAccelerate &&
1016 *operation != kPreemptiveExpand) {
1017 *operation = kNormal;
1018 return 0;
1019 }
1020
1021 decision_logic_->ExpandDecision(*operation);
1022
1023 // Check conditions for reset.
1024 if (new_codec_ || *operation == kUndefined) {
1025 // The only valid reason to get kUndefined is that new_codec_ is set.
1026 assert(new_codec_);
1027 if (*play_dtmf && !header) {
1028 timestamp_ = dtmf_event->timestamp;
1029 } else {
1030 if (!header) {
1031 LOG(LS_ERROR) << "Packet missing where it shouldn't.";
1032 return -1;
1033 }
1034 timestamp_ = header->timestamp;
1035 if (*operation == kRfc3389CngNoPacket
1036 #ifndef LEGACY_BITEXACT
1037 // Without this check, it can happen that a non-CNG packet is sent to
1038 // the CNG decoder as if it was a SID frame. This is clearly a bug,
1039 // but is kept for now to maintain bit-exactness with the test
1040 // vectors.
1041 && decoder_database_->IsComfortNoise(header->payloadType)
1042 #endif
1043 ) {
1044 // Change decision to CNG packet, since we do have a CNG packet, but it
1045 // was considered too early to use. Now, use it anyway.
1046 *operation = kRfc3389Cng;
1047 } else if (*operation != kRfc3389Cng) {
1048 *operation = kNormal;
1049 }
1050 }
1051 // Adjust |sync_buffer_| timestamp before setting |end_timestamp| to the
1052 // new value.
1053 sync_buffer_->IncreaseEndTimestamp(timestamp_ - end_timestamp);
1054 end_timestamp = timestamp_;
1055 new_codec_ = false;
1056 decision_logic_->SoftReset();
1057 buffer_level_filter_->Reset();
1058 delay_manager_->Reset();
1059 stats_.ResetMcu();
1060 }
1061
1062 size_t required_samples = output_size_samples_;
1063 const size_t samples_10_ms = static_cast<size_t>(80 * fs_mult_);
1064 const size_t samples_20_ms = 2 * samples_10_ms;
1065 const size_t samples_30_ms = 3 * samples_10_ms;
1066
1067 switch (*operation) {
1068 case kExpand: {
1069 timestamp_ = end_timestamp;
1070 return 0;
1071 }
1072 case kRfc3389CngNoPacket:
1073 case kCodecInternalCng: {
1074 return 0;
1075 }
1076 case kDtmf: {
1077 // TODO(hlundin): Write test for this.
1078 // Update timestamp.
1079 timestamp_ = end_timestamp;
1080 if (decision_logic_->generated_noise_samples() > 0 &&
1081 last_mode_ != kModeDtmf) {
1082 // Make a jump in timestamp due to the recently played comfort noise.
1083 uint32_t timestamp_jump =
1084 static_cast<uint32_t>(decision_logic_->generated_noise_samples());
1085 sync_buffer_->IncreaseEndTimestamp(timestamp_jump);
1086 timestamp_ += timestamp_jump;
1087 }
1088 decision_logic_->set_generated_noise_samples(0);
1089 return 0;
1090 }
1091 case kAccelerate:
1092 case kFastAccelerate: {
1093 // In order to do an accelerate we need at least 30 ms of audio data.
1094 if (samples_left >= static_cast<int>(samples_30_ms)) {
1095 // Already have enough data, so we do not need to extract any more.
1096 decision_logic_->set_sample_memory(samples_left);
1097 decision_logic_->set_prev_time_scale(true);
1098 return 0;
1099 } else if (samples_left >= static_cast<int>(samples_10_ms) &&
1100 decoder_frame_length_ >= samples_30_ms) {
1101 // Avoid decoding more data as it might overflow the playout buffer.
1102 *operation = kNormal;
1103 return 0;
1104 } else if (samples_left < static_cast<int>(samples_20_ms) &&
1105 decoder_frame_length_ < samples_30_ms) {
1106 // Build up decoded data by decoding at least 20 ms of audio data. Do
1107 // not perform accelerate yet, but wait until we only need to do one
1108 // decoding.
1109 required_samples = 2 * output_size_samples_;
1110 *operation = kNormal;
1111 }
1112 // If none of the above is true, we have one of two possible situations:
1113 // (1) 20 ms <= samples_left < 30 ms and decoder_frame_length_ < 30 ms; or
1114 // (2) samples_left < 10 ms and decoder_frame_length_ >= 30 ms.
1115 // In either case, we move on with the accelerate decision, and decode one
1116 // frame now.
1117 break;
1118 }
1119 case kPreemptiveExpand: {
1120 // In order to do a preemptive expand we need at least 30 ms of decoded
1121 // audio data.
1122 if ((samples_left >= static_cast<int>(samples_30_ms)) ||
1123 (samples_left >= static_cast<int>(samples_10_ms) &&
1124 decoder_frame_length_ >= samples_30_ms)) {
1125 // Already have enough data, so we do not need to extract any more.
1126 // Or, avoid decoding more data as it might overflow the playout buffer.
1127 // Still try preemptive expand, though.
1128 decision_logic_->set_sample_memory(samples_left);
1129 decision_logic_->set_prev_time_scale(true);
1130 return 0;
1131 }
1132 if (samples_left < static_cast<int>(samples_20_ms) &&
1133 decoder_frame_length_ < samples_30_ms) {
1134 // Build up decoded data by decoding at least 20 ms of audio data.
1135 // Still try to perform preemptive expand.
1136 required_samples = 2 * output_size_samples_;
1137 }
1138 // Move on with the preemptive expand decision.
1139 break;
1140 }
1141 case kMerge: {
1142 required_samples =
1143 std::max(merge_->RequiredFutureSamples(), required_samples);
1144 break;
1145 }
1146 default: {
1147 // Do nothing.
1148 }
1149 }
1150
1151 // Get packets from buffer.
1152 int extracted_samples = 0;
1153 if (header &&
1154 *operation != kAlternativePlc &&
1155 *operation != kAlternativePlcIncreaseTimestamp &&
1156 *operation != kAudioRepetition &&
1157 *operation != kAudioRepetitionIncreaseTimestamp) {
1158 sync_buffer_->IncreaseEndTimestamp(header->timestamp - end_timestamp);
1159 if (decision_logic_->CngOff()) {
1160 // Adjustment of timestamp only corresponds to an actual packet loss
1161 // if comfort noise is not played. If comfort noise was just played,
1162 // this adjustment of timestamp is only done to get back in sync with the
1163 // stream timestamp; no loss to report.
1164 stats_.LostSamples(header->timestamp - end_timestamp);
1165 }
1166
1167 if (*operation != kRfc3389Cng) {
1168 // We are about to decode and use a non-CNG packet.
1169 decision_logic_->SetCngOff();
1170 }
1171 // Reset CNG timestamp as a new packet will be delivered.
1172 // (Also if this is a CNG packet, since playedOutTS is updated.)
1173 decision_logic_->set_generated_noise_samples(0);
1174
1175 extracted_samples = ExtractPackets(required_samples, packet_list);
1176 if (extracted_samples < 0) {
1177 return kPacketBufferCorruption;
1178 }
1179 }
1180
1181 if (*operation == kAccelerate || *operation == kFastAccelerate ||
1182 *operation == kPreemptiveExpand) {
1183 decision_logic_->set_sample_memory(samples_left + extracted_samples);
1184 decision_logic_->set_prev_time_scale(true);
1185 }
1186
1187 if (*operation == kAccelerate || *operation == kFastAccelerate) {
1188 // Check that we have enough data (30ms) to do accelerate.
1189 if (extracted_samples + samples_left < static_cast<int>(samples_30_ms)) {
1190 // TODO(hlundin): Write test for this.
1191 // Not enough, do normal operation instead.
1192 *operation = kNormal;
1193 }
1194 }
1195
1196 timestamp_ = end_timestamp;
1197 return 0;
1198 }
1199
Decode(PacketList * packet_list,Operations * operation,int * decoded_length,AudioDecoder::SpeechType * speech_type)1200 int NetEqImpl::Decode(PacketList* packet_list, Operations* operation,
1201 int* decoded_length,
1202 AudioDecoder::SpeechType* speech_type) {
1203 *speech_type = AudioDecoder::kSpeech;
1204
1205 // When packet_list is empty, we may be in kCodecInternalCng mode, and for
1206 // that we use current active decoder.
1207 AudioDecoder* decoder = decoder_database_->GetActiveDecoder();
1208
1209 if (!packet_list->empty()) {
1210 const Packet* packet = packet_list->front();
1211 uint8_t payload_type = packet->header.payloadType;
1212 if (!decoder_database_->IsComfortNoise(payload_type)) {
1213 decoder = decoder_database_->GetDecoder(payload_type);
1214 assert(decoder);
1215 if (!decoder) {
1216 LOG(LS_WARNING) << "Unknown payload type "
1217 << static_cast<int>(payload_type);
1218 PacketBuffer::DeleteAllPackets(packet_list);
1219 return kDecoderNotFound;
1220 }
1221 bool decoder_changed;
1222 decoder_database_->SetActiveDecoder(payload_type, &decoder_changed);
1223 if (decoder_changed) {
1224 // We have a new decoder. Re-init some values.
1225 const DecoderDatabase::DecoderInfo* decoder_info = decoder_database_
1226 ->GetDecoderInfo(payload_type);
1227 assert(decoder_info);
1228 if (!decoder_info) {
1229 LOG(LS_WARNING) << "Unknown payload type "
1230 << static_cast<int>(payload_type);
1231 PacketBuffer::DeleteAllPackets(packet_list);
1232 return kDecoderNotFound;
1233 }
1234 // If sampling rate or number of channels has changed, we need to make
1235 // a reset.
1236 if (decoder_info->fs_hz != fs_hz_ ||
1237 decoder->Channels() != algorithm_buffer_->Channels()) {
1238 // TODO(tlegrand): Add unittest to cover this event.
1239 SetSampleRateAndChannels(decoder_info->fs_hz, decoder->Channels());
1240 }
1241 sync_buffer_->set_end_timestamp(timestamp_);
1242 playout_timestamp_ = timestamp_;
1243 }
1244 }
1245 }
1246
1247 if (reset_decoder_) {
1248 // TODO(hlundin): Write test for this.
1249 if (decoder)
1250 decoder->Reset();
1251
1252 // Reset comfort noise decoder.
1253 AudioDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
1254 if (cng_decoder)
1255 cng_decoder->Reset();
1256
1257 reset_decoder_ = false;
1258 }
1259
1260 #ifdef LEGACY_BITEXACT
1261 // Due to a bug in old SignalMCU, it could happen that CNG operation was
1262 // decided, but a speech packet was provided. The speech packet will be used
1263 // to update the comfort noise decoder, as if it was a SID frame, which is
1264 // clearly wrong.
1265 if (*operation == kRfc3389Cng) {
1266 return 0;
1267 }
1268 #endif
1269
1270 *decoded_length = 0;
1271 // Update codec-internal PLC state.
1272 if ((*operation == kMerge) && decoder && decoder->HasDecodePlc()) {
1273 decoder->DecodePlc(1, &decoded_buffer_[*decoded_length]);
1274 }
1275
1276 int return_value;
1277 if (*operation == kCodecInternalCng) {
1278 RTC_DCHECK(packet_list->empty());
1279 return_value = DecodeCng(decoder, decoded_length, speech_type);
1280 } else {
1281 return_value = DecodeLoop(packet_list, *operation, decoder,
1282 decoded_length, speech_type);
1283 }
1284
1285 if (*decoded_length < 0) {
1286 // Error returned from the decoder.
1287 *decoded_length = 0;
1288 sync_buffer_->IncreaseEndTimestamp(
1289 static_cast<uint32_t>(decoder_frame_length_));
1290 int error_code = 0;
1291 if (decoder)
1292 error_code = decoder->ErrorCode();
1293 if (error_code != 0) {
1294 // Got some error code from the decoder.
1295 decoder_error_code_ = error_code;
1296 return_value = kDecoderErrorCode;
1297 LOG(LS_WARNING) << "Decoder returned error code: " << error_code;
1298 } else {
1299 // Decoder does not implement error codes. Return generic error.
1300 return_value = kOtherDecoderError;
1301 LOG(LS_WARNING) << "Decoder error (no error code)";
1302 }
1303 *operation = kExpand; // Do expansion to get data instead.
1304 }
1305 if (*speech_type != AudioDecoder::kComfortNoise) {
1306 // Don't increment timestamp if codec returned CNG speech type
1307 // since in this case, the we will increment the CNGplayedTS counter.
1308 // Increase with number of samples per channel.
1309 assert(*decoded_length == 0 ||
1310 (decoder && decoder->Channels() == sync_buffer_->Channels()));
1311 sync_buffer_->IncreaseEndTimestamp(
1312 *decoded_length / static_cast<int>(sync_buffer_->Channels()));
1313 }
1314 return return_value;
1315 }
1316
DecodeCng(AudioDecoder * decoder,int * decoded_length,AudioDecoder::SpeechType * speech_type)1317 int NetEqImpl::DecodeCng(AudioDecoder* decoder, int* decoded_length,
1318 AudioDecoder::SpeechType* speech_type) {
1319 if (!decoder) {
1320 // This happens when active decoder is not defined.
1321 *decoded_length = -1;
1322 return 0;
1323 }
1324
1325 while (*decoded_length < rtc::checked_cast<int>(output_size_samples_)) {
1326 const int length = decoder->Decode(
1327 nullptr, 0, fs_hz_,
1328 (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t),
1329 &decoded_buffer_[*decoded_length], speech_type);
1330 if (length > 0) {
1331 *decoded_length += length;
1332 LOG(LS_VERBOSE) << "Decoded " << length << " CNG samples";
1333 } else {
1334 // Error.
1335 LOG(LS_WARNING) << "Failed to decode CNG";
1336 *decoded_length = -1;
1337 break;
1338 }
1339 if (*decoded_length > static_cast<int>(decoded_buffer_length_)) {
1340 // Guard against overflow.
1341 LOG(LS_WARNING) << "Decoded too much CNG.";
1342 return kDecodedTooMuch;
1343 }
1344 }
1345 return 0;
1346 }
1347
DecodeLoop(PacketList * packet_list,const Operations & operation,AudioDecoder * decoder,int * decoded_length,AudioDecoder::SpeechType * speech_type)1348 int NetEqImpl::DecodeLoop(PacketList* packet_list, const Operations& operation,
1349 AudioDecoder* decoder, int* decoded_length,
1350 AudioDecoder::SpeechType* speech_type) {
1351 Packet* packet = NULL;
1352 if (!packet_list->empty()) {
1353 packet = packet_list->front();
1354 }
1355
1356 // Do decoding.
1357 while (packet &&
1358 !decoder_database_->IsComfortNoise(packet->header.payloadType)) {
1359 assert(decoder); // At this point, we must have a decoder object.
1360 // The number of channels in the |sync_buffer_| should be the same as the
1361 // number decoder channels.
1362 assert(sync_buffer_->Channels() == decoder->Channels());
1363 assert(decoded_buffer_length_ >= kMaxFrameSize * decoder->Channels());
1364 assert(operation == kNormal || operation == kAccelerate ||
1365 operation == kFastAccelerate || operation == kMerge ||
1366 operation == kPreemptiveExpand);
1367 packet_list->pop_front();
1368 size_t payload_length = packet->payload_length;
1369 int decode_length;
1370 if (packet->sync_packet) {
1371 // Decode to silence with the same frame size as the last decode.
1372 LOG(LS_VERBOSE) << "Decoding sync-packet: " <<
1373 " ts=" << packet->header.timestamp <<
1374 ", sn=" << packet->header.sequenceNumber <<
1375 ", pt=" << static_cast<int>(packet->header.payloadType) <<
1376 ", ssrc=" << packet->header.ssrc <<
1377 ", len=" << packet->payload_length;
1378 memset(&decoded_buffer_[*decoded_length], 0,
1379 decoder_frame_length_ * decoder->Channels() *
1380 sizeof(decoded_buffer_[0]));
1381 decode_length = rtc::checked_cast<int>(decoder_frame_length_);
1382 } else if (!packet->primary) {
1383 // This is a redundant payload; call the special decoder method.
1384 LOG(LS_VERBOSE) << "Decoding packet (redundant):" <<
1385 " ts=" << packet->header.timestamp <<
1386 ", sn=" << packet->header.sequenceNumber <<
1387 ", pt=" << static_cast<int>(packet->header.payloadType) <<
1388 ", ssrc=" << packet->header.ssrc <<
1389 ", len=" << packet->payload_length;
1390 decode_length = decoder->DecodeRedundant(
1391 packet->payload, packet->payload_length, fs_hz_,
1392 (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t),
1393 &decoded_buffer_[*decoded_length], speech_type);
1394 } else {
1395 LOG(LS_VERBOSE) << "Decoding packet: ts=" << packet->header.timestamp <<
1396 ", sn=" << packet->header.sequenceNumber <<
1397 ", pt=" << static_cast<int>(packet->header.payloadType) <<
1398 ", ssrc=" << packet->header.ssrc <<
1399 ", len=" << packet->payload_length;
1400 decode_length =
1401 decoder->Decode(
1402 packet->payload, packet->payload_length, fs_hz_,
1403 (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t),
1404 &decoded_buffer_[*decoded_length], speech_type);
1405 }
1406
1407 delete[] packet->payload;
1408 delete packet;
1409 packet = NULL;
1410 if (decode_length > 0) {
1411 *decoded_length += decode_length;
1412 // Update |decoder_frame_length_| with number of samples per channel.
1413 decoder_frame_length_ =
1414 static_cast<size_t>(decode_length) / decoder->Channels();
1415 LOG(LS_VERBOSE) << "Decoded " << decode_length << " samples ("
1416 << decoder->Channels() << " channel(s) -> "
1417 << decoder_frame_length_ << " samples per channel)";
1418 } else if (decode_length < 0) {
1419 // Error.
1420 LOG(LS_WARNING) << "Decode " << decode_length << " " << payload_length;
1421 *decoded_length = -1;
1422 PacketBuffer::DeleteAllPackets(packet_list);
1423 break;
1424 }
1425 if (*decoded_length > static_cast<int>(decoded_buffer_length_)) {
1426 // Guard against overflow.
1427 LOG(LS_WARNING) << "Decoded too much.";
1428 PacketBuffer::DeleteAllPackets(packet_list);
1429 return kDecodedTooMuch;
1430 }
1431 if (!packet_list->empty()) {
1432 packet = packet_list->front();
1433 } else {
1434 packet = NULL;
1435 }
1436 } // End of decode loop.
1437
1438 // If the list is not empty at this point, either a decoding error terminated
1439 // the while-loop, or list must hold exactly one CNG packet.
1440 assert(packet_list->empty() || *decoded_length < 0 ||
1441 (packet_list->size() == 1 && packet &&
1442 decoder_database_->IsComfortNoise(packet->header.payloadType)));
1443 return 0;
1444 }
1445
DoNormal(const int16_t * decoded_buffer,size_t decoded_length,AudioDecoder::SpeechType speech_type,bool play_dtmf)1446 void NetEqImpl::DoNormal(const int16_t* decoded_buffer, size_t decoded_length,
1447 AudioDecoder::SpeechType speech_type, bool play_dtmf) {
1448 assert(normal_.get());
1449 assert(mute_factor_array_.get());
1450 normal_->Process(decoded_buffer, decoded_length, last_mode_,
1451 mute_factor_array_.get(), algorithm_buffer_.get());
1452 if (decoded_length != 0) {
1453 last_mode_ = kModeNormal;
1454 }
1455
1456 // If last packet was decoded as an inband CNG, set mode to CNG instead.
1457 if ((speech_type == AudioDecoder::kComfortNoise)
1458 || ((last_mode_ == kModeCodecInternalCng)
1459 && (decoded_length == 0))) {
1460 // TODO(hlundin): Remove second part of || statement above.
1461 last_mode_ = kModeCodecInternalCng;
1462 }
1463
1464 if (!play_dtmf) {
1465 dtmf_tone_generator_->Reset();
1466 }
1467 }
1468
DoMerge(int16_t * decoded_buffer,size_t decoded_length,AudioDecoder::SpeechType speech_type,bool play_dtmf)1469 void NetEqImpl::DoMerge(int16_t* decoded_buffer, size_t decoded_length,
1470 AudioDecoder::SpeechType speech_type, bool play_dtmf) {
1471 assert(mute_factor_array_.get());
1472 assert(merge_.get());
1473 size_t new_length = merge_->Process(decoded_buffer, decoded_length,
1474 mute_factor_array_.get(),
1475 algorithm_buffer_.get());
1476 size_t expand_length_correction = new_length -
1477 decoded_length / algorithm_buffer_->Channels();
1478
1479 // Update in-call and post-call statistics.
1480 if (expand_->MuteFactor(0) == 0) {
1481 // Expand generates only noise.
1482 stats_.ExpandedNoiseSamples(expand_length_correction);
1483 } else {
1484 // Expansion generates more than only noise.
1485 stats_.ExpandedVoiceSamples(expand_length_correction);
1486 }
1487
1488 last_mode_ = kModeMerge;
1489 // If last packet was decoded as an inband CNG, set mode to CNG instead.
1490 if (speech_type == AudioDecoder::kComfortNoise) {
1491 last_mode_ = kModeCodecInternalCng;
1492 }
1493 expand_->Reset();
1494 if (!play_dtmf) {
1495 dtmf_tone_generator_->Reset();
1496 }
1497 }
1498
DoExpand(bool play_dtmf)1499 int NetEqImpl::DoExpand(bool play_dtmf) {
1500 while ((sync_buffer_->FutureLength() - expand_->overlap_length()) <
1501 output_size_samples_) {
1502 algorithm_buffer_->Clear();
1503 int return_value = expand_->Process(algorithm_buffer_.get());
1504 size_t length = algorithm_buffer_->Size();
1505
1506 // Update in-call and post-call statistics.
1507 if (expand_->MuteFactor(0) == 0) {
1508 // Expand operation generates only noise.
1509 stats_.ExpandedNoiseSamples(length);
1510 } else {
1511 // Expand operation generates more than only noise.
1512 stats_.ExpandedVoiceSamples(length);
1513 }
1514
1515 last_mode_ = kModeExpand;
1516
1517 if (return_value < 0) {
1518 return return_value;
1519 }
1520
1521 sync_buffer_->PushBack(*algorithm_buffer_);
1522 algorithm_buffer_->Clear();
1523 }
1524 if (!play_dtmf) {
1525 dtmf_tone_generator_->Reset();
1526 }
1527 return 0;
1528 }
1529
DoAccelerate(int16_t * decoded_buffer,size_t decoded_length,AudioDecoder::SpeechType speech_type,bool play_dtmf,bool fast_accelerate)1530 int NetEqImpl::DoAccelerate(int16_t* decoded_buffer,
1531 size_t decoded_length,
1532 AudioDecoder::SpeechType speech_type,
1533 bool play_dtmf,
1534 bool fast_accelerate) {
1535 const size_t required_samples =
1536 static_cast<size_t>(240 * fs_mult_); // Must have 30 ms.
1537 size_t borrowed_samples_per_channel = 0;
1538 size_t num_channels = algorithm_buffer_->Channels();
1539 size_t decoded_length_per_channel = decoded_length / num_channels;
1540 if (decoded_length_per_channel < required_samples) {
1541 // Must move data from the |sync_buffer_| in order to get 30 ms.
1542 borrowed_samples_per_channel = static_cast<int>(required_samples -
1543 decoded_length_per_channel);
1544 memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels],
1545 decoded_buffer,
1546 sizeof(int16_t) * decoded_length);
1547 sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel,
1548 decoded_buffer);
1549 decoded_length = required_samples * num_channels;
1550 }
1551
1552 size_t samples_removed;
1553 Accelerate::ReturnCodes return_code =
1554 accelerate_->Process(decoded_buffer, decoded_length, fast_accelerate,
1555 algorithm_buffer_.get(), &samples_removed);
1556 stats_.AcceleratedSamples(samples_removed);
1557 switch (return_code) {
1558 case Accelerate::kSuccess:
1559 last_mode_ = kModeAccelerateSuccess;
1560 break;
1561 case Accelerate::kSuccessLowEnergy:
1562 last_mode_ = kModeAccelerateLowEnergy;
1563 break;
1564 case Accelerate::kNoStretch:
1565 last_mode_ = kModeAccelerateFail;
1566 break;
1567 case Accelerate::kError:
1568 // TODO(hlundin): Map to kModeError instead?
1569 last_mode_ = kModeAccelerateFail;
1570 return kAccelerateError;
1571 }
1572
1573 if (borrowed_samples_per_channel > 0) {
1574 // Copy borrowed samples back to the |sync_buffer_|.
1575 size_t length = algorithm_buffer_->Size();
1576 if (length < borrowed_samples_per_channel) {
1577 // This destroys the beginning of the buffer, but will not cause any
1578 // problems.
1579 sync_buffer_->ReplaceAtIndex(*algorithm_buffer_,
1580 sync_buffer_->Size() -
1581 borrowed_samples_per_channel);
1582 sync_buffer_->PushFrontZeros(borrowed_samples_per_channel - length);
1583 algorithm_buffer_->PopFront(length);
1584 assert(algorithm_buffer_->Empty());
1585 } else {
1586 sync_buffer_->ReplaceAtIndex(*algorithm_buffer_,
1587 borrowed_samples_per_channel,
1588 sync_buffer_->Size() -
1589 borrowed_samples_per_channel);
1590 algorithm_buffer_->PopFront(borrowed_samples_per_channel);
1591 }
1592 }
1593
1594 // If last packet was decoded as an inband CNG, set mode to CNG instead.
1595 if (speech_type == AudioDecoder::kComfortNoise) {
1596 last_mode_ = kModeCodecInternalCng;
1597 }
1598 if (!play_dtmf) {
1599 dtmf_tone_generator_->Reset();
1600 }
1601 expand_->Reset();
1602 return 0;
1603 }
1604
DoPreemptiveExpand(int16_t * decoded_buffer,size_t decoded_length,AudioDecoder::SpeechType speech_type,bool play_dtmf)1605 int NetEqImpl::DoPreemptiveExpand(int16_t* decoded_buffer,
1606 size_t decoded_length,
1607 AudioDecoder::SpeechType speech_type,
1608 bool play_dtmf) {
1609 const size_t required_samples =
1610 static_cast<size_t>(240 * fs_mult_); // Must have 30 ms.
1611 size_t num_channels = algorithm_buffer_->Channels();
1612 size_t borrowed_samples_per_channel = 0;
1613 size_t old_borrowed_samples_per_channel = 0;
1614 size_t decoded_length_per_channel = decoded_length / num_channels;
1615 if (decoded_length_per_channel < required_samples) {
1616 // Must move data from the |sync_buffer_| in order to get 30 ms.
1617 borrowed_samples_per_channel =
1618 required_samples - decoded_length_per_channel;
1619 // Calculate how many of these were already played out.
1620 old_borrowed_samples_per_channel =
1621 (borrowed_samples_per_channel > sync_buffer_->FutureLength()) ?
1622 (borrowed_samples_per_channel - sync_buffer_->FutureLength()) : 0;
1623 memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels],
1624 decoded_buffer,
1625 sizeof(int16_t) * decoded_length);
1626 sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel,
1627 decoded_buffer);
1628 decoded_length = required_samples * num_channels;
1629 }
1630
1631 size_t samples_added;
1632 PreemptiveExpand::ReturnCodes return_code = preemptive_expand_->Process(
1633 decoded_buffer, decoded_length,
1634 old_borrowed_samples_per_channel,
1635 algorithm_buffer_.get(), &samples_added);
1636 stats_.PreemptiveExpandedSamples(samples_added);
1637 switch (return_code) {
1638 case PreemptiveExpand::kSuccess:
1639 last_mode_ = kModePreemptiveExpandSuccess;
1640 break;
1641 case PreemptiveExpand::kSuccessLowEnergy:
1642 last_mode_ = kModePreemptiveExpandLowEnergy;
1643 break;
1644 case PreemptiveExpand::kNoStretch:
1645 last_mode_ = kModePreemptiveExpandFail;
1646 break;
1647 case PreemptiveExpand::kError:
1648 // TODO(hlundin): Map to kModeError instead?
1649 last_mode_ = kModePreemptiveExpandFail;
1650 return kPreemptiveExpandError;
1651 }
1652
1653 if (borrowed_samples_per_channel > 0) {
1654 // Copy borrowed samples back to the |sync_buffer_|.
1655 sync_buffer_->ReplaceAtIndex(
1656 *algorithm_buffer_, borrowed_samples_per_channel,
1657 sync_buffer_->Size() - borrowed_samples_per_channel);
1658 algorithm_buffer_->PopFront(borrowed_samples_per_channel);
1659 }
1660
1661 // If last packet was decoded as an inband CNG, set mode to CNG instead.
1662 if (speech_type == AudioDecoder::kComfortNoise) {
1663 last_mode_ = kModeCodecInternalCng;
1664 }
1665 if (!play_dtmf) {
1666 dtmf_tone_generator_->Reset();
1667 }
1668 expand_->Reset();
1669 return 0;
1670 }
1671
DoRfc3389Cng(PacketList * packet_list,bool play_dtmf)1672 int NetEqImpl::DoRfc3389Cng(PacketList* packet_list, bool play_dtmf) {
1673 if (!packet_list->empty()) {
1674 // Must have exactly one SID frame at this point.
1675 assert(packet_list->size() == 1);
1676 Packet* packet = packet_list->front();
1677 packet_list->pop_front();
1678 if (!decoder_database_->IsComfortNoise(packet->header.payloadType)) {
1679 #ifdef LEGACY_BITEXACT
1680 // This can happen due to a bug in GetDecision. Change the payload type
1681 // to a CNG type, and move on. Note that this means that we are in fact
1682 // sending a non-CNG payload to the comfort noise decoder for decoding.
1683 // Clearly wrong, but will maintain bit-exactness with legacy.
1684 if (fs_hz_ == 8000) {
1685 packet->header.payloadType =
1686 decoder_database_->GetRtpPayloadType(NetEqDecoder::kDecoderCNGnb);
1687 } else if (fs_hz_ == 16000) {
1688 packet->header.payloadType =
1689 decoder_database_->GetRtpPayloadType(NetEqDecoder::kDecoderCNGwb);
1690 } else if (fs_hz_ == 32000) {
1691 packet->header.payloadType = decoder_database_->GetRtpPayloadType(
1692 NetEqDecoder::kDecoderCNGswb32kHz);
1693 } else if (fs_hz_ == 48000) {
1694 packet->header.payloadType = decoder_database_->GetRtpPayloadType(
1695 NetEqDecoder::kDecoderCNGswb48kHz);
1696 }
1697 assert(decoder_database_->IsComfortNoise(packet->header.payloadType));
1698 #else
1699 LOG(LS_ERROR) << "Trying to decode non-CNG payload as CNG.";
1700 return kOtherError;
1701 #endif
1702 }
1703 // UpdateParameters() deletes |packet|.
1704 if (comfort_noise_->UpdateParameters(packet) ==
1705 ComfortNoise::kInternalError) {
1706 algorithm_buffer_->Zeros(output_size_samples_);
1707 return -comfort_noise_->internal_error_code();
1708 }
1709 }
1710 int cn_return = comfort_noise_->Generate(output_size_samples_,
1711 algorithm_buffer_.get());
1712 expand_->Reset();
1713 last_mode_ = kModeRfc3389Cng;
1714 if (!play_dtmf) {
1715 dtmf_tone_generator_->Reset();
1716 }
1717 if (cn_return == ComfortNoise::kInternalError) {
1718 decoder_error_code_ = comfort_noise_->internal_error_code();
1719 return kComfortNoiseErrorCode;
1720 } else if (cn_return == ComfortNoise::kUnknownPayloadType) {
1721 return kUnknownRtpPayloadType;
1722 }
1723 return 0;
1724 }
1725
DoCodecInternalCng(const int16_t * decoded_buffer,size_t decoded_length)1726 void NetEqImpl::DoCodecInternalCng(const int16_t* decoded_buffer,
1727 size_t decoded_length) {
1728 RTC_DCHECK(normal_.get());
1729 RTC_DCHECK(mute_factor_array_.get());
1730 normal_->Process(decoded_buffer, decoded_length, last_mode_,
1731 mute_factor_array_.get(), algorithm_buffer_.get());
1732 last_mode_ = kModeCodecInternalCng;
1733 expand_->Reset();
1734 }
1735
DoDtmf(const DtmfEvent & dtmf_event,bool * play_dtmf)1736 int NetEqImpl::DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf) {
1737 // This block of the code and the block further down, handling |dtmf_switch|
1738 // are commented out. Otherwise playing out-of-band DTMF would fail in VoE
1739 // test, DtmfTest.ManualSuccessfullySendsOutOfBandTelephoneEvents. This is
1740 // equivalent to |dtmf_switch| always be false.
1741 //
1742 // See http://webrtc-codereview.appspot.com/1195004/ for discussion
1743 // On this issue. This change might cause some glitches at the point of
1744 // switch from audio to DTMF. Issue 1545 is filed to track this.
1745 //
1746 // bool dtmf_switch = false;
1747 // if ((last_mode_ != kModeDtmf) && dtmf_tone_generator_->initialized()) {
1748 // // Special case; see below.
1749 // // We must catch this before calling Generate, since |initialized| is
1750 // // modified in that call.
1751 // dtmf_switch = true;
1752 // }
1753
1754 int dtmf_return_value = 0;
1755 if (!dtmf_tone_generator_->initialized()) {
1756 // Initialize if not already done.
1757 dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no,
1758 dtmf_event.volume);
1759 }
1760
1761 if (dtmf_return_value == 0) {
1762 // Generate DTMF signal.
1763 dtmf_return_value = dtmf_tone_generator_->Generate(output_size_samples_,
1764 algorithm_buffer_.get());
1765 }
1766
1767 if (dtmf_return_value < 0) {
1768 algorithm_buffer_->Zeros(output_size_samples_);
1769 return dtmf_return_value;
1770 }
1771
1772 // if (dtmf_switch) {
1773 // // This is the special case where the previous operation was DTMF
1774 // // overdub, but the current instruction is "regular" DTMF. We must make
1775 // // sure that the DTMF does not have any discontinuities. The first DTMF
1776 // // sample that we generate now must be played out immediately, therefore
1777 // // it must be copied to the speech buffer.
1778 // // TODO(hlundin): This code seems incorrect. (Legacy.) Write test and
1779 // // verify correct operation.
1780 // assert(false);
1781 // // Must generate enough data to replace all of the |sync_buffer_|
1782 // // "future".
1783 // int required_length = sync_buffer_->FutureLength();
1784 // assert(dtmf_tone_generator_->initialized());
1785 // dtmf_return_value = dtmf_tone_generator_->Generate(required_length,
1786 // algorithm_buffer_);
1787 // assert((size_t) required_length == algorithm_buffer_->Size());
1788 // if (dtmf_return_value < 0) {
1789 // algorithm_buffer_->Zeros(output_size_samples_);
1790 // return dtmf_return_value;
1791 // }
1792 //
1793 // // Overwrite the "future" part of the speech buffer with the new DTMF
1794 // // data.
1795 // // TODO(hlundin): It seems that this overwriting has gone lost.
1796 // // Not adapted for multi-channel yet.
1797 // assert(algorithm_buffer_->Channels() == 1);
1798 // if (algorithm_buffer_->Channels() != 1) {
1799 // LOG(LS_WARNING) << "DTMF not supported for more than one channel";
1800 // return kStereoNotSupported;
1801 // }
1802 // // Shuffle the remaining data to the beginning of algorithm buffer.
1803 // algorithm_buffer_->PopFront(sync_buffer_->FutureLength());
1804 // }
1805
1806 sync_buffer_->IncreaseEndTimestamp(
1807 static_cast<uint32_t>(output_size_samples_));
1808 expand_->Reset();
1809 last_mode_ = kModeDtmf;
1810
1811 // Set to false because the DTMF is already in the algorithm buffer.
1812 *play_dtmf = false;
1813 return 0;
1814 }
1815
DoAlternativePlc(bool increase_timestamp)1816 void NetEqImpl::DoAlternativePlc(bool increase_timestamp) {
1817 AudioDecoder* decoder = decoder_database_->GetActiveDecoder();
1818 size_t length;
1819 if (decoder && decoder->HasDecodePlc()) {
1820 // Use the decoder's packet-loss concealment.
1821 // TODO(hlundin): Will probably need a longer buffer for multi-channel.
1822 int16_t decoded_buffer[kMaxFrameSize];
1823 length = decoder->DecodePlc(1, decoded_buffer);
1824 if (length > 0)
1825 algorithm_buffer_->PushBackInterleaved(decoded_buffer, length);
1826 } else {
1827 // Do simple zero-stuffing.
1828 length = output_size_samples_;
1829 algorithm_buffer_->Zeros(length);
1830 // By not advancing the timestamp, NetEq inserts samples.
1831 stats_.AddZeros(length);
1832 }
1833 if (increase_timestamp) {
1834 sync_buffer_->IncreaseEndTimestamp(static_cast<uint32_t>(length));
1835 }
1836 expand_->Reset();
1837 }
1838
DtmfOverdub(const DtmfEvent & dtmf_event,size_t num_channels,int16_t * output) const1839 int NetEqImpl::DtmfOverdub(const DtmfEvent& dtmf_event, size_t num_channels,
1840 int16_t* output) const {
1841 size_t out_index = 0;
1842 size_t overdub_length = output_size_samples_; // Default value.
1843
1844 if (sync_buffer_->dtmf_index() > sync_buffer_->next_index()) {
1845 // Special operation for transition from "DTMF only" to "DTMF overdub".
1846 out_index = std::min(
1847 sync_buffer_->dtmf_index() - sync_buffer_->next_index(),
1848 output_size_samples_);
1849 overdub_length = output_size_samples_ - out_index;
1850 }
1851
1852 AudioMultiVector dtmf_output(num_channels);
1853 int dtmf_return_value = 0;
1854 if (!dtmf_tone_generator_->initialized()) {
1855 dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no,
1856 dtmf_event.volume);
1857 }
1858 if (dtmf_return_value == 0) {
1859 dtmf_return_value = dtmf_tone_generator_->Generate(overdub_length,
1860 &dtmf_output);
1861 assert(overdub_length == dtmf_output.Size());
1862 }
1863 dtmf_output.ReadInterleaved(overdub_length, &output[out_index]);
1864 return dtmf_return_value < 0 ? dtmf_return_value : 0;
1865 }
1866
ExtractPackets(size_t required_samples,PacketList * packet_list)1867 int NetEqImpl::ExtractPackets(size_t required_samples,
1868 PacketList* packet_list) {
1869 bool first_packet = true;
1870 uint8_t prev_payload_type = 0;
1871 uint32_t prev_timestamp = 0;
1872 uint16_t prev_sequence_number = 0;
1873 bool next_packet_available = false;
1874
1875 const RTPHeader* header = packet_buffer_->NextRtpHeader();
1876 assert(header);
1877 if (!header) {
1878 LOG(LS_ERROR) << "Packet buffer unexpectedly empty.";
1879 return -1;
1880 }
1881 uint32_t first_timestamp = header->timestamp;
1882 int extracted_samples = 0;
1883
1884 // Packet extraction loop.
1885 do {
1886 timestamp_ = header->timestamp;
1887 size_t discard_count = 0;
1888 Packet* packet = packet_buffer_->GetNextPacket(&discard_count);
1889 // |header| may be invalid after the |packet_buffer_| operation.
1890 header = NULL;
1891 if (!packet) {
1892 LOG(LS_ERROR) << "Should always be able to extract a packet here";
1893 assert(false); // Should always be able to extract a packet here.
1894 return -1;
1895 }
1896 stats_.PacketsDiscarded(discard_count);
1897 // Store waiting time in ms; packets->waiting_time is in "output blocks".
1898 stats_.StoreWaitingTime(packet->waiting_time * kOutputSizeMs);
1899 assert(packet->payload_length > 0);
1900 packet_list->push_back(packet); // Store packet in list.
1901
1902 if (first_packet) {
1903 first_packet = false;
1904 if (nack_enabled_) {
1905 RTC_DCHECK(nack_);
1906 // TODO(henrik.lundin): Should we update this for all decoded packets?
1907 nack_->UpdateLastDecodedPacket(packet->header.sequenceNumber,
1908 packet->header.timestamp);
1909 }
1910 prev_sequence_number = packet->header.sequenceNumber;
1911 prev_timestamp = packet->header.timestamp;
1912 prev_payload_type = packet->header.payloadType;
1913 }
1914
1915 // Store number of extracted samples.
1916 int packet_duration = 0;
1917 AudioDecoder* decoder = decoder_database_->GetDecoder(
1918 packet->header.payloadType);
1919 if (decoder) {
1920 if (packet->sync_packet) {
1921 packet_duration = rtc::checked_cast<int>(decoder_frame_length_);
1922 } else {
1923 if (packet->primary) {
1924 packet_duration = decoder->PacketDuration(packet->payload,
1925 packet->payload_length);
1926 } else {
1927 packet_duration = decoder->
1928 PacketDurationRedundant(packet->payload, packet->payload_length);
1929 stats_.SecondaryDecodedSamples(packet_duration);
1930 }
1931 }
1932 } else {
1933 LOG(LS_WARNING) << "Unknown payload type "
1934 << static_cast<int>(packet->header.payloadType);
1935 assert(false);
1936 }
1937 if (packet_duration <= 0) {
1938 // Decoder did not return a packet duration. Assume that the packet
1939 // contains the same number of samples as the previous one.
1940 packet_duration = rtc::checked_cast<int>(decoder_frame_length_);
1941 }
1942 extracted_samples = packet->header.timestamp - first_timestamp +
1943 packet_duration;
1944
1945 // Check what packet is available next.
1946 header = packet_buffer_->NextRtpHeader();
1947 next_packet_available = false;
1948 if (header && prev_payload_type == header->payloadType) {
1949 int16_t seq_no_diff = header->sequenceNumber - prev_sequence_number;
1950 size_t ts_diff = header->timestamp - prev_timestamp;
1951 if (seq_no_diff == 1 ||
1952 (seq_no_diff == 0 && ts_diff == decoder_frame_length_)) {
1953 // The next sequence number is available, or the next part of a packet
1954 // that was split into pieces upon insertion.
1955 next_packet_available = true;
1956 }
1957 prev_sequence_number = header->sequenceNumber;
1958 }
1959 } while (extracted_samples < rtc::checked_cast<int>(required_samples) &&
1960 next_packet_available);
1961
1962 if (extracted_samples > 0) {
1963 // Delete old packets only when we are going to decode something. Otherwise,
1964 // we could end up in the situation where we never decode anything, since
1965 // all incoming packets are considered too old but the buffer will also
1966 // never be flooded and flushed.
1967 packet_buffer_->DiscardAllOldPackets(timestamp_);
1968 }
1969
1970 return extracted_samples;
1971 }
1972
UpdatePlcComponents(int fs_hz,size_t channels)1973 void NetEqImpl::UpdatePlcComponents(int fs_hz, size_t channels) {
1974 // Delete objects and create new ones.
1975 expand_.reset(expand_factory_->Create(background_noise_.get(),
1976 sync_buffer_.get(), &random_vector_,
1977 &stats_, fs_hz, channels));
1978 merge_.reset(new Merge(fs_hz, channels, expand_.get(), sync_buffer_.get()));
1979 }
1980
SetSampleRateAndChannels(int fs_hz,size_t channels)1981 void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
1982 LOG(LS_VERBOSE) << "SetSampleRateAndChannels " << fs_hz << " " << channels;
1983 // TODO(hlundin): Change to an enumerator and skip assert.
1984 assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000);
1985 assert(channels > 0);
1986
1987 fs_hz_ = fs_hz;
1988 fs_mult_ = fs_hz / 8000;
1989 output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_);
1990 decoder_frame_length_ = 3 * output_size_samples_; // Initialize to 30ms.
1991
1992 last_mode_ = kModeNormal;
1993
1994 // Create a new array of mute factors and set all to 1.
1995 mute_factor_array_.reset(new int16_t[channels]);
1996 for (size_t i = 0; i < channels; ++i) {
1997 mute_factor_array_[i] = 16384; // 1.0 in Q14.
1998 }
1999
2000 AudioDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
2001 if (cng_decoder)
2002 cng_decoder->Reset();
2003
2004 // Reinit post-decode VAD with new sample rate.
2005 assert(vad_.get()); // Cannot be NULL here.
2006 vad_->Init();
2007
2008 // Delete algorithm buffer and create a new one.
2009 algorithm_buffer_.reset(new AudioMultiVector(channels));
2010
2011 // Delete sync buffer and create a new one.
2012 sync_buffer_.reset(new SyncBuffer(channels, kSyncBufferSize * fs_mult_));
2013
2014 // Delete BackgroundNoise object and create a new one.
2015 background_noise_.reset(new BackgroundNoise(channels));
2016 background_noise_->set_mode(background_noise_mode_);
2017
2018 // Reset random vector.
2019 random_vector_.Reset();
2020
2021 UpdatePlcComponents(fs_hz, channels);
2022
2023 // Move index so that we create a small set of future samples (all 0).
2024 sync_buffer_->set_next_index(sync_buffer_->next_index() -
2025 expand_->overlap_length());
2026
2027 normal_.reset(new Normal(fs_hz, decoder_database_.get(), *background_noise_,
2028 expand_.get()));
2029 accelerate_.reset(
2030 accelerate_factory_->Create(fs_hz, channels, *background_noise_));
2031 preemptive_expand_.reset(preemptive_expand_factory_->Create(
2032 fs_hz, channels, *background_noise_, expand_->overlap_length()));
2033
2034 // Delete ComfortNoise object and create a new one.
2035 comfort_noise_.reset(new ComfortNoise(fs_hz, decoder_database_.get(),
2036 sync_buffer_.get()));
2037
2038 // Verify that |decoded_buffer_| is long enough.
2039 if (decoded_buffer_length_ < kMaxFrameSize * channels) {
2040 // Reallocate to larger size.
2041 decoded_buffer_length_ = kMaxFrameSize * channels;
2042 decoded_buffer_.reset(new int16_t[decoded_buffer_length_]);
2043 }
2044
2045 // Create DecisionLogic if it is not created yet, then communicate new sample
2046 // rate and output size to DecisionLogic object.
2047 if (!decision_logic_.get()) {
2048 CreateDecisionLogic();
2049 }
2050 decision_logic_->SetSampleRate(fs_hz_, output_size_samples_);
2051 }
2052
LastOutputType()2053 NetEqOutputType NetEqImpl::LastOutputType() {
2054 assert(vad_.get());
2055 assert(expand_.get());
2056 if (last_mode_ == kModeCodecInternalCng || last_mode_ == kModeRfc3389Cng) {
2057 return kOutputCNG;
2058 } else if (last_mode_ == kModeExpand && expand_->MuteFactor(0) == 0) {
2059 // Expand mode has faded down to background noise only (very long expand).
2060 return kOutputPLCtoCNG;
2061 } else if (last_mode_ == kModeExpand) {
2062 return kOutputPLC;
2063 } else if (vad_->running() && !vad_->active_speech()) {
2064 return kOutputVADPassive;
2065 } else {
2066 return kOutputNormal;
2067 }
2068 }
2069
CreateDecisionLogic()2070 void NetEqImpl::CreateDecisionLogic() {
2071 decision_logic_.reset(DecisionLogic::Create(fs_hz_, output_size_samples_,
2072 playout_mode_,
2073 decoder_database_.get(),
2074 *packet_buffer_.get(),
2075 delay_manager_.get(),
2076 buffer_level_filter_.get()));
2077 }
2078 } // namespace webrtc
2079