1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "webrtc/modules/audio_coding/neteq/neteq_impl.h"
12 
13 #include <assert.h>
14 #include <memory.h>  // memset
15 
16 #include <algorithm>
17 
18 #include "webrtc/base/checks.h"
19 #include "webrtc/base/logging.h"
20 #include "webrtc/base/safe_conversions.h"
21 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
22 #include "webrtc/modules/audio_coding/codecs/audio_decoder.h"
23 #include "webrtc/modules/audio_coding/neteq/accelerate.h"
24 #include "webrtc/modules/audio_coding/neteq/background_noise.h"
25 #include "webrtc/modules/audio_coding/neteq/buffer_level_filter.h"
26 #include "webrtc/modules/audio_coding/neteq/comfort_noise.h"
27 #include "webrtc/modules/audio_coding/neteq/decision_logic.h"
28 #include "webrtc/modules/audio_coding/neteq/decoder_database.h"
29 #include "webrtc/modules/audio_coding/neteq/defines.h"
30 #include "webrtc/modules/audio_coding/neteq/delay_manager.h"
31 #include "webrtc/modules/audio_coding/neteq/delay_peak_detector.h"
32 #include "webrtc/modules/audio_coding/neteq/dtmf_buffer.h"
33 #include "webrtc/modules/audio_coding/neteq/dtmf_tone_generator.h"
34 #include "webrtc/modules/audio_coding/neteq/expand.h"
35 #include "webrtc/modules/audio_coding/neteq/merge.h"
36 #include "webrtc/modules/audio_coding/neteq/nack.h"
37 #include "webrtc/modules/audio_coding/neteq/normal.h"
38 #include "webrtc/modules/audio_coding/neteq/packet_buffer.h"
39 #include "webrtc/modules/audio_coding/neteq/packet.h"
40 #include "webrtc/modules/audio_coding/neteq/payload_splitter.h"
41 #include "webrtc/modules/audio_coding/neteq/post_decode_vad.h"
42 #include "webrtc/modules/audio_coding/neteq/preemptive_expand.h"
43 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
44 #include "webrtc/modules/audio_coding/neteq/timestamp_scaler.h"
45 #include "webrtc/modules/include/module_common_types.h"
46 #include "webrtc/system_wrappers/include/critical_section_wrapper.h"
47 
48 // Modify the code to obtain backwards bit-exactness. Once bit-exactness is no
49 // longer required, this #define should be removed (and the code that it
50 // enables).
51 #define LEGACY_BITEXACT
52 
53 namespace webrtc {
54 
NetEqImpl(const NetEq::Config & config,BufferLevelFilter * buffer_level_filter,DecoderDatabase * decoder_database,DelayManager * delay_manager,DelayPeakDetector * delay_peak_detector,DtmfBuffer * dtmf_buffer,DtmfToneGenerator * dtmf_tone_generator,PacketBuffer * packet_buffer,PayloadSplitter * payload_splitter,TimestampScaler * timestamp_scaler,AccelerateFactory * accelerate_factory,ExpandFactory * expand_factory,PreemptiveExpandFactory * preemptive_expand_factory,bool create_components)55 NetEqImpl::NetEqImpl(const NetEq::Config& config,
56                      BufferLevelFilter* buffer_level_filter,
57                      DecoderDatabase* decoder_database,
58                      DelayManager* delay_manager,
59                      DelayPeakDetector* delay_peak_detector,
60                      DtmfBuffer* dtmf_buffer,
61                      DtmfToneGenerator* dtmf_tone_generator,
62                      PacketBuffer* packet_buffer,
63                      PayloadSplitter* payload_splitter,
64                      TimestampScaler* timestamp_scaler,
65                      AccelerateFactory* accelerate_factory,
66                      ExpandFactory* expand_factory,
67                      PreemptiveExpandFactory* preemptive_expand_factory,
68                      bool create_components)
69     : crit_sect_(CriticalSectionWrapper::CreateCriticalSection()),
70       buffer_level_filter_(buffer_level_filter),
71       decoder_database_(decoder_database),
72       delay_manager_(delay_manager),
73       delay_peak_detector_(delay_peak_detector),
74       dtmf_buffer_(dtmf_buffer),
75       dtmf_tone_generator_(dtmf_tone_generator),
76       packet_buffer_(packet_buffer),
77       payload_splitter_(payload_splitter),
78       timestamp_scaler_(timestamp_scaler),
79       vad_(new PostDecodeVad()),
80       expand_factory_(expand_factory),
81       accelerate_factory_(accelerate_factory),
82       preemptive_expand_factory_(preemptive_expand_factory),
83       last_mode_(kModeNormal),
84       decoded_buffer_length_(kMaxFrameSize),
85       decoded_buffer_(new int16_t[decoded_buffer_length_]),
86       playout_timestamp_(0),
87       new_codec_(false),
88       timestamp_(0),
89       reset_decoder_(false),
90       current_rtp_payload_type_(0xFF),      // Invalid RTP payload type.
91       current_cng_rtp_payload_type_(0xFF),  // Invalid RTP payload type.
92       ssrc_(0),
93       first_packet_(true),
94       error_code_(0),
95       decoder_error_code_(0),
96       background_noise_mode_(config.background_noise_mode),
97       playout_mode_(config.playout_mode),
98       enable_fast_accelerate_(config.enable_fast_accelerate),
99       nack_enabled_(false) {
100   LOG(LS_INFO) << "NetEq config: " << config.ToString();
101   int fs = config.sample_rate_hz;
102   if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) {
103     LOG(LS_ERROR) << "Sample rate " << fs << " Hz not supported. " <<
104         "Changing to 8000 Hz.";
105     fs = 8000;
106   }
107   fs_hz_ = fs;
108   fs_mult_ = fs / 8000;
109   output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_);
110   decoder_frame_length_ = 3 * output_size_samples_;
111   WebRtcSpl_Init();
112   if (create_components) {
113     SetSampleRateAndChannels(fs, 1);  // Default is 1 channel.
114   }
115   RTC_DCHECK(!vad_->enabled());
116   if (config.enable_post_decode_vad) {
117     vad_->Enable();
118   }
119 }
120 
// Defaulted destructor; no explicit teardown is performed here.
NetEqImpl::~NetEqImpl() = default;
122 
InsertPacket(const WebRtcRTPHeader & rtp_header,const uint8_t * payload,size_t length_bytes,uint32_t receive_timestamp)123 int NetEqImpl::InsertPacket(const WebRtcRTPHeader& rtp_header,
124                             const uint8_t* payload,
125                             size_t length_bytes,
126                             uint32_t receive_timestamp) {
127   CriticalSectionScoped lock(crit_sect_.get());
128   LOG(LS_VERBOSE) << "InsertPacket: ts=" << rtp_header.header.timestamp <<
129       ", sn=" << rtp_header.header.sequenceNumber <<
130       ", pt=" << static_cast<int>(rtp_header.header.payloadType) <<
131       ", ssrc=" << rtp_header.header.ssrc <<
132       ", len=" << length_bytes;
133   int error = InsertPacketInternal(rtp_header, payload, length_bytes,
134                                    receive_timestamp, false);
135   if (error != 0) {
136     error_code_ = error;
137     return kFail;
138   }
139   return kOK;
140 }
141 
InsertSyncPacket(const WebRtcRTPHeader & rtp_header,uint32_t receive_timestamp)142 int NetEqImpl::InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
143                                 uint32_t receive_timestamp) {
144   CriticalSectionScoped lock(crit_sect_.get());
145   LOG(LS_VERBOSE) << "InsertPacket-Sync: ts="
146       << rtp_header.header.timestamp <<
147       ", sn=" << rtp_header.header.sequenceNumber <<
148       ", pt=" << static_cast<int>(rtp_header.header.payloadType) <<
149       ", ssrc=" << rtp_header.header.ssrc;
150 
151   const uint8_t kSyncPayload[] = { 's', 'y', 'n', 'c' };
152   int error = InsertPacketInternal(
153       rtp_header, kSyncPayload, sizeof(kSyncPayload), receive_timestamp, true);
154 
155   if (error != 0) {
156     error_code_ = error;
157     return kFail;
158   }
159   return kOK;
160 }
161 
GetAudio(size_t max_length,int16_t * output_audio,size_t * samples_per_channel,int * num_channels,NetEqOutputType * type)162 int NetEqImpl::GetAudio(size_t max_length, int16_t* output_audio,
163                         size_t* samples_per_channel, int* num_channels,
164                         NetEqOutputType* type) {
165   CriticalSectionScoped lock(crit_sect_.get());
166   LOG(LS_VERBOSE) << "GetAudio";
167   int error = GetAudioInternal(max_length, output_audio, samples_per_channel,
168                                num_channels);
169   LOG(LS_VERBOSE) << "Produced " << *samples_per_channel <<
170       " samples/channel for " << *num_channels << " channel(s)";
171   if (error != 0) {
172     error_code_ = error;
173     return kFail;
174   }
175   if (type) {
176     *type = LastOutputType();
177   }
178   return kOK;
179 }
180 
RegisterPayloadType(NetEqDecoder codec,uint8_t rtp_payload_type)181 int NetEqImpl::RegisterPayloadType(NetEqDecoder codec,
182                                    uint8_t rtp_payload_type) {
183   CriticalSectionScoped lock(crit_sect_.get());
184   LOG(LS_VERBOSE) << "RegisterPayloadType "
185                   << static_cast<int>(rtp_payload_type) << " "
186                   << static_cast<int>(codec);
187   int ret = decoder_database_->RegisterPayload(rtp_payload_type, codec);
188   if (ret != DecoderDatabase::kOK) {
189     switch (ret) {
190       case DecoderDatabase::kInvalidRtpPayloadType:
191         error_code_ = kInvalidRtpPayloadType;
192         break;
193       case DecoderDatabase::kCodecNotSupported:
194         error_code_ = kCodecNotSupported;
195         break;
196       case DecoderDatabase::kDecoderExists:
197         error_code_ = kDecoderExists;
198         break;
199       default:
200         error_code_ = kOtherError;
201     }
202     return kFail;
203   }
204   return kOK;
205 }
206 
RegisterExternalDecoder(AudioDecoder * decoder,NetEqDecoder codec,uint8_t rtp_payload_type,int sample_rate_hz)207 int NetEqImpl::RegisterExternalDecoder(AudioDecoder* decoder,
208                                        NetEqDecoder codec,
209                                        uint8_t rtp_payload_type,
210                                        int sample_rate_hz) {
211   CriticalSectionScoped lock(crit_sect_.get());
212   LOG(LS_VERBOSE) << "RegisterExternalDecoder "
213                   << static_cast<int>(rtp_payload_type) << " "
214                   << static_cast<int>(codec);
215   if (!decoder) {
216     LOG(LS_ERROR) << "Cannot register external decoder with NULL pointer";
217     assert(false);
218     return kFail;
219   }
220   int ret = decoder_database_->InsertExternal(rtp_payload_type, codec,
221                                               sample_rate_hz, decoder);
222   if (ret != DecoderDatabase::kOK) {
223     switch (ret) {
224       case DecoderDatabase::kInvalidRtpPayloadType:
225         error_code_ = kInvalidRtpPayloadType;
226         break;
227       case DecoderDatabase::kCodecNotSupported:
228         error_code_ = kCodecNotSupported;
229         break;
230       case DecoderDatabase::kDecoderExists:
231         error_code_ = kDecoderExists;
232         break;
233       case DecoderDatabase::kInvalidSampleRate:
234         error_code_ = kInvalidSampleRate;
235         break;
236       case DecoderDatabase::kInvalidPointer:
237         error_code_ = kInvalidPointer;
238         break;
239       default:
240         error_code_ = kOtherError;
241     }
242     return kFail;
243   }
244   return kOK;
245 }
246 
RemovePayloadType(uint8_t rtp_payload_type)247 int NetEqImpl::RemovePayloadType(uint8_t rtp_payload_type) {
248   CriticalSectionScoped lock(crit_sect_.get());
249   int ret = decoder_database_->Remove(rtp_payload_type);
250   if (ret == DecoderDatabase::kOK) {
251     return kOK;
252   } else if (ret == DecoderDatabase::kDecoderNotFound) {
253     error_code_ = kDecoderNotFound;
254   } else {
255     error_code_ = kOtherError;
256   }
257   return kFail;
258 }
259 
SetMinimumDelay(int delay_ms)260 bool NetEqImpl::SetMinimumDelay(int delay_ms) {
261   CriticalSectionScoped lock(crit_sect_.get());
262   if (delay_ms >= 0 && delay_ms < 10000) {
263     assert(delay_manager_.get());
264     return delay_manager_->SetMinimumDelay(delay_ms);
265   }
266   return false;
267 }
268 
SetMaximumDelay(int delay_ms)269 bool NetEqImpl::SetMaximumDelay(int delay_ms) {
270   CriticalSectionScoped lock(crit_sect_.get());
271   if (delay_ms >= 0 && delay_ms < 10000) {
272     assert(delay_manager_.get());
273     return delay_manager_->SetMaximumDelay(delay_ms);
274   }
275   return false;
276 }
277 
LeastRequiredDelayMs() const278 int NetEqImpl::LeastRequiredDelayMs() const {
279   CriticalSectionScoped lock(crit_sect_.get());
280   assert(delay_manager_.get());
281   return delay_manager_->least_required_delay_ms();
282 }
283 
SetTargetDelay()284 int NetEqImpl::SetTargetDelay() {
285   return kNotImplemented;
286 }
287 
TargetDelay()288 int NetEqImpl::TargetDelay() {
289   return kNotImplemented;
290 }
291 
CurrentDelayMs() const292 int NetEqImpl::CurrentDelayMs() const {
293   CriticalSectionScoped lock(crit_sect_.get());
294   if (fs_hz_ == 0)
295     return 0;
296   // Sum up the samples in the packet buffer with the future length of the sync
297   // buffer, and divide the sum by the sample rate.
298   const size_t delay_samples =
299       packet_buffer_->NumSamplesInBuffer(decoder_database_.get(),
300                                          decoder_frame_length_) +
301       sync_buffer_->FutureLength();
302   // The division below will truncate.
303   const int delay_ms =
304       static_cast<int>(delay_samples) / rtc::CheckedDivExact(fs_hz_, 1000);
305   return delay_ms;
306 }
307 
308 // Deprecated.
309 // TODO(henrik.lundin) Delete.
SetPlayoutMode(NetEqPlayoutMode mode)310 void NetEqImpl::SetPlayoutMode(NetEqPlayoutMode mode) {
311   CriticalSectionScoped lock(crit_sect_.get());
312   if (mode != playout_mode_) {
313     playout_mode_ = mode;
314     CreateDecisionLogic();
315   }
316 }
317 
318 // Deprecated.
319 // TODO(henrik.lundin) Delete.
PlayoutMode() const320 NetEqPlayoutMode NetEqImpl::PlayoutMode() const {
321   CriticalSectionScoped lock(crit_sect_.get());
322   return playout_mode_;
323 }
324 
NetworkStatistics(NetEqNetworkStatistics * stats)325 int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) {
326   CriticalSectionScoped lock(crit_sect_.get());
327   assert(decoder_database_.get());
328   const size_t total_samples_in_buffers =
329       packet_buffer_->NumSamplesInBuffer(decoder_database_.get(),
330                                          decoder_frame_length_) +
331       sync_buffer_->FutureLength();
332   assert(delay_manager_.get());
333   assert(decision_logic_.get());
334   stats_.GetNetworkStatistics(fs_hz_, total_samples_in_buffers,
335                               decoder_frame_length_, *delay_manager_.get(),
336                               *decision_logic_.get(), stats);
337   return 0;
338 }
339 
GetRtcpStatistics(RtcpStatistics * stats)340 void NetEqImpl::GetRtcpStatistics(RtcpStatistics* stats) {
341   CriticalSectionScoped lock(crit_sect_.get());
342   if (stats) {
343     rtcp_.GetStatistics(false, stats);
344   }
345 }
346 
GetRtcpStatisticsNoReset(RtcpStatistics * stats)347 void NetEqImpl::GetRtcpStatisticsNoReset(RtcpStatistics* stats) {
348   CriticalSectionScoped lock(crit_sect_.get());
349   if (stats) {
350     rtcp_.GetStatistics(true, stats);
351   }
352 }
353 
EnableVad()354 void NetEqImpl::EnableVad() {
355   CriticalSectionScoped lock(crit_sect_.get());
356   assert(vad_.get());
357   vad_->Enable();
358 }
359 
DisableVad()360 void NetEqImpl::DisableVad() {
361   CriticalSectionScoped lock(crit_sect_.get());
362   assert(vad_.get());
363   vad_->Disable();
364 }
365 
GetPlayoutTimestamp(uint32_t * timestamp)366 bool NetEqImpl::GetPlayoutTimestamp(uint32_t* timestamp) {
367   CriticalSectionScoped lock(crit_sect_.get());
368   if (first_packet_) {
369     // We don't have a valid RTP timestamp until we have decoded our first
370     // RTP packet.
371     return false;
372   }
373   *timestamp = timestamp_scaler_->ToExternal(playout_timestamp_);
374   return true;
375 }
376 
SetTargetNumberOfChannels()377 int NetEqImpl::SetTargetNumberOfChannels() {
378   return kNotImplemented;
379 }
380 
SetTargetSampleRate()381 int NetEqImpl::SetTargetSampleRate() {
382   return kNotImplemented;
383 }
384 
LastError() const385 int NetEqImpl::LastError() const {
386   CriticalSectionScoped lock(crit_sect_.get());
387   return error_code_;
388 }
389 
LastDecoderError()390 int NetEqImpl::LastDecoderError() {
391   CriticalSectionScoped lock(crit_sect_.get());
392   return decoder_error_code_;
393 }
394 
FlushBuffers()395 void NetEqImpl::FlushBuffers() {
396   CriticalSectionScoped lock(crit_sect_.get());
397   LOG(LS_VERBOSE) << "FlushBuffers";
398   packet_buffer_->Flush();
399   assert(sync_buffer_.get());
400   assert(expand_.get());
401   sync_buffer_->Flush();
402   sync_buffer_->set_next_index(sync_buffer_->next_index() -
403                                expand_->overlap_length());
404   // Set to wait for new codec.
405   first_packet_ = true;
406 }
407 
PacketBufferStatistics(int * current_num_packets,int * max_num_packets) const408 void NetEqImpl::PacketBufferStatistics(int* current_num_packets,
409                                        int* max_num_packets) const {
410   CriticalSectionScoped lock(crit_sect_.get());
411   packet_buffer_->BufferStat(current_num_packets, max_num_packets);
412 }
413 
EnableNack(size_t max_nack_list_size)414 void NetEqImpl::EnableNack(size_t max_nack_list_size) {
415   CriticalSectionScoped lock(crit_sect_.get());
416   if (!nack_enabled_) {
417     const int kNackThresholdPackets = 2;
418     nack_.reset(Nack::Create(kNackThresholdPackets));
419     nack_enabled_ = true;
420     nack_->UpdateSampleRate(fs_hz_);
421   }
422   nack_->SetMaxNackListSize(max_nack_list_size);
423 }
424 
DisableNack()425 void NetEqImpl::DisableNack() {
426   CriticalSectionScoped lock(crit_sect_.get());
427   nack_.reset();
428   nack_enabled_ = false;
429 }
430 
GetNackList(int64_t round_trip_time_ms) const431 std::vector<uint16_t> NetEqImpl::GetNackList(int64_t round_trip_time_ms) const {
432   CriticalSectionScoped lock(crit_sect_.get());
433   if (!nack_enabled_) {
434     return std::vector<uint16_t>();
435   }
436   RTC_DCHECK(nack_.get());
437   return nack_->GetNackList(round_trip_time_ms);
438 }
439 
sync_buffer_for_test() const440 const SyncBuffer* NetEqImpl::sync_buffer_for_test() const {
441   CriticalSectionScoped lock(crit_sect_.get());
442   return sync_buffer_.get();
443 }
444 
445 // Methods below this line are private.
446 
InsertPacketInternal(const WebRtcRTPHeader & rtp_header,const uint8_t * payload,size_t length_bytes,uint32_t receive_timestamp,bool is_sync_packet)447 int NetEqImpl::InsertPacketInternal(const WebRtcRTPHeader& rtp_header,
448                                     const uint8_t* payload,
449                                     size_t length_bytes,
450                                     uint32_t receive_timestamp,
451                                     bool is_sync_packet) {
452   if (!payload) {
453     LOG_F(LS_ERROR) << "payload == NULL";
454     return kInvalidPointer;
455   }
456   // Sanity checks for sync-packets.
457   if (is_sync_packet) {
458     if (decoder_database_->IsDtmf(rtp_header.header.payloadType) ||
459         decoder_database_->IsRed(rtp_header.header.payloadType) ||
460         decoder_database_->IsComfortNoise(rtp_header.header.payloadType)) {
461       LOG_F(LS_ERROR) << "Sync-packet with an unacceptable payload type "
462                       << static_cast<int>(rtp_header.header.payloadType);
463       return kSyncPacketNotAccepted;
464     }
465     if (first_packet_ ||
466         rtp_header.header.payloadType != current_rtp_payload_type_ ||
467         rtp_header.header.ssrc != ssrc_) {
468       // Even if |current_rtp_payload_type_| is 0xFF, sync-packet isn't
469       // accepted.
470       LOG_F(LS_ERROR)
471           << "Changing codec, SSRC or first packet with sync-packet.";
472       return kSyncPacketNotAccepted;
473     }
474   }
475   PacketList packet_list;
476   RTPHeader main_header;
477   {
478     // Convert to Packet.
479     // Create |packet| within this separate scope, since it should not be used
480     // directly once it's been inserted in the packet list. This way, |packet|
481     // is not defined outside of this block.
482     Packet* packet = new Packet;
483     packet->header.markerBit = false;
484     packet->header.payloadType = rtp_header.header.payloadType;
485     packet->header.sequenceNumber = rtp_header.header.sequenceNumber;
486     packet->header.timestamp = rtp_header.header.timestamp;
487     packet->header.ssrc = rtp_header.header.ssrc;
488     packet->header.numCSRCs = 0;
489     packet->payload_length = length_bytes;
490     packet->primary = true;
491     packet->waiting_time = 0;
492     packet->payload = new uint8_t[packet->payload_length];
493     packet->sync_packet = is_sync_packet;
494     if (!packet->payload) {
495       LOG_F(LS_ERROR) << "Payload pointer is NULL.";
496     }
497     assert(payload);  // Already checked above.
498     memcpy(packet->payload, payload, packet->payload_length);
499     // Insert packet in a packet list.
500     packet_list.push_back(packet);
501     // Save main payloads header for later.
502     memcpy(&main_header, &packet->header, sizeof(main_header));
503   }
504 
505   bool update_sample_rate_and_channels = false;
506   // Reinitialize NetEq if it's needed (changed SSRC or first call).
507   if ((main_header.ssrc != ssrc_) || first_packet_) {
508     // Note: |first_packet_| will be cleared further down in this method, once
509     // the packet has been successfully inserted into the packet buffer.
510 
511     rtcp_.Init(main_header.sequenceNumber);
512 
513     // Flush the packet buffer and DTMF buffer.
514     packet_buffer_->Flush();
515     dtmf_buffer_->Flush();
516 
517     // Store new SSRC.
518     ssrc_ = main_header.ssrc;
519 
520     // Update audio buffer timestamp.
521     sync_buffer_->IncreaseEndTimestamp(main_header.timestamp - timestamp_);
522 
523     // Update codecs.
524     timestamp_ = main_header.timestamp;
525     current_rtp_payload_type_ = main_header.payloadType;
526 
527     // Reset timestamp scaling.
528     timestamp_scaler_->Reset();
529 
530     // Trigger an update of sampling rate and the number of channels.
531     update_sample_rate_and_channels = true;
532   }
533 
534   // Update RTCP statistics, only for regular packets.
535   if (!is_sync_packet)
536     rtcp_.Update(main_header, receive_timestamp);
537 
538   // Check for RED payload type, and separate payloads into several packets.
539   if (decoder_database_->IsRed(main_header.payloadType)) {
540     assert(!is_sync_packet);  // We had a sanity check for this.
541     if (payload_splitter_->SplitRed(&packet_list) != PayloadSplitter::kOK) {
542       PacketBuffer::DeleteAllPackets(&packet_list);
543       return kRedundancySplitError;
544     }
545     // Only accept a few RED payloads of the same type as the main data,
546     // DTMF events and CNG.
547     payload_splitter_->CheckRedPayloads(&packet_list, *decoder_database_);
548     // Update the stored main payload header since the main payload has now
549     // changed.
550     memcpy(&main_header, &packet_list.front()->header, sizeof(main_header));
551   }
552 
553   // Check payload types.
554   if (decoder_database_->CheckPayloadTypes(packet_list) ==
555       DecoderDatabase::kDecoderNotFound) {
556     PacketBuffer::DeleteAllPackets(&packet_list);
557     return kUnknownRtpPayloadType;
558   }
559 
560   // Scale timestamp to internal domain (only for some codecs).
561   timestamp_scaler_->ToInternal(&packet_list);
562 
563   // Process DTMF payloads. Cycle through the list of packets, and pick out any
564   // DTMF payloads found.
565   PacketList::iterator it = packet_list.begin();
566   while (it != packet_list.end()) {
567     Packet* current_packet = (*it);
568     assert(current_packet);
569     assert(current_packet->payload);
570     if (decoder_database_->IsDtmf(current_packet->header.payloadType)) {
571       assert(!current_packet->sync_packet);  // We had a sanity check for this.
572       DtmfEvent event;
573       int ret = DtmfBuffer::ParseEvent(
574           current_packet->header.timestamp,
575           current_packet->payload,
576           current_packet->payload_length,
577           &event);
578       if (ret != DtmfBuffer::kOK) {
579         PacketBuffer::DeleteAllPackets(&packet_list);
580         return kDtmfParsingError;
581       }
582       if (dtmf_buffer_->InsertEvent(event) != DtmfBuffer::kOK) {
583         PacketBuffer::DeleteAllPackets(&packet_list);
584         return kDtmfInsertError;
585       }
586       // TODO(hlundin): Let the destructor of Packet handle the payload.
587       delete [] current_packet->payload;
588       delete current_packet;
589       it = packet_list.erase(it);
590     } else {
591       ++it;
592     }
593   }
594 
595   // Check for FEC in packets, and separate payloads into several packets.
596   int ret = payload_splitter_->SplitFec(&packet_list, decoder_database_.get());
597   if (ret != PayloadSplitter::kOK) {
598     PacketBuffer::DeleteAllPackets(&packet_list);
599     switch (ret) {
600       case PayloadSplitter::kUnknownPayloadType:
601         return kUnknownRtpPayloadType;
602       default:
603         return kOtherError;
604     }
605   }
606 
607   // Split payloads into smaller chunks. This also verifies that all payloads
608   // are of a known payload type. SplitAudio() method is protected against
609   // sync-packets.
610   ret = payload_splitter_->SplitAudio(&packet_list, *decoder_database_);
611   if (ret != PayloadSplitter::kOK) {
612     PacketBuffer::DeleteAllPackets(&packet_list);
613     switch (ret) {
614       case PayloadSplitter::kUnknownPayloadType:
615         return kUnknownRtpPayloadType;
616       case PayloadSplitter::kFrameSplitError:
617         return kFrameSplitError;
618       default:
619         return kOtherError;
620     }
621   }
622 
623   // Update bandwidth estimate, if the packet is not sync-packet.
624   if (!packet_list.empty() && !packet_list.front()->sync_packet) {
625     // The list can be empty here if we got nothing but DTMF payloads.
626     AudioDecoder* decoder =
627         decoder_database_->GetDecoder(main_header.payloadType);
628     assert(decoder);  // Should always get a valid object, since we have
629                       // already checked that the payload types are known.
630     decoder->IncomingPacket(packet_list.front()->payload,
631                             packet_list.front()->payload_length,
632                             packet_list.front()->header.sequenceNumber,
633                             packet_list.front()->header.timestamp,
634                             receive_timestamp);
635   }
636 
637   if (nack_enabled_) {
638     RTC_DCHECK(nack_);
639     if (update_sample_rate_and_channels) {
640       nack_->Reset();
641     }
642     nack_->UpdateLastReceivedPacket(packet_list.front()->header.sequenceNumber,
643                                     packet_list.front()->header.timestamp);
644   }
645 
646   // Insert packets in buffer.
647   const size_t buffer_length_before_insert =
648       packet_buffer_->NumPacketsInBuffer();
649   ret = packet_buffer_->InsertPacketList(
650       &packet_list,
651       *decoder_database_,
652       &current_rtp_payload_type_,
653       &current_cng_rtp_payload_type_);
654   if (ret == PacketBuffer::kFlushed) {
655     // Reset DSP timestamp etc. if packet buffer flushed.
656     new_codec_ = true;
657     update_sample_rate_and_channels = true;
658   } else if (ret != PacketBuffer::kOK) {
659     PacketBuffer::DeleteAllPackets(&packet_list);
660     return kOtherError;
661   }
662 
663   if (first_packet_) {
664     first_packet_ = false;
665     // Update the codec on the next GetAudio call.
666     new_codec_ = true;
667   }
668 
669   if (current_rtp_payload_type_ != 0xFF) {
670     const DecoderDatabase::DecoderInfo* dec_info =
671         decoder_database_->GetDecoderInfo(current_rtp_payload_type_);
672     if (!dec_info) {
673       assert(false);  // Already checked that the payload type is known.
674     }
675   }
676 
677   if (update_sample_rate_and_channels && !packet_buffer_->Empty()) {
678     // We do not use |current_rtp_payload_type_| to |set payload_type|, but
679     // get the next RTP header from |packet_buffer_| to obtain the payload type.
680     // The reason for it is the following corner case. If NetEq receives a
681     // CNG packet with a sample rate different than the current CNG then it
682     // flushes its buffer, assuming send codec must have been changed. However,
683     // payload type of the hypothetically new send codec is not known.
684     const RTPHeader* rtp_header = packet_buffer_->NextRtpHeader();
685     assert(rtp_header);
686     int payload_type = rtp_header->payloadType;
687     AudioDecoder* decoder = decoder_database_->GetDecoder(payload_type);
688     assert(decoder);  // Payloads are already checked to be valid.
689     const DecoderDatabase::DecoderInfo* decoder_info =
690         decoder_database_->GetDecoderInfo(payload_type);
691     assert(decoder_info);
692     if (decoder_info->fs_hz != fs_hz_ ||
693         decoder->Channels() != algorithm_buffer_->Channels()) {
694       SetSampleRateAndChannels(decoder_info->fs_hz, decoder->Channels());
695     }
696     if (nack_enabled_) {
697       RTC_DCHECK(nack_);
698       // Update the sample rate even if the rate is not new, because of Reset().
699       nack_->UpdateSampleRate(fs_hz_);
700     }
701   }
702 
703   // TODO(hlundin): Move this code to DelayManager class.
704   const DecoderDatabase::DecoderInfo* dec_info =
705           decoder_database_->GetDecoderInfo(main_header.payloadType);
706   assert(dec_info);  // Already checked that the payload type is known.
707   delay_manager_->LastDecoderType(dec_info->codec_type);
708   if (delay_manager_->last_pack_cng_or_dtmf() == 0) {
709     // Calculate the total speech length carried in each packet.
710     const size_t buffer_length_after_insert =
711         packet_buffer_->NumPacketsInBuffer();
712 
713     if (buffer_length_after_insert > buffer_length_before_insert) {
714       const size_t packet_length_samples =
715           (buffer_length_after_insert - buffer_length_before_insert) *
716           decoder_frame_length_;
717       if (packet_length_samples != decision_logic_->packet_length_samples()) {
718         decision_logic_->set_packet_length_samples(packet_length_samples);
719         delay_manager_->SetPacketAudioLength(
720             rtc::checked_cast<int>((1000 * packet_length_samples) / fs_hz_));
721       }
722     }
723 
724     // Update statistics.
725     if ((int32_t) (main_header.timestamp - timestamp_) >= 0 &&
726         !new_codec_) {
727       // Only update statistics if incoming packet is not older than last played
728       // out packet, and if new codec flag is not set.
729       delay_manager_->Update(main_header.sequenceNumber, main_header.timestamp,
730                              fs_hz_);
731     }
732   } else if (delay_manager_->last_pack_cng_or_dtmf() == -1) {
733     // This is first "normal" packet after CNG or DTMF.
734     // Reset packet time counter and measure time until next packet,
735     // but don't update statistics.
736     delay_manager_->set_last_pack_cng_or_dtmf(0);
737     delay_manager_->ResetPacketIatCount();
738   }
739   return 0;
740 }
741 
GetAudioInternal(size_t max_length,int16_t * output,size_t * samples_per_channel,int * num_channels)742 int NetEqImpl::GetAudioInternal(size_t max_length,
743                                 int16_t* output,
744                                 size_t* samples_per_channel,
745                                 int* num_channels) {
746   PacketList packet_list;
747   DtmfEvent dtmf_event;
748   Operations operation;
749   bool play_dtmf;
750   int return_value = GetDecision(&operation, &packet_list, &dtmf_event,
751                                  &play_dtmf);
752   if (return_value != 0) {
753     last_mode_ = kModeError;
754     return return_value;
755   }
756   LOG(LS_VERBOSE) << "GetDecision returned operation=" << operation <<
757       " and " << packet_list.size() << " packet(s)";
758 
759   AudioDecoder::SpeechType speech_type;
760   int length = 0;
761   int decode_return_value = Decode(&packet_list, &operation,
762                                    &length, &speech_type);
763 
764   assert(vad_.get());
765   bool sid_frame_available =
766       (operation == kRfc3389Cng && !packet_list.empty());
767   vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type,
768                sid_frame_available, fs_hz_);
769 
770   algorithm_buffer_->Clear();
771   switch (operation) {
772     case kNormal: {
773       DoNormal(decoded_buffer_.get(), length, speech_type, play_dtmf);
774       break;
775     }
776     case kMerge: {
777       DoMerge(decoded_buffer_.get(), length, speech_type, play_dtmf);
778       break;
779     }
780     case kExpand: {
781       return_value = DoExpand(play_dtmf);
782       break;
783     }
784     case kAccelerate:
785     case kFastAccelerate: {
786       const bool fast_accelerate =
787           enable_fast_accelerate_ && (operation == kFastAccelerate);
788       return_value = DoAccelerate(decoded_buffer_.get(), length, speech_type,
789                                   play_dtmf, fast_accelerate);
790       break;
791     }
792     case kPreemptiveExpand: {
793       return_value = DoPreemptiveExpand(decoded_buffer_.get(), length,
794                                         speech_type, play_dtmf);
795       break;
796     }
797     case kRfc3389Cng:
798     case kRfc3389CngNoPacket: {
799       return_value = DoRfc3389Cng(&packet_list, play_dtmf);
800       break;
801     }
802     case kCodecInternalCng: {
803       // This handles the case when there is no transmission and the decoder
804       // should produce internal comfort noise.
805       // TODO(hlundin): Write test for codec-internal CNG.
806       DoCodecInternalCng(decoded_buffer_.get(), length);
807       break;
808     }
809     case kDtmf: {
810       // TODO(hlundin): Write test for this.
811       return_value = DoDtmf(dtmf_event, &play_dtmf);
812       break;
813     }
814     case kAlternativePlc: {
815       // TODO(hlundin): Write test for this.
816       DoAlternativePlc(false);
817       break;
818     }
819     case kAlternativePlcIncreaseTimestamp: {
820       // TODO(hlundin): Write test for this.
821       DoAlternativePlc(true);
822       break;
823     }
824     case kAudioRepetitionIncreaseTimestamp: {
825       // TODO(hlundin): Write test for this.
826       sync_buffer_->IncreaseEndTimestamp(
827           static_cast<uint32_t>(output_size_samples_));
828       // Skipping break on purpose. Execution should move on into the
829       // next case.
830       FALLTHROUGH();
831     }
832     case kAudioRepetition: {
833       // TODO(hlundin): Write test for this.
834       // Copy last |output_size_samples_| from |sync_buffer_| to
835       // |algorithm_buffer|.
836       algorithm_buffer_->PushBackFromIndex(
837           *sync_buffer_, sync_buffer_->Size() - output_size_samples_);
838       expand_->Reset();
839       break;
840     }
841     case kUndefined: {
842       LOG(LS_ERROR) << "Invalid operation kUndefined.";
843       assert(false);  // This should not happen.
844       last_mode_ = kModeError;
845       return kInvalidOperation;
846     }
847   }  // End of switch.
848   if (return_value < 0) {
849     return return_value;
850   }
851 
852   if (last_mode_ != kModeRfc3389Cng) {
853     comfort_noise_->Reset();
854   }
855 
856   // Copy from |algorithm_buffer| to |sync_buffer_|.
857   sync_buffer_->PushBack(*algorithm_buffer_);
858 
859   // Extract data from |sync_buffer_| to |output|.
860   size_t num_output_samples_per_channel = output_size_samples_;
861   size_t num_output_samples = output_size_samples_ * sync_buffer_->Channels();
862   if (num_output_samples > max_length) {
863     LOG(LS_WARNING) << "Output array is too short. " << max_length << " < " <<
864         output_size_samples_ << " * " << sync_buffer_->Channels();
865     num_output_samples = max_length;
866     num_output_samples_per_channel = max_length / sync_buffer_->Channels();
867   }
868   const size_t samples_from_sync =
869       sync_buffer_->GetNextAudioInterleaved(num_output_samples_per_channel,
870                                             output);
871   *num_channels = static_cast<int>(sync_buffer_->Channels());
872   LOG(LS_VERBOSE) << "Sync buffer (" << *num_channels << " channel(s)):" <<
873       " insert " << algorithm_buffer_->Size() << " samples, extract " <<
874       samples_from_sync << " samples";
875   if (sync_buffer_->FutureLength() < expand_->overlap_length()) {
876     // The sync buffer should always contain |overlap_length| samples, but now
877     // too many samples have been extracted. Reinstall the |overlap_length|
878     // lookahead by moving the index.
879     const size_t missing_lookahead_samples =
880         expand_->overlap_length() - sync_buffer_->FutureLength();
881     RTC_DCHECK_GE(sync_buffer_->next_index(), missing_lookahead_samples);
882     sync_buffer_->set_next_index(sync_buffer_->next_index() -
883                                  missing_lookahead_samples);
884   }
885   if (samples_from_sync != output_size_samples_) {
886     LOG(LS_ERROR) << "samples_from_sync (" << samples_from_sync
887                   << ") != output_size_samples_ (" << output_size_samples_
888                   << ")";
889     // TODO(minyue): treatment of under-run, filling zeros
890     memset(output, 0, num_output_samples * sizeof(int16_t));
891     *samples_per_channel = output_size_samples_;
892     return kSampleUnderrun;
893   }
894   *samples_per_channel = output_size_samples_;
895 
896   // Should always have overlap samples left in the |sync_buffer_|.
897   RTC_DCHECK_GE(sync_buffer_->FutureLength(), expand_->overlap_length());
898 
899   if (play_dtmf) {
900     return_value = DtmfOverdub(dtmf_event, sync_buffer_->Channels(), output);
901   }
902 
903   // Update the background noise parameters if last operation wrote data
904   // straight from the decoder to the |sync_buffer_|. That is, none of the
905   // operations that modify the signal can be followed by a parameter update.
906   if ((last_mode_ == kModeNormal) ||
907       (last_mode_ == kModeAccelerateFail) ||
908       (last_mode_ == kModePreemptiveExpandFail) ||
909       (last_mode_ == kModeRfc3389Cng) ||
910       (last_mode_ == kModeCodecInternalCng)) {
911     background_noise_->Update(*sync_buffer_, *vad_.get());
912   }
913 
914   if (operation == kDtmf) {
915     // DTMF data was written the end of |sync_buffer_|.
916     // Update index to end of DTMF data in |sync_buffer_|.
917     sync_buffer_->set_dtmf_index(sync_buffer_->Size());
918   }
919 
920   if (last_mode_ != kModeExpand) {
921     // If last operation was not expand, calculate the |playout_timestamp_| from
922     // the |sync_buffer_|. However, do not update the |playout_timestamp_| if it
923     // would be moved "backwards".
924     uint32_t temp_timestamp = sync_buffer_->end_timestamp() -
925         static_cast<uint32_t>(sync_buffer_->FutureLength());
926     if (static_cast<int32_t>(temp_timestamp - playout_timestamp_) > 0) {
927       playout_timestamp_ = temp_timestamp;
928     }
929   } else {
930     // Use dead reckoning to estimate the |playout_timestamp_|.
931     playout_timestamp_ += static_cast<uint32_t>(output_size_samples_);
932   }
933 
934   if (decode_return_value) return decode_return_value;
935   return return_value;
936 }
937 
GetDecision(Operations * operation,PacketList * packet_list,DtmfEvent * dtmf_event,bool * play_dtmf)938 int NetEqImpl::GetDecision(Operations* operation,
939                            PacketList* packet_list,
940                            DtmfEvent* dtmf_event,
941                            bool* play_dtmf) {
942   // Initialize output variables.
943   *play_dtmf = false;
944   *operation = kUndefined;
945 
946   // Increment time counters.
947   packet_buffer_->IncrementWaitingTimes();
948   stats_.IncreaseCounter(output_size_samples_, fs_hz_);
949 
950   assert(sync_buffer_.get());
951   uint32_t end_timestamp = sync_buffer_->end_timestamp();
952   if (!new_codec_) {
953     const uint32_t five_seconds_samples = 5 * fs_hz_;
954     packet_buffer_->DiscardOldPackets(end_timestamp, five_seconds_samples);
955   }
956   const RTPHeader* header = packet_buffer_->NextRtpHeader();
957 
958   if (decision_logic_->CngRfc3389On() || last_mode_ == kModeRfc3389Cng) {
959     // Because of timestamp peculiarities, we have to "manually" disallow using
960     // a CNG packet with the same timestamp as the one that was last played.
961     // This can happen when using redundancy and will cause the timing to shift.
962     while (header && decoder_database_->IsComfortNoise(header->payloadType) &&
963            (end_timestamp >= header->timestamp ||
964             end_timestamp + decision_logic_->generated_noise_samples() >
965                 header->timestamp)) {
966       // Don't use this packet, discard it.
967       if (packet_buffer_->DiscardNextPacket() != PacketBuffer::kOK) {
968         assert(false);  // Must be ok by design.
969       }
970       // Check buffer again.
971       if (!new_codec_) {
972         packet_buffer_->DiscardOldPackets(end_timestamp, 5 * fs_hz_);
973       }
974       header = packet_buffer_->NextRtpHeader();
975     }
976   }
977 
978   assert(expand_.get());
979   const int samples_left = static_cast<int>(sync_buffer_->FutureLength() -
980       expand_->overlap_length());
981   if (last_mode_ == kModeAccelerateSuccess ||
982       last_mode_ == kModeAccelerateLowEnergy ||
983       last_mode_ == kModePreemptiveExpandSuccess ||
984       last_mode_ == kModePreemptiveExpandLowEnergy) {
985     // Subtract (samples_left + output_size_samples_) from sampleMemory.
986     decision_logic_->AddSampleMemory(
987         -(samples_left + rtc::checked_cast<int>(output_size_samples_)));
988   }
989 
990   // Check if it is time to play a DTMF event.
991   if (dtmf_buffer_->GetEvent(
992       static_cast<uint32_t>(
993           end_timestamp + decision_logic_->generated_noise_samples()),
994       dtmf_event)) {
995     *play_dtmf = true;
996   }
997 
998   // Get instruction.
999   assert(sync_buffer_.get());
1000   assert(expand_.get());
1001   *operation = decision_logic_->GetDecision(*sync_buffer_,
1002                                             *expand_,
1003                                             decoder_frame_length_,
1004                                             header,
1005                                             last_mode_,
1006                                             *play_dtmf,
1007                                             &reset_decoder_);
1008 
1009   // Check if we already have enough samples in the |sync_buffer_|. If so,
1010   // change decision to normal, unless the decision was merge, accelerate, or
1011   // preemptive expand.
1012   if (samples_left >= rtc::checked_cast<int>(output_size_samples_) &&
1013       *operation != kMerge &&
1014       *operation != kAccelerate &&
1015       *operation != kFastAccelerate &&
1016       *operation != kPreemptiveExpand) {
1017     *operation = kNormal;
1018     return 0;
1019   }
1020 
1021   decision_logic_->ExpandDecision(*operation);
1022 
1023   // Check conditions for reset.
1024   if (new_codec_ || *operation == kUndefined) {
1025     // The only valid reason to get kUndefined is that new_codec_ is set.
1026     assert(new_codec_);
1027     if (*play_dtmf && !header) {
1028       timestamp_ = dtmf_event->timestamp;
1029     } else {
1030       if (!header) {
1031         LOG(LS_ERROR) << "Packet missing where it shouldn't.";
1032         return -1;
1033       }
1034       timestamp_ = header->timestamp;
1035       if (*operation == kRfc3389CngNoPacket
1036 #ifndef LEGACY_BITEXACT
1037           // Without this check, it can happen that a non-CNG packet is sent to
1038           // the CNG decoder as if it was a SID frame. This is clearly a bug,
1039           // but is kept for now to maintain bit-exactness with the test
1040           // vectors.
1041           && decoder_database_->IsComfortNoise(header->payloadType)
1042 #endif
1043       ) {
1044         // Change decision to CNG packet, since we do have a CNG packet, but it
1045         // was considered too early to use. Now, use it anyway.
1046         *operation = kRfc3389Cng;
1047       } else if (*operation != kRfc3389Cng) {
1048         *operation = kNormal;
1049       }
1050     }
1051     // Adjust |sync_buffer_| timestamp before setting |end_timestamp| to the
1052     // new value.
1053     sync_buffer_->IncreaseEndTimestamp(timestamp_ - end_timestamp);
1054     end_timestamp = timestamp_;
1055     new_codec_ = false;
1056     decision_logic_->SoftReset();
1057     buffer_level_filter_->Reset();
1058     delay_manager_->Reset();
1059     stats_.ResetMcu();
1060   }
1061 
1062   size_t required_samples = output_size_samples_;
1063   const size_t samples_10_ms = static_cast<size_t>(80 * fs_mult_);
1064   const size_t samples_20_ms = 2 * samples_10_ms;
1065   const size_t samples_30_ms = 3 * samples_10_ms;
1066 
1067   switch (*operation) {
1068     case kExpand: {
1069       timestamp_ = end_timestamp;
1070       return 0;
1071     }
1072     case kRfc3389CngNoPacket:
1073     case kCodecInternalCng: {
1074       return 0;
1075     }
1076     case kDtmf: {
1077       // TODO(hlundin): Write test for this.
1078       // Update timestamp.
1079       timestamp_ = end_timestamp;
1080       if (decision_logic_->generated_noise_samples() > 0 &&
1081           last_mode_ != kModeDtmf) {
1082         // Make a jump in timestamp due to the recently played comfort noise.
1083         uint32_t timestamp_jump =
1084             static_cast<uint32_t>(decision_logic_->generated_noise_samples());
1085         sync_buffer_->IncreaseEndTimestamp(timestamp_jump);
1086         timestamp_ += timestamp_jump;
1087       }
1088       decision_logic_->set_generated_noise_samples(0);
1089       return 0;
1090     }
1091     case kAccelerate:
1092     case kFastAccelerate: {
1093       // In order to do an accelerate we need at least 30 ms of audio data.
1094       if (samples_left >= static_cast<int>(samples_30_ms)) {
1095         // Already have enough data, so we do not need to extract any more.
1096         decision_logic_->set_sample_memory(samples_left);
1097         decision_logic_->set_prev_time_scale(true);
1098         return 0;
1099       } else if (samples_left >= static_cast<int>(samples_10_ms) &&
1100           decoder_frame_length_ >= samples_30_ms) {
1101         // Avoid decoding more data as it might overflow the playout buffer.
1102         *operation = kNormal;
1103         return 0;
1104       } else if (samples_left < static_cast<int>(samples_20_ms) &&
1105           decoder_frame_length_ < samples_30_ms) {
1106         // Build up decoded data by decoding at least 20 ms of audio data. Do
1107         // not perform accelerate yet, but wait until we only need to do one
1108         // decoding.
1109         required_samples = 2 * output_size_samples_;
1110         *operation = kNormal;
1111       }
1112       // If none of the above is true, we have one of two possible situations:
1113       // (1) 20 ms <= samples_left < 30 ms and decoder_frame_length_ < 30 ms; or
1114       // (2) samples_left < 10 ms and decoder_frame_length_ >= 30 ms.
1115       // In either case, we move on with the accelerate decision, and decode one
1116       // frame now.
1117       break;
1118     }
1119     case kPreemptiveExpand: {
1120       // In order to do a preemptive expand we need at least 30 ms of decoded
1121       // audio data.
1122       if ((samples_left >= static_cast<int>(samples_30_ms)) ||
1123           (samples_left >= static_cast<int>(samples_10_ms) &&
1124               decoder_frame_length_ >= samples_30_ms)) {
1125         // Already have enough data, so we do not need to extract any more.
1126         // Or, avoid decoding more data as it might overflow the playout buffer.
1127         // Still try preemptive expand, though.
1128         decision_logic_->set_sample_memory(samples_left);
1129         decision_logic_->set_prev_time_scale(true);
1130         return 0;
1131       }
1132       if (samples_left < static_cast<int>(samples_20_ms) &&
1133           decoder_frame_length_ < samples_30_ms) {
1134         // Build up decoded data by decoding at least 20 ms of audio data.
1135         // Still try to perform preemptive expand.
1136         required_samples = 2 * output_size_samples_;
1137       }
1138       // Move on with the preemptive expand decision.
1139       break;
1140     }
1141     case kMerge: {
1142       required_samples =
1143           std::max(merge_->RequiredFutureSamples(), required_samples);
1144       break;
1145     }
1146     default: {
1147       // Do nothing.
1148     }
1149   }
1150 
1151   // Get packets from buffer.
1152   int extracted_samples = 0;
1153   if (header &&
1154       *operation != kAlternativePlc &&
1155       *operation != kAlternativePlcIncreaseTimestamp &&
1156       *operation != kAudioRepetition &&
1157       *operation != kAudioRepetitionIncreaseTimestamp) {
1158     sync_buffer_->IncreaseEndTimestamp(header->timestamp - end_timestamp);
1159     if (decision_logic_->CngOff()) {
1160       // Adjustment of timestamp only corresponds to an actual packet loss
1161       // if comfort noise is not played. If comfort noise was just played,
1162       // this adjustment of timestamp is only done to get back in sync with the
1163       // stream timestamp; no loss to report.
1164       stats_.LostSamples(header->timestamp - end_timestamp);
1165     }
1166 
1167     if (*operation != kRfc3389Cng) {
1168       // We are about to decode and use a non-CNG packet.
1169       decision_logic_->SetCngOff();
1170     }
1171     // Reset CNG timestamp as a new packet will be delivered.
1172     // (Also if this is a CNG packet, since playedOutTS is updated.)
1173     decision_logic_->set_generated_noise_samples(0);
1174 
1175     extracted_samples = ExtractPackets(required_samples, packet_list);
1176     if (extracted_samples < 0) {
1177       return kPacketBufferCorruption;
1178     }
1179   }
1180 
1181   if (*operation == kAccelerate || *operation == kFastAccelerate ||
1182       *operation == kPreemptiveExpand) {
1183     decision_logic_->set_sample_memory(samples_left + extracted_samples);
1184     decision_logic_->set_prev_time_scale(true);
1185   }
1186 
1187   if (*operation == kAccelerate || *operation == kFastAccelerate) {
1188     // Check that we have enough data (30ms) to do accelerate.
1189     if (extracted_samples + samples_left < static_cast<int>(samples_30_ms)) {
1190       // TODO(hlundin): Write test for this.
1191       // Not enough, do normal operation instead.
1192       *operation = kNormal;
1193     }
1194   }
1195 
1196   timestamp_ = end_timestamp;
1197   return 0;
1198 }
1199 
Decode(PacketList * packet_list,Operations * operation,int * decoded_length,AudioDecoder::SpeechType * speech_type)1200 int NetEqImpl::Decode(PacketList* packet_list, Operations* operation,
1201                       int* decoded_length,
1202                       AudioDecoder::SpeechType* speech_type) {
1203   *speech_type = AudioDecoder::kSpeech;
1204 
1205   // When packet_list is empty, we may be in kCodecInternalCng mode, and for
1206   // that we use current active decoder.
1207   AudioDecoder* decoder = decoder_database_->GetActiveDecoder();
1208 
1209   if (!packet_list->empty()) {
1210     const Packet* packet = packet_list->front();
1211     uint8_t payload_type = packet->header.payloadType;
1212     if (!decoder_database_->IsComfortNoise(payload_type)) {
1213       decoder = decoder_database_->GetDecoder(payload_type);
1214       assert(decoder);
1215       if (!decoder) {
1216         LOG(LS_WARNING) << "Unknown payload type "
1217                         << static_cast<int>(payload_type);
1218         PacketBuffer::DeleteAllPackets(packet_list);
1219         return kDecoderNotFound;
1220       }
1221       bool decoder_changed;
1222       decoder_database_->SetActiveDecoder(payload_type, &decoder_changed);
1223       if (decoder_changed) {
1224         // We have a new decoder. Re-init some values.
1225         const DecoderDatabase::DecoderInfo* decoder_info = decoder_database_
1226             ->GetDecoderInfo(payload_type);
1227         assert(decoder_info);
1228         if (!decoder_info) {
1229           LOG(LS_WARNING) << "Unknown payload type "
1230                           << static_cast<int>(payload_type);
1231           PacketBuffer::DeleteAllPackets(packet_list);
1232           return kDecoderNotFound;
1233         }
1234         // If sampling rate or number of channels has changed, we need to make
1235         // a reset.
1236         if (decoder_info->fs_hz != fs_hz_ ||
1237             decoder->Channels() != algorithm_buffer_->Channels()) {
1238           // TODO(tlegrand): Add unittest to cover this event.
1239           SetSampleRateAndChannels(decoder_info->fs_hz, decoder->Channels());
1240         }
1241         sync_buffer_->set_end_timestamp(timestamp_);
1242         playout_timestamp_ = timestamp_;
1243       }
1244     }
1245   }
1246 
1247   if (reset_decoder_) {
1248     // TODO(hlundin): Write test for this.
1249     if (decoder)
1250       decoder->Reset();
1251 
1252     // Reset comfort noise decoder.
1253     AudioDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
1254     if (cng_decoder)
1255       cng_decoder->Reset();
1256 
1257     reset_decoder_ = false;
1258   }
1259 
1260 #ifdef LEGACY_BITEXACT
1261   // Due to a bug in old SignalMCU, it could happen that CNG operation was
1262   // decided, but a speech packet was provided. The speech packet will be used
1263   // to update the comfort noise decoder, as if it was a SID frame, which is
1264   // clearly wrong.
1265   if (*operation == kRfc3389Cng) {
1266     return 0;
1267   }
1268 #endif
1269 
1270   *decoded_length = 0;
1271   // Update codec-internal PLC state.
1272   if ((*operation == kMerge) && decoder && decoder->HasDecodePlc()) {
1273     decoder->DecodePlc(1, &decoded_buffer_[*decoded_length]);
1274   }
1275 
1276   int return_value;
1277   if (*operation == kCodecInternalCng) {
1278     RTC_DCHECK(packet_list->empty());
1279     return_value = DecodeCng(decoder, decoded_length, speech_type);
1280   } else {
1281     return_value = DecodeLoop(packet_list, *operation, decoder,
1282                               decoded_length, speech_type);
1283   }
1284 
1285   if (*decoded_length < 0) {
1286     // Error returned from the decoder.
1287     *decoded_length = 0;
1288     sync_buffer_->IncreaseEndTimestamp(
1289         static_cast<uint32_t>(decoder_frame_length_));
1290     int error_code = 0;
1291     if (decoder)
1292       error_code = decoder->ErrorCode();
1293     if (error_code != 0) {
1294       // Got some error code from the decoder.
1295       decoder_error_code_ = error_code;
1296       return_value = kDecoderErrorCode;
1297       LOG(LS_WARNING) << "Decoder returned error code: " << error_code;
1298     } else {
1299       // Decoder does not implement error codes. Return generic error.
1300       return_value = kOtherDecoderError;
1301       LOG(LS_WARNING) << "Decoder error (no error code)";
1302     }
1303     *operation = kExpand;  // Do expansion to get data instead.
1304   }
1305   if (*speech_type != AudioDecoder::kComfortNoise) {
1306     // Don't increment timestamp if codec returned CNG speech type
1307     // since in this case, the we will increment the CNGplayedTS counter.
1308     // Increase with number of samples per channel.
1309     assert(*decoded_length == 0 ||
1310            (decoder && decoder->Channels() == sync_buffer_->Channels()));
1311     sync_buffer_->IncreaseEndTimestamp(
1312         *decoded_length / static_cast<int>(sync_buffer_->Channels()));
1313   }
1314   return return_value;
1315 }
1316 
DecodeCng(AudioDecoder * decoder,int * decoded_length,AudioDecoder::SpeechType * speech_type)1317 int NetEqImpl::DecodeCng(AudioDecoder* decoder, int* decoded_length,
1318                          AudioDecoder::SpeechType* speech_type) {
1319   if (!decoder) {
1320     // This happens when active decoder is not defined.
1321     *decoded_length = -1;
1322     return 0;
1323   }
1324 
1325   while (*decoded_length < rtc::checked_cast<int>(output_size_samples_)) {
1326     const int length = decoder->Decode(
1327             nullptr, 0, fs_hz_,
1328             (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t),
1329             &decoded_buffer_[*decoded_length], speech_type);
1330     if (length > 0) {
1331       *decoded_length += length;
1332       LOG(LS_VERBOSE) << "Decoded " << length << " CNG samples";
1333     } else {
1334       // Error.
1335       LOG(LS_WARNING) << "Failed to decode CNG";
1336       *decoded_length = -1;
1337       break;
1338     }
1339     if (*decoded_length > static_cast<int>(decoded_buffer_length_)) {
1340       // Guard against overflow.
1341       LOG(LS_WARNING) << "Decoded too much CNG.";
1342       return kDecodedTooMuch;
1343     }
1344   }
1345   return 0;
1346 }
1347 
DecodeLoop(PacketList * packet_list,const Operations & operation,AudioDecoder * decoder,int * decoded_length,AudioDecoder::SpeechType * speech_type)1348 int NetEqImpl::DecodeLoop(PacketList* packet_list, const Operations& operation,
1349                           AudioDecoder* decoder, int* decoded_length,
1350                           AudioDecoder::SpeechType* speech_type) {
1351   Packet* packet = NULL;
1352   if (!packet_list->empty()) {
1353     packet = packet_list->front();
1354   }
1355 
1356   // Do decoding.
1357   while (packet &&
1358       !decoder_database_->IsComfortNoise(packet->header.payloadType)) {
1359     assert(decoder);  // At this point, we must have a decoder object.
1360     // The number of channels in the |sync_buffer_| should be the same as the
1361     // number decoder channels.
1362     assert(sync_buffer_->Channels() == decoder->Channels());
1363     assert(decoded_buffer_length_ >= kMaxFrameSize * decoder->Channels());
1364     assert(operation == kNormal || operation == kAccelerate ||
1365            operation == kFastAccelerate || operation == kMerge ||
1366            operation == kPreemptiveExpand);
1367     packet_list->pop_front();
1368     size_t payload_length = packet->payload_length;
1369     int decode_length;
1370     if (packet->sync_packet) {
1371       // Decode to silence with the same frame size as the last decode.
1372       LOG(LS_VERBOSE) << "Decoding sync-packet: " <<
1373           " ts=" << packet->header.timestamp <<
1374           ", sn=" << packet->header.sequenceNumber <<
1375           ", pt=" << static_cast<int>(packet->header.payloadType) <<
1376           ", ssrc=" << packet->header.ssrc <<
1377           ", len=" << packet->payload_length;
1378       memset(&decoded_buffer_[*decoded_length], 0,
1379              decoder_frame_length_ * decoder->Channels() *
1380                  sizeof(decoded_buffer_[0]));
1381       decode_length = rtc::checked_cast<int>(decoder_frame_length_);
1382     } else if (!packet->primary) {
1383       // This is a redundant payload; call the special decoder method.
1384       LOG(LS_VERBOSE) << "Decoding packet (redundant):" <<
1385           " ts=" << packet->header.timestamp <<
1386           ", sn=" << packet->header.sequenceNumber <<
1387           ", pt=" << static_cast<int>(packet->header.payloadType) <<
1388           ", ssrc=" << packet->header.ssrc <<
1389           ", len=" << packet->payload_length;
1390       decode_length = decoder->DecodeRedundant(
1391           packet->payload, packet->payload_length, fs_hz_,
1392           (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t),
1393           &decoded_buffer_[*decoded_length], speech_type);
1394     } else {
1395       LOG(LS_VERBOSE) << "Decoding packet: ts=" << packet->header.timestamp <<
1396           ", sn=" << packet->header.sequenceNumber <<
1397           ", pt=" << static_cast<int>(packet->header.payloadType) <<
1398           ", ssrc=" << packet->header.ssrc <<
1399           ", len=" << packet->payload_length;
1400       decode_length =
1401           decoder->Decode(
1402               packet->payload, packet->payload_length, fs_hz_,
1403               (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t),
1404               &decoded_buffer_[*decoded_length], speech_type);
1405     }
1406 
1407     delete[] packet->payload;
1408     delete packet;
1409     packet = NULL;
1410     if (decode_length > 0) {
1411       *decoded_length += decode_length;
1412       // Update |decoder_frame_length_| with number of samples per channel.
1413       decoder_frame_length_ =
1414           static_cast<size_t>(decode_length) / decoder->Channels();
1415       LOG(LS_VERBOSE) << "Decoded " << decode_length << " samples ("
1416                       << decoder->Channels() << " channel(s) -> "
1417                       << decoder_frame_length_ << " samples per channel)";
1418     } else if (decode_length < 0) {
1419       // Error.
1420       LOG(LS_WARNING) << "Decode " << decode_length << " " << payload_length;
1421       *decoded_length = -1;
1422       PacketBuffer::DeleteAllPackets(packet_list);
1423       break;
1424     }
1425     if (*decoded_length > static_cast<int>(decoded_buffer_length_)) {
1426       // Guard against overflow.
1427       LOG(LS_WARNING) << "Decoded too much.";
1428       PacketBuffer::DeleteAllPackets(packet_list);
1429       return kDecodedTooMuch;
1430     }
1431     if (!packet_list->empty()) {
1432       packet = packet_list->front();
1433     } else {
1434       packet = NULL;
1435     }
1436   }  // End of decode loop.
1437 
1438   // If the list is not empty at this point, either a decoding error terminated
1439   // the while-loop, or list must hold exactly one CNG packet.
1440   assert(packet_list->empty() || *decoded_length < 0 ||
1441          (packet_list->size() == 1 && packet &&
1442              decoder_database_->IsComfortNoise(packet->header.payloadType)));
1443   return 0;
1444 }
1445 
DoNormal(const int16_t * decoded_buffer,size_t decoded_length,AudioDecoder::SpeechType speech_type,bool play_dtmf)1446 void NetEqImpl::DoNormal(const int16_t* decoded_buffer, size_t decoded_length,
1447                          AudioDecoder::SpeechType speech_type, bool play_dtmf) {
1448   assert(normal_.get());
1449   assert(mute_factor_array_.get());
1450   normal_->Process(decoded_buffer, decoded_length, last_mode_,
1451                    mute_factor_array_.get(), algorithm_buffer_.get());
1452   if (decoded_length != 0) {
1453     last_mode_ = kModeNormal;
1454   }
1455 
1456   // If last packet was decoded as an inband CNG, set mode to CNG instead.
1457   if ((speech_type == AudioDecoder::kComfortNoise)
1458       || ((last_mode_ == kModeCodecInternalCng)
1459           && (decoded_length == 0))) {
1460     // TODO(hlundin): Remove second part of || statement above.
1461     last_mode_ = kModeCodecInternalCng;
1462   }
1463 
1464   if (!play_dtmf) {
1465     dtmf_tone_generator_->Reset();
1466   }
1467 }
1468 
DoMerge(int16_t * decoded_buffer,size_t decoded_length,AudioDecoder::SpeechType speech_type,bool play_dtmf)1469 void NetEqImpl::DoMerge(int16_t* decoded_buffer, size_t decoded_length,
1470                         AudioDecoder::SpeechType speech_type, bool play_dtmf) {
1471   assert(mute_factor_array_.get());
1472   assert(merge_.get());
1473   size_t new_length = merge_->Process(decoded_buffer, decoded_length,
1474                                       mute_factor_array_.get(),
1475                                       algorithm_buffer_.get());
1476   size_t expand_length_correction = new_length -
1477       decoded_length / algorithm_buffer_->Channels();
1478 
1479   // Update in-call and post-call statistics.
1480   if (expand_->MuteFactor(0) == 0) {
1481     // Expand generates only noise.
1482     stats_.ExpandedNoiseSamples(expand_length_correction);
1483   } else {
1484     // Expansion generates more than only noise.
1485     stats_.ExpandedVoiceSamples(expand_length_correction);
1486   }
1487 
1488   last_mode_ = kModeMerge;
1489   // If last packet was decoded as an inband CNG, set mode to CNG instead.
1490   if (speech_type == AudioDecoder::kComfortNoise) {
1491     last_mode_ = kModeCodecInternalCng;
1492   }
1493   expand_->Reset();
1494   if (!play_dtmf) {
1495     dtmf_tone_generator_->Reset();
1496   }
1497 }
1498 
DoExpand(bool play_dtmf)1499 int NetEqImpl::DoExpand(bool play_dtmf) {
1500   while ((sync_buffer_->FutureLength() - expand_->overlap_length()) <
1501       output_size_samples_) {
1502     algorithm_buffer_->Clear();
1503     int return_value = expand_->Process(algorithm_buffer_.get());
1504     size_t length = algorithm_buffer_->Size();
1505 
1506     // Update in-call and post-call statistics.
1507     if (expand_->MuteFactor(0) == 0) {
1508       // Expand operation generates only noise.
1509       stats_.ExpandedNoiseSamples(length);
1510     } else {
1511       // Expand operation generates more than only noise.
1512       stats_.ExpandedVoiceSamples(length);
1513     }
1514 
1515     last_mode_ = kModeExpand;
1516 
1517     if (return_value < 0) {
1518       return return_value;
1519     }
1520 
1521     sync_buffer_->PushBack(*algorithm_buffer_);
1522     algorithm_buffer_->Clear();
1523   }
1524   if (!play_dtmf) {
1525     dtmf_tone_generator_->Reset();
1526   }
1527   return 0;
1528 }
1529 
DoAccelerate(int16_t * decoded_buffer,size_t decoded_length,AudioDecoder::SpeechType speech_type,bool play_dtmf,bool fast_accelerate)1530 int NetEqImpl::DoAccelerate(int16_t* decoded_buffer,
1531                             size_t decoded_length,
1532                             AudioDecoder::SpeechType speech_type,
1533                             bool play_dtmf,
1534                             bool fast_accelerate) {
1535   const size_t required_samples =
1536       static_cast<size_t>(240 * fs_mult_);  // Must have 30 ms.
1537   size_t borrowed_samples_per_channel = 0;
1538   size_t num_channels = algorithm_buffer_->Channels();
1539   size_t decoded_length_per_channel = decoded_length / num_channels;
1540   if (decoded_length_per_channel < required_samples) {
1541     // Must move data from the |sync_buffer_| in order to get 30 ms.
1542     borrowed_samples_per_channel = static_cast<int>(required_samples -
1543         decoded_length_per_channel);
1544     memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels],
1545             decoded_buffer,
1546             sizeof(int16_t) * decoded_length);
1547     sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel,
1548                                          decoded_buffer);
1549     decoded_length = required_samples * num_channels;
1550   }
1551 
1552   size_t samples_removed;
1553   Accelerate::ReturnCodes return_code =
1554       accelerate_->Process(decoded_buffer, decoded_length, fast_accelerate,
1555                            algorithm_buffer_.get(), &samples_removed);
1556   stats_.AcceleratedSamples(samples_removed);
1557   switch (return_code) {
1558     case Accelerate::kSuccess:
1559       last_mode_ = kModeAccelerateSuccess;
1560       break;
1561     case Accelerate::kSuccessLowEnergy:
1562       last_mode_ = kModeAccelerateLowEnergy;
1563       break;
1564     case Accelerate::kNoStretch:
1565       last_mode_ = kModeAccelerateFail;
1566       break;
1567     case Accelerate::kError:
1568       // TODO(hlundin): Map to kModeError instead?
1569       last_mode_ = kModeAccelerateFail;
1570       return kAccelerateError;
1571   }
1572 
1573   if (borrowed_samples_per_channel > 0) {
1574     // Copy borrowed samples back to the |sync_buffer_|.
1575     size_t length = algorithm_buffer_->Size();
1576     if (length < borrowed_samples_per_channel) {
1577       // This destroys the beginning of the buffer, but will not cause any
1578       // problems.
1579       sync_buffer_->ReplaceAtIndex(*algorithm_buffer_,
1580                                    sync_buffer_->Size() -
1581                                    borrowed_samples_per_channel);
1582       sync_buffer_->PushFrontZeros(borrowed_samples_per_channel - length);
1583       algorithm_buffer_->PopFront(length);
1584       assert(algorithm_buffer_->Empty());
1585     } else {
1586       sync_buffer_->ReplaceAtIndex(*algorithm_buffer_,
1587                                    borrowed_samples_per_channel,
1588                                    sync_buffer_->Size() -
1589                                    borrowed_samples_per_channel);
1590       algorithm_buffer_->PopFront(borrowed_samples_per_channel);
1591     }
1592   }
1593 
1594   // If last packet was decoded as an inband CNG, set mode to CNG instead.
1595   if (speech_type == AudioDecoder::kComfortNoise) {
1596     last_mode_ = kModeCodecInternalCng;
1597   }
1598   if (!play_dtmf) {
1599     dtmf_tone_generator_->Reset();
1600   }
1601   expand_->Reset();
1602   return 0;
1603 }
1604 
DoPreemptiveExpand(int16_t * decoded_buffer,size_t decoded_length,AudioDecoder::SpeechType speech_type,bool play_dtmf)1605 int NetEqImpl::DoPreemptiveExpand(int16_t* decoded_buffer,
1606                                   size_t decoded_length,
1607                                   AudioDecoder::SpeechType speech_type,
1608                                   bool play_dtmf) {
1609   const size_t required_samples =
1610       static_cast<size_t>(240 * fs_mult_);  // Must have 30 ms.
1611   size_t num_channels = algorithm_buffer_->Channels();
1612   size_t borrowed_samples_per_channel = 0;
1613   size_t old_borrowed_samples_per_channel = 0;
1614   size_t decoded_length_per_channel = decoded_length / num_channels;
1615   if (decoded_length_per_channel < required_samples) {
1616     // Must move data from the |sync_buffer_| in order to get 30 ms.
1617     borrowed_samples_per_channel =
1618         required_samples - decoded_length_per_channel;
1619     // Calculate how many of these were already played out.
1620     old_borrowed_samples_per_channel =
1621         (borrowed_samples_per_channel > sync_buffer_->FutureLength()) ?
1622         (borrowed_samples_per_channel - sync_buffer_->FutureLength()) : 0;
1623     memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels],
1624             decoded_buffer,
1625             sizeof(int16_t) * decoded_length);
1626     sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel,
1627                                          decoded_buffer);
1628     decoded_length = required_samples * num_channels;
1629   }
1630 
1631   size_t samples_added;
1632   PreemptiveExpand::ReturnCodes return_code = preemptive_expand_->Process(
1633       decoded_buffer, decoded_length,
1634       old_borrowed_samples_per_channel,
1635       algorithm_buffer_.get(), &samples_added);
1636   stats_.PreemptiveExpandedSamples(samples_added);
1637   switch (return_code) {
1638     case PreemptiveExpand::kSuccess:
1639       last_mode_ = kModePreemptiveExpandSuccess;
1640       break;
1641     case PreemptiveExpand::kSuccessLowEnergy:
1642       last_mode_ = kModePreemptiveExpandLowEnergy;
1643       break;
1644     case PreemptiveExpand::kNoStretch:
1645       last_mode_ = kModePreemptiveExpandFail;
1646       break;
1647     case PreemptiveExpand::kError:
1648       // TODO(hlundin): Map to kModeError instead?
1649       last_mode_ = kModePreemptiveExpandFail;
1650       return kPreemptiveExpandError;
1651   }
1652 
1653   if (borrowed_samples_per_channel > 0) {
1654     // Copy borrowed samples back to the |sync_buffer_|.
1655     sync_buffer_->ReplaceAtIndex(
1656         *algorithm_buffer_, borrowed_samples_per_channel,
1657         sync_buffer_->Size() - borrowed_samples_per_channel);
1658     algorithm_buffer_->PopFront(borrowed_samples_per_channel);
1659   }
1660 
1661   // If last packet was decoded as an inband CNG, set mode to CNG instead.
1662   if (speech_type == AudioDecoder::kComfortNoise) {
1663     last_mode_ = kModeCodecInternalCng;
1664   }
1665   if (!play_dtmf) {
1666     dtmf_tone_generator_->Reset();
1667   }
1668   expand_->Reset();
1669   return 0;
1670 }
1671 
DoRfc3389Cng(PacketList * packet_list,bool play_dtmf)1672 int NetEqImpl::DoRfc3389Cng(PacketList* packet_list, bool play_dtmf) {
1673   if (!packet_list->empty()) {
1674     // Must have exactly one SID frame at this point.
1675     assert(packet_list->size() == 1);
1676     Packet* packet = packet_list->front();
1677     packet_list->pop_front();
1678     if (!decoder_database_->IsComfortNoise(packet->header.payloadType)) {
1679 #ifdef LEGACY_BITEXACT
1680       // This can happen due to a bug in GetDecision. Change the payload type
1681       // to a CNG type, and move on. Note that this means that we are in fact
1682       // sending a non-CNG payload to the comfort noise decoder for decoding.
1683       // Clearly wrong, but will maintain bit-exactness with legacy.
1684       if (fs_hz_ == 8000) {
1685         packet->header.payloadType =
1686             decoder_database_->GetRtpPayloadType(NetEqDecoder::kDecoderCNGnb);
1687       } else if (fs_hz_ == 16000) {
1688         packet->header.payloadType =
1689             decoder_database_->GetRtpPayloadType(NetEqDecoder::kDecoderCNGwb);
1690       } else if (fs_hz_ == 32000) {
1691         packet->header.payloadType = decoder_database_->GetRtpPayloadType(
1692             NetEqDecoder::kDecoderCNGswb32kHz);
1693       } else if (fs_hz_ == 48000) {
1694         packet->header.payloadType = decoder_database_->GetRtpPayloadType(
1695             NetEqDecoder::kDecoderCNGswb48kHz);
1696       }
1697       assert(decoder_database_->IsComfortNoise(packet->header.payloadType));
1698 #else
1699       LOG(LS_ERROR) << "Trying to decode non-CNG payload as CNG.";
1700       return kOtherError;
1701 #endif
1702     }
1703     // UpdateParameters() deletes |packet|.
1704     if (comfort_noise_->UpdateParameters(packet) ==
1705         ComfortNoise::kInternalError) {
1706       algorithm_buffer_->Zeros(output_size_samples_);
1707       return -comfort_noise_->internal_error_code();
1708     }
1709   }
1710   int cn_return = comfort_noise_->Generate(output_size_samples_,
1711                                            algorithm_buffer_.get());
1712   expand_->Reset();
1713   last_mode_ = kModeRfc3389Cng;
1714   if (!play_dtmf) {
1715     dtmf_tone_generator_->Reset();
1716   }
1717   if (cn_return == ComfortNoise::kInternalError) {
1718     decoder_error_code_ = comfort_noise_->internal_error_code();
1719     return kComfortNoiseErrorCode;
1720   } else if (cn_return == ComfortNoise::kUnknownPayloadType) {
1721     return kUnknownRtpPayloadType;
1722   }
1723   return 0;
1724 }
1725 
DoCodecInternalCng(const int16_t * decoded_buffer,size_t decoded_length)1726 void NetEqImpl::DoCodecInternalCng(const int16_t* decoded_buffer,
1727                                    size_t decoded_length) {
1728   RTC_DCHECK(normal_.get());
1729   RTC_DCHECK(mute_factor_array_.get());
1730   normal_->Process(decoded_buffer, decoded_length, last_mode_,
1731                    mute_factor_array_.get(), algorithm_buffer_.get());
1732   last_mode_ = kModeCodecInternalCng;
1733   expand_->Reset();
1734 }
1735 
DoDtmf(const DtmfEvent & dtmf_event,bool * play_dtmf)1736 int NetEqImpl::DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf) {
1737   // This block of the code and the block further down, handling |dtmf_switch|
1738   // are commented out. Otherwise playing out-of-band DTMF would fail in VoE
1739   // test, DtmfTest.ManualSuccessfullySendsOutOfBandTelephoneEvents. This is
1740   // equivalent to |dtmf_switch| always be false.
1741   //
1742   // See http://webrtc-codereview.appspot.com/1195004/ for discussion
1743   // On this issue. This change might cause some glitches at the point of
1744   // switch from audio to DTMF. Issue 1545 is filed to track this.
1745   //
1746   //  bool dtmf_switch = false;
1747   //  if ((last_mode_ != kModeDtmf) && dtmf_tone_generator_->initialized()) {
1748   //    // Special case; see below.
1749   //    // We must catch this before calling Generate, since |initialized| is
1750   //    // modified in that call.
1751   //    dtmf_switch = true;
1752   //  }
1753 
1754   int dtmf_return_value = 0;
1755   if (!dtmf_tone_generator_->initialized()) {
1756     // Initialize if not already done.
1757     dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no,
1758                                                    dtmf_event.volume);
1759   }
1760 
1761   if (dtmf_return_value == 0) {
1762     // Generate DTMF signal.
1763     dtmf_return_value = dtmf_tone_generator_->Generate(output_size_samples_,
1764                                                        algorithm_buffer_.get());
1765   }
1766 
1767   if (dtmf_return_value < 0) {
1768     algorithm_buffer_->Zeros(output_size_samples_);
1769     return dtmf_return_value;
1770   }
1771 
1772   //  if (dtmf_switch) {
1773   //    // This is the special case where the previous operation was DTMF
1774   //    // overdub, but the current instruction is "regular" DTMF. We must make
1775   //    // sure that the DTMF does not have any discontinuities. The first DTMF
1776   //    // sample that we generate now must be played out immediately, therefore
1777   //    // it must be copied to the speech buffer.
1778   //    // TODO(hlundin): This code seems incorrect. (Legacy.) Write test and
1779   //    // verify correct operation.
1780   //    assert(false);
1781   //    // Must generate enough data to replace all of the |sync_buffer_|
1782   //    // "future".
1783   //    int required_length = sync_buffer_->FutureLength();
1784   //    assert(dtmf_tone_generator_->initialized());
1785   //    dtmf_return_value = dtmf_tone_generator_->Generate(required_length,
1786   //                                                       algorithm_buffer_);
1787   //    assert((size_t) required_length == algorithm_buffer_->Size());
1788   //    if (dtmf_return_value < 0) {
1789   //      algorithm_buffer_->Zeros(output_size_samples_);
1790   //      return dtmf_return_value;
1791   //    }
1792   //
1793   //    // Overwrite the "future" part of the speech buffer with the new DTMF
1794   //    // data.
1795   //    // TODO(hlundin): It seems that this overwriting has gone lost.
1796   //    // Not adapted for multi-channel yet.
1797   //    assert(algorithm_buffer_->Channels() == 1);
1798   //    if (algorithm_buffer_->Channels() != 1) {
1799   //      LOG(LS_WARNING) << "DTMF not supported for more than one channel";
1800   //      return kStereoNotSupported;
1801   //    }
1802   //    // Shuffle the remaining data to the beginning of algorithm buffer.
1803   //    algorithm_buffer_->PopFront(sync_buffer_->FutureLength());
1804   //  }
1805 
1806   sync_buffer_->IncreaseEndTimestamp(
1807       static_cast<uint32_t>(output_size_samples_));
1808   expand_->Reset();
1809   last_mode_ = kModeDtmf;
1810 
1811   // Set to false because the DTMF is already in the algorithm buffer.
1812   *play_dtmf = false;
1813   return 0;
1814 }
1815 
DoAlternativePlc(bool increase_timestamp)1816 void NetEqImpl::DoAlternativePlc(bool increase_timestamp) {
1817   AudioDecoder* decoder = decoder_database_->GetActiveDecoder();
1818   size_t length;
1819   if (decoder && decoder->HasDecodePlc()) {
1820     // Use the decoder's packet-loss concealment.
1821     // TODO(hlundin): Will probably need a longer buffer for multi-channel.
1822     int16_t decoded_buffer[kMaxFrameSize];
1823     length = decoder->DecodePlc(1, decoded_buffer);
1824     if (length > 0)
1825       algorithm_buffer_->PushBackInterleaved(decoded_buffer, length);
1826   } else {
1827     // Do simple zero-stuffing.
1828     length = output_size_samples_;
1829     algorithm_buffer_->Zeros(length);
1830     // By not advancing the timestamp, NetEq inserts samples.
1831     stats_.AddZeros(length);
1832   }
1833   if (increase_timestamp) {
1834     sync_buffer_->IncreaseEndTimestamp(static_cast<uint32_t>(length));
1835   }
1836   expand_->Reset();
1837 }
1838 
DtmfOverdub(const DtmfEvent & dtmf_event,size_t num_channels,int16_t * output) const1839 int NetEqImpl::DtmfOverdub(const DtmfEvent& dtmf_event, size_t num_channels,
1840                            int16_t* output) const {
1841   size_t out_index = 0;
1842   size_t overdub_length = output_size_samples_;  // Default value.
1843 
1844   if (sync_buffer_->dtmf_index() > sync_buffer_->next_index()) {
1845     // Special operation for transition from "DTMF only" to "DTMF overdub".
1846     out_index = std::min(
1847         sync_buffer_->dtmf_index() - sync_buffer_->next_index(),
1848         output_size_samples_);
1849     overdub_length = output_size_samples_ - out_index;
1850   }
1851 
1852   AudioMultiVector dtmf_output(num_channels);
1853   int dtmf_return_value = 0;
1854   if (!dtmf_tone_generator_->initialized()) {
1855     dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no,
1856                                                    dtmf_event.volume);
1857   }
1858   if (dtmf_return_value == 0) {
1859     dtmf_return_value = dtmf_tone_generator_->Generate(overdub_length,
1860                                                        &dtmf_output);
1861     assert(overdub_length == dtmf_output.Size());
1862   }
1863   dtmf_output.ReadInterleaved(overdub_length, &output[out_index]);
1864   return dtmf_return_value < 0 ? dtmf_return_value : 0;
1865 }
1866 
ExtractPackets(size_t required_samples,PacketList * packet_list)1867 int NetEqImpl::ExtractPackets(size_t required_samples,
1868                               PacketList* packet_list) {
1869   bool first_packet = true;
1870   uint8_t prev_payload_type = 0;
1871   uint32_t prev_timestamp = 0;
1872   uint16_t prev_sequence_number = 0;
1873   bool next_packet_available = false;
1874 
1875   const RTPHeader* header = packet_buffer_->NextRtpHeader();
1876   assert(header);
1877   if (!header) {
1878     LOG(LS_ERROR) << "Packet buffer unexpectedly empty.";
1879     return -1;
1880   }
1881   uint32_t first_timestamp = header->timestamp;
1882   int extracted_samples = 0;
1883 
1884   // Packet extraction loop.
1885   do {
1886     timestamp_ = header->timestamp;
1887     size_t discard_count = 0;
1888     Packet* packet = packet_buffer_->GetNextPacket(&discard_count);
1889     // |header| may be invalid after the |packet_buffer_| operation.
1890     header = NULL;
1891     if (!packet) {
1892       LOG(LS_ERROR) << "Should always be able to extract a packet here";
1893       assert(false);  // Should always be able to extract a packet here.
1894       return -1;
1895     }
1896     stats_.PacketsDiscarded(discard_count);
1897     // Store waiting time in ms; packets->waiting_time is in "output blocks".
1898     stats_.StoreWaitingTime(packet->waiting_time * kOutputSizeMs);
1899     assert(packet->payload_length > 0);
1900     packet_list->push_back(packet);  // Store packet in list.
1901 
1902     if (first_packet) {
1903       first_packet = false;
1904       if (nack_enabled_) {
1905         RTC_DCHECK(nack_);
1906         // TODO(henrik.lundin): Should we update this for all decoded packets?
1907         nack_->UpdateLastDecodedPacket(packet->header.sequenceNumber,
1908                                        packet->header.timestamp);
1909       }
1910       prev_sequence_number = packet->header.sequenceNumber;
1911       prev_timestamp = packet->header.timestamp;
1912       prev_payload_type = packet->header.payloadType;
1913     }
1914 
1915     // Store number of extracted samples.
1916     int packet_duration = 0;
1917     AudioDecoder* decoder = decoder_database_->GetDecoder(
1918         packet->header.payloadType);
1919     if (decoder) {
1920       if (packet->sync_packet) {
1921         packet_duration = rtc::checked_cast<int>(decoder_frame_length_);
1922       } else {
1923         if (packet->primary) {
1924           packet_duration = decoder->PacketDuration(packet->payload,
1925                                                     packet->payload_length);
1926         } else {
1927           packet_duration = decoder->
1928               PacketDurationRedundant(packet->payload, packet->payload_length);
1929           stats_.SecondaryDecodedSamples(packet_duration);
1930         }
1931       }
1932     } else {
1933       LOG(LS_WARNING) << "Unknown payload type "
1934                       << static_cast<int>(packet->header.payloadType);
1935       assert(false);
1936     }
1937     if (packet_duration <= 0) {
1938       // Decoder did not return a packet duration. Assume that the packet
1939       // contains the same number of samples as the previous one.
1940       packet_duration = rtc::checked_cast<int>(decoder_frame_length_);
1941     }
1942     extracted_samples = packet->header.timestamp - first_timestamp +
1943         packet_duration;
1944 
1945     // Check what packet is available next.
1946     header = packet_buffer_->NextRtpHeader();
1947     next_packet_available = false;
1948     if (header && prev_payload_type == header->payloadType) {
1949       int16_t seq_no_diff = header->sequenceNumber - prev_sequence_number;
1950       size_t ts_diff = header->timestamp - prev_timestamp;
1951       if (seq_no_diff == 1 ||
1952           (seq_no_diff == 0 && ts_diff == decoder_frame_length_)) {
1953         // The next sequence number is available, or the next part of a packet
1954         // that was split into pieces upon insertion.
1955         next_packet_available = true;
1956       }
1957       prev_sequence_number = header->sequenceNumber;
1958     }
1959   } while (extracted_samples < rtc::checked_cast<int>(required_samples) &&
1960            next_packet_available);
1961 
1962   if (extracted_samples > 0) {
1963     // Delete old packets only when we are going to decode something. Otherwise,
1964     // we could end up in the situation where we never decode anything, since
1965     // all incoming packets are considered too old but the buffer will also
1966     // never be flooded and flushed.
1967     packet_buffer_->DiscardAllOldPackets(timestamp_);
1968   }
1969 
1970   return extracted_samples;
1971 }
1972 
UpdatePlcComponents(int fs_hz,size_t channels)1973 void NetEqImpl::UpdatePlcComponents(int fs_hz, size_t channels) {
1974   // Delete objects and create new ones.
1975   expand_.reset(expand_factory_->Create(background_noise_.get(),
1976                                         sync_buffer_.get(), &random_vector_,
1977                                         &stats_, fs_hz, channels));
1978   merge_.reset(new Merge(fs_hz, channels, expand_.get(), sync_buffer_.get()));
1979 }
1980 
SetSampleRateAndChannels(int fs_hz,size_t channels)1981 void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
1982   LOG(LS_VERBOSE) << "SetSampleRateAndChannels " << fs_hz << " " << channels;
1983   // TODO(hlundin): Change to an enumerator and skip assert.
1984   assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz ==  32000 || fs_hz == 48000);
1985   assert(channels > 0);
1986 
1987   fs_hz_ = fs_hz;
1988   fs_mult_ = fs_hz / 8000;
1989   output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_);
1990   decoder_frame_length_ = 3 * output_size_samples_;  // Initialize to 30ms.
1991 
1992   last_mode_ = kModeNormal;
1993 
1994   // Create a new array of mute factors and set all to 1.
1995   mute_factor_array_.reset(new int16_t[channels]);
1996   for (size_t i = 0; i < channels; ++i) {
1997     mute_factor_array_[i] = 16384;  // 1.0 in Q14.
1998   }
1999 
2000   AudioDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
2001   if (cng_decoder)
2002     cng_decoder->Reset();
2003 
2004   // Reinit post-decode VAD with new sample rate.
2005   assert(vad_.get());  // Cannot be NULL here.
2006   vad_->Init();
2007 
2008   // Delete algorithm buffer and create a new one.
2009   algorithm_buffer_.reset(new AudioMultiVector(channels));
2010 
2011   // Delete sync buffer and create a new one.
2012   sync_buffer_.reset(new SyncBuffer(channels, kSyncBufferSize * fs_mult_));
2013 
2014   // Delete BackgroundNoise object and create a new one.
2015   background_noise_.reset(new BackgroundNoise(channels));
2016   background_noise_->set_mode(background_noise_mode_);
2017 
2018   // Reset random vector.
2019   random_vector_.Reset();
2020 
2021   UpdatePlcComponents(fs_hz, channels);
2022 
2023   // Move index so that we create a small set of future samples (all 0).
2024   sync_buffer_->set_next_index(sync_buffer_->next_index() -
2025       expand_->overlap_length());
2026 
2027   normal_.reset(new Normal(fs_hz, decoder_database_.get(), *background_noise_,
2028                            expand_.get()));
2029   accelerate_.reset(
2030       accelerate_factory_->Create(fs_hz, channels, *background_noise_));
2031   preemptive_expand_.reset(preemptive_expand_factory_->Create(
2032       fs_hz, channels, *background_noise_, expand_->overlap_length()));
2033 
2034   // Delete ComfortNoise object and create a new one.
2035   comfort_noise_.reset(new ComfortNoise(fs_hz, decoder_database_.get(),
2036                                         sync_buffer_.get()));
2037 
2038   // Verify that |decoded_buffer_| is long enough.
2039   if (decoded_buffer_length_ < kMaxFrameSize * channels) {
2040     // Reallocate to larger size.
2041     decoded_buffer_length_ = kMaxFrameSize * channels;
2042     decoded_buffer_.reset(new int16_t[decoded_buffer_length_]);
2043   }
2044 
2045   // Create DecisionLogic if it is not created yet, then communicate new sample
2046   // rate and output size to DecisionLogic object.
2047   if (!decision_logic_.get()) {
2048     CreateDecisionLogic();
2049   }
2050   decision_logic_->SetSampleRate(fs_hz_, output_size_samples_);
2051 }
2052 
LastOutputType()2053 NetEqOutputType NetEqImpl::LastOutputType() {
2054   assert(vad_.get());
2055   assert(expand_.get());
2056   if (last_mode_ == kModeCodecInternalCng || last_mode_ == kModeRfc3389Cng) {
2057     return kOutputCNG;
2058   } else if (last_mode_ == kModeExpand && expand_->MuteFactor(0) == 0) {
2059     // Expand mode has faded down to background noise only (very long expand).
2060     return kOutputPLCtoCNG;
2061   } else if (last_mode_ == kModeExpand) {
2062     return kOutputPLC;
2063   } else if (vad_->running() && !vad_->active_speech()) {
2064     return kOutputVADPassive;
2065   } else {
2066     return kOutputNormal;
2067   }
2068 }
2069 
CreateDecisionLogic()2070 void NetEqImpl::CreateDecisionLogic() {
2071   decision_logic_.reset(DecisionLogic::Create(fs_hz_, output_size_samples_,
2072                                               playout_mode_,
2073                                               decoder_database_.get(),
2074                                               *packet_buffer_.get(),
2075                                               delay_manager_.get(),
2076                                               buffer_level_filter_.get()));
2077 }
2078 }  // namespace webrtc
2079