/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
#define MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_

#include <atomic>
#include <map>
#include <memory>
#include <string>
#include <vector>

#include "api/optional.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "modules/audio_coding/neteq/defines.h"
#include "modules/audio_coding/neteq/include/neteq.h"
#include "modules/audio_coding/neteq/packet.h"  // Declare PacketList.
#include "modules/audio_coding/neteq/random_vector.h"
#include "modules/audio_coding/neteq/rtcp.h"
#include "modules/audio_coding/neteq/statistics_calculator.h"
#include "modules/audio_coding/neteq/tick_timer.h"
#include "modules/include/module_common_types.h"
#include "rtc_base/constructormagic.h"
#include "rtc_base/criticalsection.h"
#include "rtc_base/thread_annotations.h"
#include "typedefs.h"  // NOLINT(build/include)

namespace webrtc {

// Forward declarations.
class Accelerate;
class BackgroundNoise;
class BufferLevelFilter;
class ComfortNoise;
class DecisionLogic;
class DecoderDatabase;
class DelayManager;
class DelayPeakDetector;
class DtmfBuffer;
class DtmfToneGenerator;
class Expand;
class Merge;
class NackTracker;
class Normal;
class PacketBuffer;
class RedPayloadSplitter;
class PostDecodeVad;
class PreemptiveExpand;
class RandomVector;
class SyncBuffer;
class TimestampScaler;
struct AccelerateFactory;
struct DtmfEvent;
struct ExpandFactory;
struct PreemptiveExpandFactory;

// Concrete implementation of the webrtc::NetEq interface. The public methods
// marked |override| implement that interface; the protected section holds the
// internal processing steps and the state they operate on. Protected methods
// are annotated RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_), i.e. the caller must
// already hold |crit_sect_|.
class NetEqImpl : public webrtc::NetEq {
 public:
  // Classification of the audio produced by GetAudio(); see LastOutputType().
  enum class OutputType {
    kNormalSpeech,
    kPLC,
    kCNG,
    kPLCCNG,
    kVadPassive
  };

  // Error codes, starting at kNoError == 0.
  // NOTE(review): presumably these are the values returned by the methods
  // below that are documented as "otherwise an error code" -- confirm against
  // the implementation.
  enum ErrorCodes {
    kNoError = 0,
    kOtherError,
    kUnknownRtpPayloadType,
    kDecoderNotFound,
    kInvalidPointer,
    kAccelerateError,
    kPreemptiveExpandError,
    kComfortNoiseErrorCode,
    kDecoderErrorCode,
    kOtherDecoderError,
    kInvalidOperation,
    kDtmfParsingError,
    kDtmfInsertError,
    kSampleUnderrun,
    kDecodedTooMuch,
    kRedundancySplitError,
    kPacketBufferCorruption
  };

  // Bundle of the components that are handed to the NetEqImpl constructor.
  struct Dependencies {
    // The constructor populates the Dependencies struct with the default
    // implementations of the objects. They can all be replaced by the user
    // before sending the struct to the NetEqImpl constructor. However, there
    // are dependencies between some of the classes inside the struct, so
    // swapping out one may make it necessary to re-create another one.
    explicit Dependencies(
        const NetEq::Config& config,
        const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory);
    ~Dependencies();

    std::unique_ptr<TickTimer> tick_timer;
    std::unique_ptr<BufferLevelFilter> buffer_level_filter;
    std::unique_ptr<DecoderDatabase> decoder_database;
    std::unique_ptr<DelayPeakDetector> delay_peak_detector;
    std::unique_ptr<DelayManager> delay_manager;
    std::unique_ptr<DtmfBuffer> dtmf_buffer;
    std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator;
    std::unique_ptr<PacketBuffer> packet_buffer;
    std::unique_ptr<RedPayloadSplitter> red_payload_splitter;
    std::unique_ptr<TimestampScaler> timestamp_scaler;
    std::unique_ptr<AccelerateFactory> accelerate_factory;
    std::unique_ptr<ExpandFactory> expand_factory;
    std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory;
  };

  // Creates a new NetEqImpl object. |deps| is taken by rvalue reference.
  // NOTE(review): the effect of |create_components| is not visible in this
  // header -- see the constructor definition.
  NetEqImpl(const NetEq::Config& config,
            Dependencies&& deps,
            bool create_components = true);

  ~NetEqImpl() override;

  // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
  // of the time when the packet was received, and should be measured with
  // the same tick rate as the RTP timestamp of the current payload.
  // Returns 0 on success, -1 on failure.
  int InsertPacket(const RTPHeader& rtp_header,
                   rtc::ArrayView<const uint8_t> payload,
                   uint32_t receive_timestamp) override;

  void InsertEmptyPacket(const RTPHeader& rtp_header) override;

  int GetAudio(AudioFrame* audio_frame, bool* muted) override;

  void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) override;

  int RegisterPayloadType(NetEqDecoder codec,
                          const std::string& codec_name,
                          uint8_t rtp_payload_type) override;

  int RegisterExternalDecoder(AudioDecoder* decoder,
                              NetEqDecoder codec,
                              const std::string& codec_name,
                              uint8_t rtp_payload_type) override;

  bool RegisterPayloadType(int rtp_payload_type,
                           const SdpAudioFormat& audio_format) override;

  // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
  // -1 on failure.
  int RemovePayloadType(uint8_t rtp_payload_type) override;

  void RemoveAllPayloadTypes() override;

  bool SetMinimumDelay(int delay_ms) override;

  bool SetMaximumDelay(int delay_ms) override;

  int LeastRequiredDelayMs() const override;

  int SetTargetDelay() override;

  int TargetDelayMs() const override;

  int CurrentDelayMs() const override;

  int FilteredCurrentDelayMs() const override;

  // Sets the playout mode to |mode|.
  // Deprecated.
  // TODO(henrik.lundin) Delete.
  void SetPlayoutMode(NetEqPlayoutMode mode) override;

  // Returns the current playout mode.
  // Deprecated.
  // TODO(henrik.lundin) Delete.
  NetEqPlayoutMode PlayoutMode() const override;

  // Writes the current network statistics to |stats|. The statistics are reset
  // after the call.
  int NetworkStatistics(NetEqNetworkStatistics* stats) override;

  // Writes the current RTCP statistics to |stats|. The statistics are reset
  // and a new report period is started with the call.
  void GetRtcpStatistics(RtcpStatistics* stats) override;

  NetEqLifetimeStatistics GetLifetimeStatistics() const override;

  // Same as GetRtcpStatistics(), but does not reset anything.
  void GetRtcpStatisticsNoReset(RtcpStatistics* stats) override;

  // Enables post-decode VAD. When enabled, GetAudio() will return
  // kOutputVADPassive when the signal contains no speech.
  // NOTE(review): kOutputVADPassive is not declared in this header; the
  // closest names visible here are OutputType::kVadPassive and
  // AudioFrame::kVadPassive -- confirm which one is meant.
  void EnableVad() override;

  // Disables post-decode VAD.
  void DisableVad() override;

  rtc::Optional<uint32_t> GetPlayoutTimestamp() const override;

  int last_output_sample_rate_hz() const override;

  rtc::Optional<CodecInst> GetDecoder(int payload_type) const override;

  rtc::Optional<SdpAudioFormat> GetDecoderFormat(
      int payload_type) const override;

  int SetTargetNumberOfChannels() override;

  int SetTargetSampleRate() override;

  // Flushes both the packet buffer and the sync buffer.
  void FlushBuffers() override;

  void PacketBufferStatistics(int* current_num_packets,
                              int* max_num_packets) const override;

  void EnableNack(size_t max_nack_list_size) override;

  void DisableNack() override;

  std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const override;

  std::vector<uint32_t> LastDecodedTimestamps() const override;

  int SyncBufferSizeMs() const override;

  // These accessor methods are only intended for testing purposes.
  const SyncBuffer* sync_buffer_for_test() const;
  Operations last_operation_for_test() const;

 protected:
  static const int kOutputSizeMs = 10;
  static const size_t kMaxFrameSize = 5760;  // 120 ms @ 48 kHz.
  // TODO(hlundin): Provide a better value for kSyncBufferSize.
  // Current value is kMaxFrameSize + 60 ms * 48 kHz, which is enough for
  // calculating correlations of current frame against history.
  static const size_t kSyncBufferSize = kMaxFrameSize + 60 * 48;

  // Inserts a new packet into NetEq. This is used by the InsertPacket method
  // above. Returns 0 on success, otherwise an error code.
  // TODO(hlundin): Merge this with InsertPacket above?
  int InsertPacketInternal(const RTPHeader& rtp_header,
                           rtc::ArrayView<const uint8_t> payload,
                           uint32_t receive_timestamp)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Delivers 10 ms of audio data. The data is written to |audio_frame|.
  // Returns 0 on success, otherwise an error code.
  int GetAudioInternal(AudioFrame* audio_frame, bool* muted)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Provides a decision to the GetAudioInternal method. The decision what to
  // do is written to |operation|. Packets to decode are written to
  // |packet_list|, and a DTMF event to play is written to |dtmf_event|. When
  // DTMF should be played, |play_dtmf| is set to true by the method.
  // Returns 0 on success, otherwise an error code.
  int GetDecision(Operations* operation,
                  PacketList* packet_list,
                  DtmfEvent* dtmf_event,
                  bool* play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Decodes the speech packets in |packet_list|, and writes the results to
  // |decoded_buffer|, which is allocated to hold |decoded_buffer_length|
  // elements. The length of the decoded data is written to |decoded_length|.
  // The speech type -- speech or (codec-internal) comfort noise -- is written
  // to |speech_type|. If |packet_list| contains any SID frames for RFC 3389
  // comfort noise, those are not decoded.
  // NOTE(review): this method has no parameters named |decoded_buffer| or
  // |decoded_buffer_length|; presumably the members |decoded_buffer_| and
  // |decoded_buffer_length_| are meant -- confirm against the implementation.
  int Decode(PacketList* packet_list,
             Operations* operation,
             int* decoded_length,
             AudioDecoder::SpeechType* speech_type)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Sub-method to Decode(). Performs codec internal CNG.
  int DecodeCng(AudioDecoder* decoder,
                int* decoded_length,
                AudioDecoder::SpeechType* speech_type)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Sub-method to Decode(). Performs the actual decoding.
  int DecodeLoop(PacketList* packet_list,
                 const Operations& operation,
                 AudioDecoder* decoder,
                 int* decoded_length,
                 AudioDecoder::SpeechType* speech_type)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Sub-method which calls the Normal class to perform the normal operation.
  void DoNormal(const int16_t* decoded_buffer,
                size_t decoded_length,
                AudioDecoder::SpeechType speech_type,
                bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Sub-method which calls the Merge class to perform the merge operation.
  void DoMerge(int16_t* decoded_buffer,
               size_t decoded_length,
               AudioDecoder::SpeechType speech_type,
               bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Sub-method which calls the Expand class to perform the expand operation.
  int DoExpand(bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Sub-method which calls the Accelerate class to perform the accelerate
  // operation.
  int DoAccelerate(int16_t* decoded_buffer,
                   size_t decoded_length,
                   AudioDecoder::SpeechType speech_type,
                   bool play_dtmf,
                   bool fast_accelerate)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Sub-method which calls the PreemptiveExpand class to perform the
  // preemptive expand operation.
  int DoPreemptiveExpand(int16_t* decoded_buffer,
                         size_t decoded_length,
                         AudioDecoder::SpeechType speech_type,
                         bool play_dtmf)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Sub-method which calls the ComfortNoise class to generate RFC 3389 comfort
  // noise. |packet_list| can either contain one SID frame to update the
  // noise parameters, or no payload at all, in which case the previously
  // received parameters are used.
  int DoRfc3389Cng(PacketList* packet_list, bool play_dtmf)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Calls the audio decoder to generate codec-internal comfort noise when
  // no packet was received.
  void DoCodecInternalCng(const int16_t* decoded_buffer, size_t decoded_length)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Calls the DtmfToneGenerator class to generate DTMF tones.
  int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Produces packet-loss concealment using alternative methods. If the codec
  // has an internal PLC, it is called to generate samples. Otherwise, the
  // method performs zero-stuffing.
  void DoAlternativePlc(bool increase_timestamp)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Overdub DTMF on top of |output|.
  int DtmfOverdub(const DtmfEvent& dtmf_event,
                  size_t num_channels,
                  int16_t* output) const
      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Extracts packets from |packet_buffer_| to produce at least
  // |required_samples| samples. The packets are inserted into |packet_list|.
  // Returns the number of samples that the packets in the list will produce, or
  // -1 in case of an error.
  int ExtractPackets(size_t required_samples, PacketList* packet_list)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Resets various variables and objects to new values based on the sample rate
  // |fs_hz| and |channels| number of audio channels.
  void SetSampleRateAndChannels(int fs_hz, size_t channels)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Returns the output type for the audio produced by the latest call to
  // GetAudio().
  OutputType LastOutputType() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Updates Expand and Merge.
  virtual void UpdatePlcComponents(int fs_hz, size_t channels)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Creates DecisionLogic object with the mode given by |playout_mode_|.
  virtual void CreateDecisionLogic() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

  // Lock referenced by every RTC_GUARDED_BY / RTC_EXCLUSIVE_LOCKS_REQUIRED
  // annotation in this class.
  rtc::CriticalSection crit_sect_;

  // Components held through const unique_ptrs. Each has a same-named
  // counterpart in Dependencies, except |vad_|.
  // NOTE(review): |delay_manager_| is declared before |delay_peak_detector_|
  // here, the reverse of their order in Dependencies. Members are destroyed
  // in reverse declaration order -- confirm this ordering is intentional.
  const std::unique_ptr<TickTimer> tick_timer_ RTC_GUARDED_BY(crit_sect_);
  const std::unique_ptr<BufferLevelFilter> buffer_level_filter_
      RTC_GUARDED_BY(crit_sect_);
  const std::unique_ptr<DecoderDatabase> decoder_database_
      RTC_GUARDED_BY(crit_sect_);
  const std::unique_ptr<DelayManager> delay_manager_ RTC_GUARDED_BY(crit_sect_);
  const std::unique_ptr<DelayPeakDetector> delay_peak_detector_
      RTC_GUARDED_BY(crit_sect_);
  const std::unique_ptr<DtmfBuffer> dtmf_buffer_ RTC_GUARDED_BY(crit_sect_);
  const std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator_
      RTC_GUARDED_BY(crit_sect_);
  const std::unique_ptr<PacketBuffer> packet_buffer_ RTC_GUARDED_BY(crit_sect_);
  const std::unique_ptr<RedPayloadSplitter> red_payload_splitter_
      RTC_GUARDED_BY(crit_sect_);
  const std::unique_ptr<TimestampScaler> timestamp_scaler_
      RTC_GUARDED_BY(crit_sect_);
  const std::unique_ptr<PostDecodeVad> vad_ RTC_GUARDED_BY(crit_sect_);
  const std::unique_ptr<ExpandFactory> expand_factory_
      RTC_GUARDED_BY(crit_sect_);
  const std::unique_ptr<AccelerateFactory> accelerate_factory_
      RTC_GUARDED_BY(crit_sect_);
  const std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory_
      RTC_GUARDED_BY(crit_sect_);

  // Non-const (re-assignable) components and processing state.
  std::unique_ptr<BackgroundNoise> background_noise_ RTC_GUARDED_BY(crit_sect_);
  std::unique_ptr<DecisionLogic> decision_logic_ RTC_GUARDED_BY(crit_sect_);
  std::unique_ptr<AudioMultiVector> algorithm_buffer_
      RTC_GUARDED_BY(crit_sect_);
  std::unique_ptr<SyncBuffer> sync_buffer_ RTC_GUARDED_BY(crit_sect_);
  std::unique_ptr<Expand> expand_ RTC_GUARDED_BY(crit_sect_);
  std::unique_ptr<Normal> normal_ RTC_GUARDED_BY(crit_sect_);
  std::unique_ptr<Merge> merge_ RTC_GUARDED_BY(crit_sect_);
  std::unique_ptr<Accelerate> accelerate_ RTC_GUARDED_BY(crit_sect_);
  std::unique_ptr<PreemptiveExpand> preemptive_expand_
      RTC_GUARDED_BY(crit_sect_);
  RandomVector random_vector_ RTC_GUARDED_BY(crit_sect_);
  std::unique_ptr<ComfortNoise> comfort_noise_ RTC_GUARDED_BY(crit_sect_);
  Rtcp rtcp_ RTC_GUARDED_BY(crit_sect_);
  StatisticsCalculator stats_ RTC_GUARDED_BY(crit_sect_);
  int fs_hz_ RTC_GUARDED_BY(crit_sect_);
  int fs_mult_ RTC_GUARDED_BY(crit_sect_);
  // The only data member declared as std::atomic; it carries no
  // RTC_GUARDED_BY annotation, unlike the members around it.
  std::atomic<int> last_output_sample_rate_hz_;
  size_t output_size_samples_ RTC_GUARDED_BY(crit_sect_);
  size_t decoder_frame_length_ RTC_GUARDED_BY(crit_sect_);
  Modes last_mode_ RTC_GUARDED_BY(crit_sect_);
  Operations last_operation_ RTC_GUARDED_BY(crit_sect_);
  std::unique_ptr<int16_t[]> mute_factor_array_ RTC_GUARDED_BY(crit_sect_);
  size_t decoded_buffer_length_ RTC_GUARDED_BY(crit_sect_);
  std::unique_ptr<int16_t[]> decoded_buffer_ RTC_GUARDED_BY(crit_sect_);
  uint32_t playout_timestamp_ RTC_GUARDED_BY(crit_sect_);
  bool new_codec_ RTC_GUARDED_BY(crit_sect_);
  uint32_t timestamp_ RTC_GUARDED_BY(crit_sect_);
  bool reset_decoder_ RTC_GUARDED_BY(crit_sect_);
  rtc::Optional<uint8_t> current_rtp_payload_type_ RTC_GUARDED_BY(crit_sect_);
  rtc::Optional<uint8_t> current_cng_rtp_payload_type_
      RTC_GUARDED_BY(crit_sect_);
  uint32_t ssrc_ RTC_GUARDED_BY(crit_sect_);
  bool first_packet_ RTC_GUARDED_BY(crit_sect_);
  const BackgroundNoiseMode background_noise_mode_ RTC_GUARDED_BY(crit_sect_);
  NetEqPlayoutMode playout_mode_ RTC_GUARDED_BY(crit_sect_);
  bool enable_fast_accelerate_ RTC_GUARDED_BY(crit_sect_);
  std::unique_ptr<NackTracker> nack_ RTC_GUARDED_BY(crit_sect_);
  bool nack_enabled_ RTC_GUARDED_BY(crit_sect_);
  const bool enable_muted_state_ RTC_GUARDED_BY(crit_sect_);
  // Default-initialized in-class to AudioFrame::kVadPassive.
  AudioFrame::VADActivity last_vad_activity_ RTC_GUARDED_BY(crit_sect_) =
      AudioFrame::kVadPassive;
  std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
      RTC_GUARDED_BY(crit_sect_);
  std::vector<uint32_t> last_decoded_timestamps_ RTC_GUARDED_BY(crit_sect_);
  const bool use_dtx_delay_fix_ RTC_GUARDED_BY(crit_sect_);

 private:
  RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);
};

}  // namespace webrtc
#endif  // MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_