1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef API_NETEQ_NETEQ_H_ 12 #define API_NETEQ_NETEQ_H_ 13 14 #include <stddef.h> // Provide access to size_t. 15 16 #include <map> 17 #include <string> 18 #include <vector> 19 20 #include "absl/types/optional.h" 21 #include "api/audio_codecs/audio_codec_pair_id.h" 22 #include "api/audio_codecs/audio_decoder.h" 23 #include "api/audio_codecs/audio_format.h" 24 #include "api/rtp_headers.h" 25 #include "api/scoped_refptr.h" 26 27 namespace webrtc { 28 29 // Forward declarations. 30 class AudioFrame; 31 class AudioDecoderFactory; 32 class Clock; 33 34 struct NetEqNetworkStatistics { 35 uint16_t current_buffer_size_ms; // Current jitter buffer size in ms. 36 uint16_t preferred_buffer_size_ms; // Target buffer size in ms. 37 uint16_t jitter_peaks_found; // 1 if adding extra delay due to peaky 38 // jitter; 0 otherwise. 39 uint16_t expand_rate; // Fraction (of original stream) of synthesized 40 // audio inserted through expansion (in Q14). 41 uint16_t speech_expand_rate; // Fraction (of original stream) of synthesized 42 // speech inserted through expansion (in Q14). 43 uint16_t preemptive_rate; // Fraction of data inserted through pre-emptive 44 // expansion (in Q14). 45 uint16_t accelerate_rate; // Fraction of data removed through acceleration 46 // (in Q14). 47 uint16_t secondary_decoded_rate; // Fraction of data coming from FEC/RED 48 // decoding (in Q14). 49 uint16_t secondary_discarded_rate; // Fraction of discarded FEC/RED data (in 50 // Q14). 51 // Statistics for packet waiting times, i.e., the time between a packet 52 // arrives until it is decoded. 53 int mean_waiting_time_ms; 54 int median_waiting_time_ms; 55 int min_waiting_time_ms; 56 int max_waiting_time_ms; 57 }; 58 59 // NetEq statistics that persist over the lifetime of the class. 60 // These metrics are never reset. 61 struct NetEqLifetimeStatistics { 62 // Stats below correspond to similarly-named fields in the WebRTC stats spec. 63 // https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats 64 uint64_t total_samples_received = 0; 65 uint64_t concealed_samples = 0; 66 uint64_t concealment_events = 0; 67 uint64_t jitter_buffer_delay_ms = 0; 68 uint64_t jitter_buffer_emitted_count = 0; 69 uint64_t jitter_buffer_target_delay_ms = 0; 70 uint64_t inserted_samples_for_deceleration = 0; 71 uint64_t removed_samples_for_acceleration = 0; 72 uint64_t silent_concealed_samples = 0; 73 uint64_t fec_packets_received = 0; 74 uint64_t fec_packets_discarded = 0; 75 // Below stats are not part of the spec. 76 uint64_t delayed_packet_outage_samples = 0; 77 // This is sum of relative packet arrival delays of received packets so far. 78 // Since end-to-end delay of a packet is difficult to measure and is not 79 // necessarily useful for measuring jitter buffer performance, we report a 80 // relative packet arrival delay. The relative packet arrival delay of a 81 // packet is defined as the arrival delay compared to the first packet 82 // received, given that it had zero delay. To avoid clock drift, the "first" 83 // packet can be made dynamic. 84 uint64_t relative_packet_arrival_delay_ms = 0; 85 uint64_t jitter_buffer_packets_received = 0; 86 // An interruption is a loss-concealment event lasting at least 150 ms. The 87 // two stats below count the number os such events and the total duration of 88 // these events. 89 int32_t interruption_count = 0; 90 int32_t total_interruption_duration_ms = 0; 91 }; 92 93 // Metrics that describe the operations performed in NetEq, and the internal 94 // state. 95 struct NetEqOperationsAndState { 96 // These sample counters are cumulative, and don't reset. As a reference, the 97 // total number of output samples can be found in 98 // NetEqLifetimeStatistics::total_samples_received. 99 uint64_t preemptive_samples = 0; 100 uint64_t accelerate_samples = 0; 101 // Count of the number of buffer flushes. 102 uint64_t packet_buffer_flushes = 0; 103 // The number of primary packets that were discarded. 104 uint64_t discarded_primary_packets = 0; 105 // The statistics below are not cumulative. 106 // The waiting time of the last decoded packet. 107 uint64_t last_waiting_time_ms = 0; 108 // The sum of the packet and jitter buffer size in ms. 109 uint64_t current_buffer_size_ms = 0; 110 // The current frame size in ms. 111 uint64_t current_frame_size_ms = 0; 112 // Flag to indicate that the next packet is available. 113 bool next_packet_available = false; 114 }; 115 116 // This is the interface class for NetEq. 117 class NetEq { 118 public: 119 struct Config { 120 Config(); 121 Config(const Config&); 122 Config(Config&&); 123 ~Config(); 124 Config& operator=(const Config&); 125 Config& operator=(Config&&); 126 127 std::string ToString() const; 128 129 int sample_rate_hz = 16000; // Initial value. Will change with input data. 130 bool enable_post_decode_vad = false; 131 size_t max_packets_in_buffer = 200; 132 int max_delay_ms = 0; 133 int min_delay_ms = 0; 134 bool enable_fast_accelerate = false; 135 bool enable_muted_state = false; 136 bool enable_rtx_handling = false; 137 absl::optional<AudioCodecPairId> codec_pair_id; 138 bool for_test_no_time_stretching = false; // Use only for testing. 139 // Adds extra delay to the output of NetEq, without affecting jitter or 140 // loss behavior. This is mainly for testing. Value must be a non-negative 141 // multiple of 10 ms. 142 int extra_output_delay_ms = 0; 143 }; 144 145 enum ReturnCodes { kOK = 0, kFail = -1 }; 146 147 enum class Operation { 148 kNormal, 149 kMerge, 150 kExpand, 151 kAccelerate, 152 kFastAccelerate, 153 kPreemptiveExpand, 154 kRfc3389Cng, 155 kRfc3389CngNoPacket, 156 kCodecInternalCng, 157 kDtmf, 158 kUndefined, 159 }; 160 161 enum class Mode { 162 kNormal, 163 kExpand, 164 kMerge, 165 kAccelerateSuccess, 166 kAccelerateLowEnergy, 167 kAccelerateFail, 168 kPreemptiveExpandSuccess, 169 kPreemptiveExpandLowEnergy, 170 kPreemptiveExpandFail, 171 kRfc3389Cng, 172 kCodecInternalCng, 173 kCodecPlc, 174 kDtmf, 175 kError, 176 kUndefined, 177 }; 178 179 // Return type for GetDecoderFormat. 180 struct DecoderFormat { 181 int sample_rate_hz; 182 int num_channels; 183 SdpAudioFormat sdp_format; 184 }; 185 186 // Creates a new NetEq object, with parameters set in |config|. The |config| 187 // object will only have to be valid for the duration of the call to this 188 // method. 189 static NetEq* Create( 190 const NetEq::Config& config, 191 Clock* clock, 192 const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory); 193 ~NetEq()194 virtual ~NetEq() {} 195 196 // Inserts a new packet into NetEq. 197 // Returns 0 on success, -1 on failure. 198 virtual int InsertPacket(const RTPHeader& rtp_header, 199 rtc::ArrayView<const uint8_t> payload) = 0; 200 201 // Lets NetEq know that a packet arrived with an empty payload. This typically 202 // happens when empty packets are used for probing the network channel, and 203 // these packets use RTP sequence numbers from the same series as the actual 204 // audio packets. 205 virtual void InsertEmptyPacket(const RTPHeader& rtp_header) = 0; 206 207 // Instructs NetEq to deliver 10 ms of audio data. The data is written to 208 // |audio_frame|. All data in |audio_frame| is wiped; |data_|, |speech_type_|, 209 // |num_channels_|, |sample_rate_hz_|, |samples_per_channel_|, and 210 // |vad_activity_| are updated upon success. If an error is returned, some 211 // fields may not have been updated, or may contain inconsistent values. 212 // If muted state is enabled (through Config::enable_muted_state), |muted| 213 // may be set to true after a prolonged expand period. When this happens, the 214 // |data_| in |audio_frame| is not written, but should be interpreted as being 215 // all zeros. For testing purposes, an override can be supplied in the 216 // |action_override| argument, which will cause NetEq to take this action 217 // next, instead of the action it would normally choose. 218 // Returns kOK on success, or kFail in case of an error. 219 virtual int GetAudio( 220 AudioFrame* audio_frame, 221 bool* muted, 222 absl::optional<Operation> action_override = absl::nullopt) = 0; 223 224 // Replaces the current set of decoders with the given one. 225 virtual void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) = 0; 226 227 // Associates |rtp_payload_type| with the given codec, which NetEq will 228 // instantiate when it needs it. Returns true iff successful. 229 virtual bool RegisterPayloadType(int rtp_payload_type, 230 const SdpAudioFormat& audio_format) = 0; 231 232 // Removes |rtp_payload_type| from the codec database. Returns 0 on success, 233 // -1 on failure. Removing a payload type that is not registered is ok and 234 // will not result in an error. 235 virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0; 236 237 // Removes all payload types from the codec database. 238 virtual void RemoveAllPayloadTypes() = 0; 239 240 // Sets a minimum delay in millisecond for packet buffer. The minimum is 241 // maintained unless a higher latency is dictated by channel condition. 242 // Returns true if the minimum is successfully applied, otherwise false is 243 // returned. 244 virtual bool SetMinimumDelay(int delay_ms) = 0; 245 246 // Sets a maximum delay in milliseconds for packet buffer. The latency will 247 // not exceed the given value, even required delay (given the channel 248 // conditions) is higher. Calling this method has the same effect as setting 249 // the |max_delay_ms| value in the NetEq::Config struct. 250 virtual bool SetMaximumDelay(int delay_ms) = 0; 251 252 // Sets a base minimum delay in milliseconds for packet buffer. The minimum 253 // delay which is set via |SetMinimumDelay| can't be lower than base minimum 254 // delay. Calling this method is similar to setting the |min_delay_ms| value 255 // in the NetEq::Config struct. Returns true if the base minimum is 256 // successfully applied, otherwise false is returned. 257 virtual bool SetBaseMinimumDelayMs(int delay_ms) = 0; 258 259 // Returns current value of base minimum delay in milliseconds. 260 virtual int GetBaseMinimumDelayMs() const = 0; 261 262 // Returns the current target delay in ms. This includes any extra delay 263 // requested through SetMinimumDelay. 264 virtual int TargetDelayMs() const = 0; 265 266 // Returns the current total delay (packet buffer and sync buffer) in ms, 267 // with smoothing applied to even out short-time fluctuations due to jitter. 268 // The packet buffer part of the delay is not updated during DTX/CNG periods. 269 virtual int FilteredCurrentDelayMs() const = 0; 270 271 // Writes the current network statistics to |stats|. The statistics are reset 272 // after the call. 273 virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0; 274 275 // Current values only, not resetting any state. 276 virtual NetEqNetworkStatistics CurrentNetworkStatistics() const = 0; 277 278 // Returns a copy of this class's lifetime statistics. These statistics are 279 // never reset. 280 virtual NetEqLifetimeStatistics GetLifetimeStatistics() const = 0; 281 282 // Returns statistics about the performed operations and internal state. These 283 // statistics are never reset. 284 virtual NetEqOperationsAndState GetOperationsAndState() const = 0; 285 286 // Enables post-decode VAD. When enabled, GetAudio() will return 287 // kOutputVADPassive when the signal contains no speech. 288 virtual void EnableVad() = 0; 289 290 // Disables post-decode VAD. 291 virtual void DisableVad() = 0; 292 293 // Returns the RTP timestamp for the last sample delivered by GetAudio(). 294 // The return value will be empty if no valid timestamp is available. 295 virtual absl::optional<uint32_t> GetPlayoutTimestamp() const = 0; 296 297 // Returns the sample rate in Hz of the audio produced in the last GetAudio 298 // call. If GetAudio has not been called yet, the configured sample rate 299 // (Config::sample_rate_hz) is returned. 300 virtual int last_output_sample_rate_hz() const = 0; 301 302 // Returns the decoder info for the given payload type. Returns empty if no 303 // such payload type was registered. 304 virtual absl::optional<DecoderFormat> GetDecoderFormat( 305 int payload_type) const = 0; 306 307 // Flushes both the packet buffer and the sync buffer. 308 virtual void FlushBuffers() = 0; 309 310 // Enables NACK and sets the maximum size of the NACK list, which should be 311 // positive and no larger than Nack::kNackListSizeLimit. If NACK is already 312 // enabled then the maximum NACK list size is modified accordingly. 313 virtual void EnableNack(size_t max_nack_list_size) = 0; 314 315 virtual void DisableNack() = 0; 316 317 // Returns a list of RTP sequence numbers corresponding to packets to be 318 // retransmitted, given an estimate of the round-trip time in milliseconds. 319 virtual std::vector<uint16_t> GetNackList( 320 int64_t round_trip_time_ms) const = 0; 321 322 // Returns a vector containing the timestamps of the packets that were decoded 323 // in the last GetAudio call. If no packets were decoded in the last call, the 324 // vector is empty. 325 // Mainly intended for testing. 326 virtual std::vector<uint32_t> LastDecodedTimestamps() const = 0; 327 328 // Returns the length of the audio yet to play in the sync buffer. 329 // Mainly intended for testing. 330 virtual int SyncBufferSizeMs() const = 0; 331 }; 332 333 } // namespace webrtc 334 #endif // API_NETEQ_NETEQ_H_ 335