/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
11 #ifndef API_NETEQ_NETEQ_H_
12 #define API_NETEQ_NETEQ_H_
13 
#include <stddef.h>  // Provide access to size_t.
#include <stdint.h>  // Provide access to fixed-width integer types.

#include <map>
#include <string>
#include <vector>

#include "absl/types/optional.h"
#include "api/audio_codecs/audio_codec_pair_id.h"
#include "api/audio_codecs/audio_decoder.h"
#include "api/audio_codecs/audio_format.h"
#include "api/rtp_headers.h"
#include "api/scoped_refptr.h"
26 
27 namespace webrtc {
28 
// Forward declarations. These types are only named through pointers or as
// template arguments in this header, so their full definitions are not needed
// here.
class AudioFrame;
class AudioDecoderFactory;
class Clock;
33 
// Network statistics reported by NetEq.
// Every member has a zero default so that a default-constructed instance never
// exposes indeterminate values.
struct NetEqNetworkStatistics {
  // Current jitter buffer size in ms.
  uint16_t current_buffer_size_ms = 0;
  // Target buffer size in ms.
  uint16_t preferred_buffer_size_ms = 0;
  // 1 if adding extra delay due to peaky jitter; 0 otherwise.
  uint16_t jitter_peaks_found = 0;
  // Fraction (of original stream) of synthesized audio inserted through
  // expansion (in Q14).
  uint16_t expand_rate = 0;
  // Fraction (of original stream) of synthesized speech inserted through
  // expansion (in Q14).
  uint16_t speech_expand_rate = 0;
  // Fraction of data inserted through pre-emptive expansion (in Q14).
  uint16_t preemptive_rate = 0;
  // Fraction of data removed through acceleration (in Q14).
  uint16_t accelerate_rate = 0;
  // Fraction of data coming from FEC/RED decoding (in Q14).
  uint16_t secondary_decoded_rate = 0;
  // Fraction of discarded FEC/RED data (in Q14).
  uint16_t secondary_discarded_rate = 0;
  // Statistics for packet waiting times, i.e., the time between a packet
  // arrives until it is decoded.
  int mean_waiting_time_ms = 0;
  int median_waiting_time_ms = 0;
  int min_waiting_time_ms = 0;
  int max_waiting_time_ms = 0;
};
58 
// NetEq statistics that persist over the lifetime of the class.
// These metrics are never reset.
struct NetEqLifetimeStatistics {
  // Stats below correspond to similarly-named fields in the WebRTC stats spec.
  // https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats
  uint64_t total_samples_received = 0;
  uint64_t concealed_samples = 0;
  uint64_t concealment_events = 0;
  uint64_t jitter_buffer_delay_ms = 0;
  uint64_t jitter_buffer_emitted_count = 0;
  uint64_t jitter_buffer_target_delay_ms = 0;
  uint64_t inserted_samples_for_deceleration = 0;
  uint64_t removed_samples_for_acceleration = 0;
  uint64_t silent_concealed_samples = 0;
  uint64_t fec_packets_received = 0;
  uint64_t fec_packets_discarded = 0;

  // Below stats are not part of the spec.
  uint64_t delayed_packet_outage_samples = 0;
  // This is the sum of relative packet arrival delays of received packets so
  // far. Since end-to-end delay of a packet is difficult to measure and is not
  // necessarily useful for measuring jitter buffer performance, we report a
  // relative packet arrival delay. The relative packet arrival delay of a
  // packet is defined as the arrival delay compared to the first packet
  // received, given that it had zero delay. To avoid clock drift, the "first"
  // packet can be made dynamic.
  uint64_t relative_packet_arrival_delay_ms = 0;
  uint64_t jitter_buffer_packets_received = 0;
  // An interruption is a loss-concealment event lasting at least 150 ms. The
  // two stats below count the number of such events and the total duration of
  // these events.
  int32_t interruption_count = 0;
  int32_t total_interruption_duration_ms = 0;
};
92 
// Metrics that describe the operations performed in NetEq, and the internal
// state.
struct NetEqOperationsAndState {
  // These sample counters are cumulative, and don't reset. As a reference, the
  // total number of output samples can be found in
  // NetEqLifetimeStatistics::total_samples_received.
  uint64_t preemptive_samples = 0;
  uint64_t accelerate_samples = 0;
  // Count of the number of buffer flushes.
  uint64_t packet_buffer_flushes = 0;
  // The number of primary packets that were discarded.
  uint64_t discarded_primary_packets = 0;

  // The statistics below are not cumulative.
  // The waiting time of the last decoded packet.
  uint64_t last_waiting_time_ms = 0;
  // The sum of the packet and jitter buffer size in ms.
  uint64_t current_buffer_size_ms = 0;
  // The current frame size in ms.
  uint64_t current_frame_size_ms = 0;
  // Flag to indicate that the next packet is available.
  bool next_packet_available = false;
};
115 
116 // This is the interface class for NetEq.
117 class NetEq {
118  public:
119   struct Config {
120     Config();
121     Config(const Config&);
122     Config(Config&&);
123     ~Config();
124     Config& operator=(const Config&);
125     Config& operator=(Config&&);
126 
127     std::string ToString() const;
128 
129     int sample_rate_hz = 16000;  // Initial value. Will change with input data.
130     bool enable_post_decode_vad = false;
131     size_t max_packets_in_buffer = 200;
132     int max_delay_ms = 0;
133     int min_delay_ms = 0;
134     bool enable_fast_accelerate = false;
135     bool enable_muted_state = false;
136     bool enable_rtx_handling = false;
137     absl::optional<AudioCodecPairId> codec_pair_id;
138     bool for_test_no_time_stretching = false;  // Use only for testing.
139     // Adds extra delay to the output of NetEq, without affecting jitter or
140     // loss behavior. This is mainly for testing. Value must be a non-negative
141     // multiple of 10 ms.
142     int extra_output_delay_ms = 0;
143   };
144 
145   enum ReturnCodes { kOK = 0, kFail = -1 };
146 
147   enum class Operation {
148     kNormal,
149     kMerge,
150     kExpand,
151     kAccelerate,
152     kFastAccelerate,
153     kPreemptiveExpand,
154     kRfc3389Cng,
155     kRfc3389CngNoPacket,
156     kCodecInternalCng,
157     kDtmf,
158     kUndefined,
159   };
160 
161   enum class Mode {
162     kNormal,
163     kExpand,
164     kMerge,
165     kAccelerateSuccess,
166     kAccelerateLowEnergy,
167     kAccelerateFail,
168     kPreemptiveExpandSuccess,
169     kPreemptiveExpandLowEnergy,
170     kPreemptiveExpandFail,
171     kRfc3389Cng,
172     kCodecInternalCng,
173     kCodecPlc,
174     kDtmf,
175     kError,
176     kUndefined,
177   };
178 
179   // Return type for GetDecoderFormat.
180   struct DecoderFormat {
181     int sample_rate_hz;
182     int num_channels;
183     SdpAudioFormat sdp_format;
184   };
185 
186   // Creates a new NetEq object, with parameters set in |config|. The |config|
187   // object will only have to be valid for the duration of the call to this
188   // method.
189   static NetEq* Create(
190       const NetEq::Config& config,
191       Clock* clock,
192       const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory);
193 
~NetEq()194   virtual ~NetEq() {}
195 
196   // Inserts a new packet into NetEq.
197   // Returns 0 on success, -1 on failure.
198   virtual int InsertPacket(const RTPHeader& rtp_header,
199                            rtc::ArrayView<const uint8_t> payload) = 0;
200 
201   // Lets NetEq know that a packet arrived with an empty payload. This typically
202   // happens when empty packets are used for probing the network channel, and
203   // these packets use RTP sequence numbers from the same series as the actual
204   // audio packets.
205   virtual void InsertEmptyPacket(const RTPHeader& rtp_header) = 0;
206 
207   // Instructs NetEq to deliver 10 ms of audio data. The data is written to
208   // |audio_frame|. All data in |audio_frame| is wiped; |data_|, |speech_type_|,
209   // |num_channels_|, |sample_rate_hz_|, |samples_per_channel_|, and
210   // |vad_activity_| are updated upon success. If an error is returned, some
211   // fields may not have been updated, or may contain inconsistent values.
212   // If muted state is enabled (through Config::enable_muted_state), |muted|
213   // may be set to true after a prolonged expand period. When this happens, the
214   // |data_| in |audio_frame| is not written, but should be interpreted as being
215   // all zeros. For testing purposes, an override can be supplied in the
216   // |action_override| argument, which will cause NetEq to take this action
217   // next, instead of the action it would normally choose.
218   // Returns kOK on success, or kFail in case of an error.
219   virtual int GetAudio(
220       AudioFrame* audio_frame,
221       bool* muted,
222       absl::optional<Operation> action_override = absl::nullopt) = 0;
223 
224   // Replaces the current set of decoders with the given one.
225   virtual void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) = 0;
226 
227   // Associates |rtp_payload_type| with the given codec, which NetEq will
228   // instantiate when it needs it. Returns true iff successful.
229   virtual bool RegisterPayloadType(int rtp_payload_type,
230                                    const SdpAudioFormat& audio_format) = 0;
231 
232   // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
233   // -1 on failure. Removing a payload type that is not registered is ok and
234   // will not result in an error.
235   virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
236 
237   // Removes all payload types from the codec database.
238   virtual void RemoveAllPayloadTypes() = 0;
239 
240   // Sets a minimum delay in millisecond for packet buffer. The minimum is
241   // maintained unless a higher latency is dictated by channel condition.
242   // Returns true if the minimum is successfully applied, otherwise false is
243   // returned.
244   virtual bool SetMinimumDelay(int delay_ms) = 0;
245 
246   // Sets a maximum delay in milliseconds for packet buffer. The latency will
247   // not exceed the given value, even required delay (given the channel
248   // conditions) is higher. Calling this method has the same effect as setting
249   // the |max_delay_ms| value in the NetEq::Config struct.
250   virtual bool SetMaximumDelay(int delay_ms) = 0;
251 
252   // Sets a base minimum delay in milliseconds for packet buffer. The minimum
253   // delay which is set via |SetMinimumDelay| can't be lower than base minimum
254   // delay. Calling this method is similar to setting the |min_delay_ms| value
255   // in the NetEq::Config struct. Returns true if the base minimum is
256   // successfully applied, otherwise false is returned.
257   virtual bool SetBaseMinimumDelayMs(int delay_ms) = 0;
258 
259   // Returns current value of base minimum delay in milliseconds.
260   virtual int GetBaseMinimumDelayMs() const = 0;
261 
262   // Returns the current target delay in ms. This includes any extra delay
263   // requested through SetMinimumDelay.
264   virtual int TargetDelayMs() const = 0;
265 
266   // Returns the current total delay (packet buffer and sync buffer) in ms,
267   // with smoothing applied to even out short-time fluctuations due to jitter.
268   // The packet buffer part of the delay is not updated during DTX/CNG periods.
269   virtual int FilteredCurrentDelayMs() const = 0;
270 
271   // Writes the current network statistics to |stats|. The statistics are reset
272   // after the call.
273   virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
274 
275   // Current values only, not resetting any state.
276   virtual NetEqNetworkStatistics CurrentNetworkStatistics() const = 0;
277 
278   // Returns a copy of this class's lifetime statistics. These statistics are
279   // never reset.
280   virtual NetEqLifetimeStatistics GetLifetimeStatistics() const = 0;
281 
282   // Returns statistics about the performed operations and internal state. These
283   // statistics are never reset.
284   virtual NetEqOperationsAndState GetOperationsAndState() const = 0;
285 
286   // Enables post-decode VAD. When enabled, GetAudio() will return
287   // kOutputVADPassive when the signal contains no speech.
288   virtual void EnableVad() = 0;
289 
290   // Disables post-decode VAD.
291   virtual void DisableVad() = 0;
292 
293   // Returns the RTP timestamp for the last sample delivered by GetAudio().
294   // The return value will be empty if no valid timestamp is available.
295   virtual absl::optional<uint32_t> GetPlayoutTimestamp() const = 0;
296 
297   // Returns the sample rate in Hz of the audio produced in the last GetAudio
298   // call. If GetAudio has not been called yet, the configured sample rate
299   // (Config::sample_rate_hz) is returned.
300   virtual int last_output_sample_rate_hz() const = 0;
301 
302   // Returns the decoder info for the given payload type. Returns empty if no
303   // such payload type was registered.
304   virtual absl::optional<DecoderFormat> GetDecoderFormat(
305       int payload_type) const = 0;
306 
307   // Flushes both the packet buffer and the sync buffer.
308   virtual void FlushBuffers() = 0;
309 
310   // Enables NACK and sets the maximum size of the NACK list, which should be
311   // positive and no larger than Nack::kNackListSizeLimit. If NACK is already
312   // enabled then the maximum NACK list size is modified accordingly.
313   virtual void EnableNack(size_t max_nack_list_size) = 0;
314 
315   virtual void DisableNack() = 0;
316 
317   // Returns a list of RTP sequence numbers corresponding to packets to be
318   // retransmitted, given an estimate of the round-trip time in milliseconds.
319   virtual std::vector<uint16_t> GetNackList(
320       int64_t round_trip_time_ms) const = 0;
321 
322   // Returns a vector containing the timestamps of the packets that were decoded
323   // in the last GetAudio call. If no packets were decoded in the last call, the
324   // vector is empty.
325   // Mainly intended for testing.
326   virtual std::vector<uint32_t> LastDecodedTimestamps() const = 0;
327 
328   // Returns the length of the audio yet to play in the sync buffer.
329   // Mainly intended for testing.
330   virtual int SyncBufferSizeMs() const = 0;
331 };
332 
333 }  // namespace webrtc
334 #endif  // API_NETEQ_NETEQ_H_
335