1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INTERFACE_NETEQ_H_
12 #define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INTERFACE_NETEQ_H_
13 
14 #include <string.h>  // Provide access to size_t.
15 
16 #include <vector>
17 
18 #include "webrtc/base/constructormagic.h"
19 #include "webrtc/common_types.h"
20 #include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
21 #include "webrtc/typedefs.h"
22 
23 namespace webrtc {
24 
25 // Forward declarations.
26 struct WebRtcRTPHeader;
27 
// Snapshot of NetEq's network-related statistics, filled in by
// NetEq::NetworkStatistics(). Rates marked "Q14" are fixed-point fractions.
struct NetEqNetworkStatistics {
  // Current jitter buffer size in ms.
  uint16_t current_buffer_size_ms;

  // Target buffer size in ms.
  uint16_t preferred_buffer_size_ms;

  // 1 if adding extra delay due to peaky jitter; 0 otherwise.
  uint16_t jitter_peaks_found;

  // Loss rate (network + late) in Q14.
  uint16_t packet_loss_rate;

  // Late loss rate in Q14.
  uint16_t packet_discard_rate;

  // Fraction (of original stream) of synthesized audio inserted through
  // expansion (in Q14).
  uint16_t expand_rate;

  // Fraction (of original stream) of synthesized speech inserted through
  // expansion (in Q14).
  uint16_t speech_expand_rate;

  // Fraction of data inserted through pre-emptive expansion (in Q14).
  uint16_t preemptive_rate;

  // Fraction of data removed through acceleration (in Q14).
  uint16_t accelerate_rate;

  // Fraction of data coming from secondary decoding (in Q14).
  uint16_t secondary_decoded_rate;

  // Average clock-drift in parts-per-million (positive or negative).
  int32_t clockdrift_ppm;

  // Number of zero samples added in "off" mode.
  int added_zero_samples;
};
49 
// Speech type reported by NetEq::GetAudio() through its |type| argument.
enum NetEqOutputType {
  kOutputNormal = 0,
  kOutputPLC = 1,
  kOutputCNG = 2,
  kOutputPLCtoCNG = 3,
  // Reported when post-decode VAD is enabled and the signal contains no
  // speech (see NetEq::EnableVad()).
  kOutputVADPassive = 4
};
57 
// Playout modes, selected through NetEq::Config::playout_mode (or the
// deprecated NetEq::SetPlayoutMode()).
enum NetEqPlayoutMode {
  kPlayoutOn = 0,
  kPlayoutOff = 1,
  kPlayoutFax = 2,
  kPlayoutStreaming = 3
};
64 
65 // This is the interface class for NetEq.
66 class NetEq {
67  public:
68   enum BackgroundNoiseMode {
69     kBgnOn,    // Default behavior with eternal noise.
70     kBgnFade,  // Noise fades to zero after some time.
71     kBgnOff    // Background noise is always zero.
72   };
73 
74   struct Config {
75     Config()
76         : sample_rate_hz(16000),
77           enable_audio_classifier(false),
78           max_packets_in_buffer(50),
79           // |max_delay_ms| has the same effect as calling SetMaximumDelay().
80           max_delay_ms(2000),
81           background_noise_mode(kBgnOff),
82           playout_mode(kPlayoutOn) {}
83 
84     int sample_rate_hz;  // Initial vale. Will change with input data.
85     bool enable_audio_classifier;
86     int max_packets_in_buffer;
87     int max_delay_ms;
88     BackgroundNoiseMode background_noise_mode;
89     NetEqPlayoutMode playout_mode;
90   };
91 
92   enum ReturnCodes {
93     kOK = 0,
94     kFail = -1,
95     kNotImplemented = -2
96   };
97 
98   enum ErrorCodes {
99     kNoError = 0,
100     kOtherError,
101     kInvalidRtpPayloadType,
102     kUnknownRtpPayloadType,
103     kCodecNotSupported,
104     kDecoderExists,
105     kDecoderNotFound,
106     kInvalidSampleRate,
107     kInvalidPointer,
108     kAccelerateError,
109     kPreemptiveExpandError,
110     kComfortNoiseErrorCode,
111     kDecoderErrorCode,
112     kOtherDecoderError,
113     kInvalidOperation,
114     kDtmfParameterError,
115     kDtmfParsingError,
116     kDtmfInsertError,
117     kStereoNotSupported,
118     kSampleUnderrun,
119     kDecodedTooMuch,
120     kFrameSplitError,
121     kRedundancySplitError,
122     kPacketBufferCorruption,
123     kSyncPacketNotAccepted
124   };
125 
126   // Creates a new NetEq object, with parameters set in |config|. The |config|
127   // object will only have to be valid for the duration of the call to this
128   // method.
129   static NetEq* Create(const NetEq::Config& config);
130 
131   virtual ~NetEq() {}
132 
133   // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
134   // of the time when the packet was received, and should be measured with
135   // the same tick rate as the RTP timestamp of the current payload.
136   // Returns 0 on success, -1 on failure.
137   virtual int InsertPacket(const WebRtcRTPHeader& rtp_header,
138                            const uint8_t* payload,
139                            size_t length_bytes,
140                            uint32_t receive_timestamp) = 0;
141 
142   // Inserts a sync-packet into packet queue. Sync-packets are decoded to
143   // silence and are intended to keep AV-sync intact in an event of long packet
144   // losses when Video NACK is enabled but Audio NACK is not. Clients of NetEq
145   // might insert sync-packet when they observe that buffer level of NetEq is
146   // decreasing below a certain threshold, defined by the application.
147   // Sync-packets should have the same payload type as the last audio payload
148   // type, i.e. they cannot have DTMF or CNG payload type, nor a codec change
149   // can be implied by inserting a sync-packet.
150   // Returns kOk on success, kFail on failure.
151   virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
152                                uint32_t receive_timestamp) = 0;
153 
154   // Instructs NetEq to deliver 10 ms of audio data. The data is written to
155   // |output_audio|, which can hold (at least) |max_length| elements.
156   // The number of channels that were written to the output is provided in
157   // the output variable |num_channels|, and each channel contains
158   // |samples_per_channel| elements. If more than one channel is written,
159   // the samples are interleaved.
160   // The speech type is written to |type|, if |type| is not NULL.
161   // Returns kOK on success, or kFail in case of an error.
162   virtual int GetAudio(size_t max_length, int16_t* output_audio,
163                        int* samples_per_channel, int* num_channels,
164                        NetEqOutputType* type) = 0;
165 
166   // Associates |rtp_payload_type| with |codec| and stores the information in
167   // the codec database. Returns 0 on success, -1 on failure.
168   virtual int RegisterPayloadType(enum NetEqDecoder codec,
169                                   uint8_t rtp_payload_type) = 0;
170 
171   // Provides an externally created decoder object |decoder| to insert in the
172   // decoder database. The decoder implements a decoder of type |codec| and
173   // associates it with |rtp_payload_type|. Returns kOK on success,
174   // kFail on failure.
175   virtual int RegisterExternalDecoder(AudioDecoder* decoder,
176                                       enum NetEqDecoder codec,
177                                       uint8_t rtp_payload_type) = 0;
178 
179   // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
180   // -1 on failure.
181   virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
182 
183   // Sets a minimum delay in millisecond for packet buffer. The minimum is
184   // maintained unless a higher latency is dictated by channel condition.
185   // Returns true if the minimum is successfully applied, otherwise false is
186   // returned.
187   virtual bool SetMinimumDelay(int delay_ms) = 0;
188 
189   // Sets a maximum delay in milliseconds for packet buffer. The latency will
190   // not exceed the given value, even required delay (given the channel
191   // conditions) is higher. Calling this method has the same effect as setting
192   // the |max_delay_ms| value in the NetEq::Config struct.
193   virtual bool SetMaximumDelay(int delay_ms) = 0;
194 
195   // The smallest latency required. This is computed bases on inter-arrival
196   // time and internal NetEq logic. Note that in computing this latency none of
197   // the user defined limits (applied by calling setMinimumDelay() and/or
198   // SetMaximumDelay()) are applied.
199   virtual int LeastRequiredDelayMs() const = 0;
200 
201   // Not implemented.
202   virtual int SetTargetDelay() = 0;
203 
204   // Not implemented.
205   virtual int TargetDelay() = 0;
206 
207   // Not implemented.
208   virtual int CurrentDelay() = 0;
209 
210   // Sets the playout mode to |mode|.
211   // Deprecated. Set the mode in the Config struct passed to the constructor.
212   // TODO(henrik.lundin) Delete.
213   virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
214 
215   // Returns the current playout mode.
216   // Deprecated.
217   // TODO(henrik.lundin) Delete.
218   virtual NetEqPlayoutMode PlayoutMode() const = 0;
219 
220   // Writes the current network statistics to |stats|. The statistics are reset
221   // after the call.
222   virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
223 
224   // Writes the last packet waiting times (in ms) to |waiting_times|. The number
225   // of values written is no more than 100, but may be smaller if the interface
226   // is polled again before 100 packets has arrived.
227   virtual void WaitingTimes(std::vector<int>* waiting_times) = 0;
228 
229   // Writes the current RTCP statistics to |stats|. The statistics are reset
230   // and a new report period is started with the call.
231   virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
232 
233   // Same as RtcpStatistics(), but does not reset anything.
234   virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
235 
236   // Enables post-decode VAD. When enabled, GetAudio() will return
237   // kOutputVADPassive when the signal contains no speech.
238   virtual void EnableVad() = 0;
239 
240   // Disables post-decode VAD.
241   virtual void DisableVad() = 0;
242 
243   // Gets the RTP timestamp for the last sample delivered by GetAudio().
244   // Returns true if the RTP timestamp is valid, otherwise false.
245   virtual bool GetPlayoutTimestamp(uint32_t* timestamp) = 0;
246 
247   // Not implemented.
248   virtual int SetTargetNumberOfChannels() = 0;
249 
250   // Not implemented.
251   virtual int SetTargetSampleRate() = 0;
252 
253   // Returns the error code for the last occurred error. If no error has
254   // occurred, 0 is returned.
255   virtual int LastError() const = 0;
256 
257   // Returns the error code last returned by a decoder (audio or comfort noise).
258   // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
259   // this method to get the decoder's error code.
260   virtual int LastDecoderError() = 0;
261 
262   // Flushes both the packet buffer and the sync buffer.
263   virtual void FlushBuffers() = 0;
264 
265   // Current usage of packet-buffer and it's limits.
266   virtual void PacketBufferStatistics(int* current_num_packets,
267                                       int* max_num_packets) const = 0;
268 
269   // Get sequence number and timestamp of the latest RTP.
270   // This method is to facilitate NACK.
271   virtual int DecodedRtpInfo(int* sequence_number,
272                              uint32_t* timestamp) const = 0;
273 
274  protected:
275   NetEq() {}
276 
277  private:
278   DISALLOW_COPY_AND_ASSIGN(NetEq);
279 };
280 
281 }  // namespace webrtc
282 #endif  // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INTERFACE_NETEQ_H_
283