/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/rtp_rtcp/source/rtp_sender_audio.h"

#include <string.h>

#include <memory>
#include <utility>

#include "absl/strings/match.h"
#include "absl/types/optional.h"
#include "api/audio_codecs/audio_format.h"
#include "api/rtp_headers.h"
#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "modules/rtp_rtcp/source/absolute_capture_time_sender.h"
#include "modules/rtp_rtcp/source/byte_io.h"
#include "modules/rtp_rtcp/source/rtp_header_extensions.h"
#include "modules/rtp_rtcp/source/rtp_packet.h"
#include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
#include "modules/rtp_rtcp/source/time_util.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/trace_event.h"
#include "system_wrappers/include/ntp_time.h"

namespace webrtc {

namespace {

#if RTC_TRACE_EVENTS_ENABLED
const char* FrameTypeToString(AudioFrameType frame_type) {
  switch (frame_type) {
    case AudioFrameType::kEmptyFrame:
      return "empty";
    case AudioFrameType::kAudioFrameSpeech:
      return "audio_speech";
    case AudioFrameType::kAudioFrameCN:
      return "audio_cn";
  }
}
#endif

constexpr char kIncludeCaptureClockOffset[] =
    "WebRTC-IncludeCaptureClockOffset";

}  // namespace

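// Note: `include_capture_clock_offset_` is driven by the
// "WebRTC-IncludeCaptureClockOffset" field trial. When the trial is enabled,
// an estimated capture clock offset of zero is attached together with the
// absolute capture timestamp in SendAudio() below.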
RTPSenderAudio::RTPSenderAudio(Clock* clock, RTPSender* rtp_sender)
    : clock_(clock),
      rtp_sender_(rtp_sender),
      absolute_capture_time_sender_(clock),
      include_capture_clock_offset_(
          absl::StartsWith(field_trials_.Lookup(kIncludeCaptureClockOffset),
                           "Enabled")) {
  RTC_DCHECK(clock_);
}

RTPSenderAudio::~RTPSenderAudio() {}

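// Records payload types that need special treatment when sending: comfort
// noise (one payload type per supported clock rate), telephone-event (DTMF)
// together with its clock rate, and the encoder's RTP timestamp frequency for
// the "audio" entry. Any other payload type requires no bookkeeping here.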
int32_t RTPSenderAudio::RegisterAudioPayload(absl::string_view payload_name,
                                             const int8_t payload_type,
                                             const uint32_t frequency,
                                             const size_t channels,
                                             const uint32_t rate) {
  if (absl::EqualsIgnoreCase(payload_name, "cn")) {
    MutexLock lock(&send_audio_mutex_);
    // We can have multiple CNG payload types, one per clock rate.
    switch (frequency) {
      case 8000:
        cngnb_payload_type_ = payload_type;
        break;
      case 16000:
        cngwb_payload_type_ = payload_type;
        break;
      case 32000:
        cngswb_payload_type_ = payload_type;
        break;
      case 48000:
        cngfb_payload_type_ = payload_type;
        break;
      default:
        return -1;
    }
  } else if (absl::EqualsIgnoreCase(payload_name, "telephone-event")) {
    MutexLock lock(&send_audio_mutex_);
    // Don't add it to the list; we don't want to allow sending audio with the
    // DTMF payload type.
    dtmf_payload_type_ = payload_type;
    dtmf_payload_freq_ = frequency;
    return 0;
  } else if (payload_name == "audio") {
    MutexLock lock(&send_audio_mutex_);
    encoder_rtp_timestamp_frequency_ = frequency;
    return 0;
  }
  return 0;
}

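// Decides whether the RTP marker bit should be set. For audio the marker bit
// indicates the first packet of a talkspurt, i.e. the first packet after a
// silence (CNG/DTX) period; it is also set when switching to a new non-CNG
// payload type.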
bool RTPSenderAudio::MarkerBit(AudioFrameType frame_type,
                               int8_t payload_type) {
  MutexLock lock(&send_audio_mutex_);
  // For audio, the marker bit is true for the first packet in a speech burst.
  bool marker_bit = false;
  if (last_payload_type_ != payload_type) {
    if (payload_type != -1 && (cngnb_payload_type_ == payload_type ||
                               cngwb_payload_type_ == payload_type ||
                               cngswb_payload_type_ == payload_type ||
                               cngfb_payload_type_ == payload_type)) {
      // Only set the marker bit when we change payload type to a non-CNG one.
      return false;
    }

    // Payload types differ.
    if (last_payload_type_ == -1) {
      if (frame_type != AudioFrameType::kAudioFrameCN) {
        // First packet and not CNG.
        return true;
      } else {
        // First packet and CNG.
        inband_vad_active_ = true;
        return false;
      }
    }

    // Not the first packet, not CNG, and the payload type changed:
    // set the marker bit.
    marker_bit = true;
  }

  // For G.723, G.729, AMR etc. we can have inband VAD.
  if (frame_type == AudioFrameType::kAudioFrameCN) {
    inband_vad_active_ = true;
  } else if (inband_vad_active_) {
    inband_vad_active_ = false;
    marker_bit = true;
  }
  return marker_bit;
}

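// This overload is used by callers that have no capture time available. It
// forwards absolute_capture_timestamp_ms = -1, which the overload below
// treats as "unknown" and therefore skips the absolute capture time header
// extension.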
bool RTPSenderAudio::SendAudio(AudioFrameType frame_type,
                               int8_t payload_type,
                               uint32_t rtp_timestamp,
                               const uint8_t* payload_data,
                               size_t payload_size) {
  return SendAudio(frame_type, payload_type, rtp_timestamp, payload_data,
                   payload_size,
                   // TODO(bugs.webrtc.org/10739) replace once plumbed.
                   /*absolute_capture_timestamp_ms=*/-1);
}

bool RTPSenderAudio::SendAudio(AudioFrameType frame_type,
                               int8_t payload_type,
                               uint32_t rtp_timestamp,
                               const uint8_t* payload_data,
                               size_t payload_size,
                               int64_t absolute_capture_timestamp_ms) {
#if RTC_TRACE_EVENTS_ENABLED
  TRACE_EVENT_ASYNC_STEP1("webrtc", "Audio", rtp_timestamp, "Send", "type",
                          FrameTypeToString(frame_type));
#endif

  // From RFC 4733:
  // A source has wide latitude as to how often it sends event updates. A
  // natural interval is the spacing between non-event audio packets. [...]
  // Alternatively, a source MAY decide to use a different spacing for event
  // updates, with a value of 50 ms RECOMMENDED.
  constexpr int kDtmfIntervalTimeMs = 50;
  uint8_t audio_level_dbov = 0;
  uint32_t dtmf_payload_freq = 0;
  absl::optional<uint32_t> encoder_rtp_timestamp_frequency;
  {
    MutexLock lock(&send_audio_mutex_);
    audio_level_dbov = audio_level_dbov_;
    dtmf_payload_freq = dtmf_payload_freq_;
    encoder_rtp_timestamp_frequency = encoder_rtp_timestamp_frequency_;
  }

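  // DTMF handling (RFC 4733): while an event from `dtmf_queue_` is active,
  // telephone-event packets are sent in place of encoded audio. The total
  // event length is converted from milliseconds to RTP timestamp units using
  // the telephone-event payload clock rate.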
  // Check if we have pending DTMFs to send.
  if (!dtmf_event_is_on_ && dtmf_queue_.PendingDtmf()) {
    if ((clock_->TimeInMilliseconds() - dtmf_time_last_sent_) >
        kDtmfIntervalTimeMs) {
      // New tone to play.
      dtmf_timestamp_ = rtp_timestamp;
      if (dtmf_queue_.NextDtmf(&dtmf_current_event_)) {
        dtmf_event_first_packet_sent_ = false;
        dtmf_length_samples_ =
            dtmf_current_event_.duration_ms * (dtmf_payload_freq / 1000);
        dtmf_event_is_on_ = true;
      }
    }
  }

  // A source MAY send events and coded audio packets for the same time
  // instant, but we don't support that.
  if (dtmf_event_is_on_) {
    if (frame_type == AudioFrameType::kEmptyFrame) {
      // kEmptyFrame is used to drive the DTMF when in CN mode; it can be
      // triggered more frequently than we want to send DTMF packets.
      const unsigned int dtmf_interval_time_rtp =
          dtmf_payload_freq * kDtmfIntervalTimeMs / 1000;
      if ((rtp_timestamp - dtmf_timestamp_last_sent_) <
          dtmf_interval_time_rtp) {
        // Not time to send yet.
        return true;
      }
    }
    dtmf_timestamp_last_sent_ = rtp_timestamp;
    uint32_t dtmf_duration_samples = rtp_timestamp - dtmf_timestamp_;
    bool ended = false;
    bool send = true;

    if (dtmf_length_samples_ > dtmf_duration_samples) {
      if (dtmf_duration_samples <= 0) {
        // Skip sending a packet at the start, since we shouldn't use a
        // duration of 0.
        send = false;
      }
    } else {
      ended = true;
      dtmf_event_is_on_ = false;
      dtmf_time_last_sent_ = clock_->TimeInMilliseconds();
    }
    if (send) {
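      // The duration field of the telephone-event payload is only 16 bits
      // wide, so an event that has lasted longer than 0xffff timestamp units
      // is split: a non-final packet carrying the maximum duration is sent
      // first, the event timestamp is advanced, and the remainder follows in
      // a separate packet.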
      if (dtmf_duration_samples > 0xffff) {
        // RFC 4733 2.5.2.3 Long-Duration Events.
        SendTelephoneEventPacket(ended, dtmf_timestamp_,
                                 static_cast<uint16_t>(0xffff), false);

        // Set a new timestamp for this segment.
        dtmf_timestamp_ = rtp_timestamp;
        dtmf_duration_samples -= 0xffff;
        dtmf_length_samples_ -= 0xffff;

        return SendTelephoneEventPacket(
            ended, dtmf_timestamp_,
            static_cast<uint16_t>(dtmf_duration_samples), false);
      } else {
        if (!SendTelephoneEventPacket(ended, dtmf_timestamp_,
                                      dtmf_duration_samples,
                                      !dtmf_event_first_packet_sent_)) {
          return false;
        }
        dtmf_event_first_packet_sent_ = true;
        return true;
      }
    }
    return true;
  }
  if (payload_size == 0 || payload_data == NULL) {
    if (frame_type == AudioFrameType::kEmptyFrame) {
      // We don't send empty audio RTP packets. This is not an error, since
      // empty frames are used to either drive DTMF when we use VAD, or to
      // enter DTX.
      return true;
    }
    return false;
  }

  std::unique_ptr<RtpPacketToSend> packet = rtp_sender_->AllocatePacket();
  packet->SetMarker(MarkerBit(frame_type, payload_type));
  packet->SetPayloadType(payload_type);
  packet->SetTimestamp(rtp_timestamp);
  packet->set_capture_time_ms(clock_->TimeInMilliseconds());
  // Update audio level extension, if included.
  packet->SetExtension<AudioLevel>(
      frame_type == AudioFrameType::kAudioFrameSpeech, audio_level_dbov);

  if (absolute_capture_timestamp_ms > 0) {
    // Send absolute capture time periodically in order to optimize and save
    // network traffic. Missing absolute capture times can be interpolated on
    // the receiving end if sending intervals are small enough.
    auto absolute_capture_time = absolute_capture_time_sender_.OnSendPacket(
        AbsoluteCaptureTimeSender::GetSource(packet->Ssrc(), packet->Csrcs()),
        packet->Timestamp(),
        // Replace a missing value with 0 (invalid frequency); this will
        // trigger absolute capture time sending.
        encoder_rtp_timestamp_frequency.value_or(0),
        Int64MsToUQ32x32(clock_->ConvertTimestampToNtpTimeInMilliseconds(
            absolute_capture_timestamp_ms)),
        /*estimated_capture_clock_offset=*/
        include_capture_clock_offset_ ? absl::make_optional(0) : absl::nullopt);
    if (absolute_capture_time) {
      // The setter also checks that the extension was registered during SDP
      // negotiation; if it wasn't, the setter does nothing.
      packet->SetExtension<AbsoluteCaptureTimeExtension>(
          *absolute_capture_time);
    }
  }

  uint8_t* payload = packet->AllocatePayload(payload_size);
  if (!payload)  // Too large payload buffer.
    return false;
  memcpy(payload, payload_data, payload_size);

  if (!rtp_sender_->AssignSequenceNumber(packet.get()))
    return false;

  {
    MutexLock lock(&send_audio_mutex_);
    last_payload_type_ = payload_type;
  }
  TRACE_EVENT_ASYNC_END2("webrtc", "Audio", rtp_timestamp, "timestamp",
                         packet->Timestamp(), "seqnum",
                         packet->SequenceNumber());
  packet->set_packet_type(RtpPacketMediaType::kAudio);
  packet->set_allow_retransmission(true);
  bool send_result = rtp_sender_->SendToNetwork(std::move(packet));
  if (first_packet_sent_()) {
    RTC_LOG(LS_INFO) << "First audio RTP packet sent to pacer";
  }
  return send_result;
}

// Audio level magnitude and voice activity flag are set for each RTP packet.
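// The level is the value carried by the audio level header extension
// (RFC 6464): 0 corresponds to 0 dBov (the loudest signal) and 127 to
// -127 dBov (near silence), which is why values above 127 are rejected.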
int32_t RTPSenderAudio::SetAudioLevel(uint8_t level_dbov) {
  if (level_dbov > 127) {
    return -1;
  }
  MutexLock lock(&send_audio_mutex_);
  audio_level_dbov_ = level_dbov;
  return 0;
}

// Send a TelephoneEvent tone using RFC 2833 (4733).
int32_t RTPSenderAudio::SendTelephoneEvent(uint8_t key,
                                           uint16_t time_ms,
                                           uint8_t level) {
  DtmfQueue::Event event;
  {
    MutexLock lock(&send_audio_mutex_);
    if (dtmf_payload_type_ < 0) {
      // TelephoneEvent payload type is not configured.
      return -1;
    }
    event.payload_type = dtmf_payload_type_;
  }
  event.key = key;
  event.duration_ms = time_ms;
  event.level = level;
  return dtmf_queue_.AddDtmf(event) ? 0 : -1;
}

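// Builds and sends a single telephone-event packet as defined by RFC 4733.
// When `ended` is true, the packet closing the event is sent three times, as
// the RFC recommends, so that the end of the event survives packet loss.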
bool RTPSenderAudio::SendTelephoneEventPacket(bool ended,
                                              uint32_t dtmf_timestamp,
                                              uint16_t duration,
                                              bool marker_bit) {
  uint8_t send_count = 1;
  bool result = true;

  if (ended) {
    // Resend the last packet in an event 3 times.
    send_count = 3;
  }
  do {
    // Send DTMF data.
    constexpr RtpPacketToSend::ExtensionManager* kNoExtensions = nullptr;
    constexpr size_t kDtmfSize = 4;
    std::unique_ptr<RtpPacketToSend> packet(
        new RtpPacketToSend(kNoExtensions, kRtpHeaderSize + kDtmfSize));
    packet->SetPayloadType(dtmf_current_event_.payload_type);
    packet->SetMarker(marker_bit);
    packet->SetSsrc(rtp_sender_->SSRC());
    packet->SetTimestamp(dtmf_timestamp);
    packet->set_capture_time_ms(clock_->TimeInMilliseconds());
    if (!rtp_sender_->AssignSequenceNumber(packet.get()))
      return false;

    // Create DTMF data.
    uint8_t* dtmfbuffer = packet->AllocatePayload(kDtmfSize);
    RTC_DCHECK(dtmfbuffer);
    /*    From RFC 2833:
     0                   1                   2                   3
     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    |     event     |E|R| volume    |          duration             |
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    */
    // The R bit is reserved and always cleared.
    uint8_t R = 0x00;
    uint8_t volume = dtmf_current_event_.level;

    // The end bit (E) is set only once the event has ended.
    uint8_t E = ended ? 0x80 : 0x00;

    // First byte is the event number, which equals the key number.
    dtmfbuffer[0] = dtmf_current_event_.key;
    dtmfbuffer[1] = E | R | volume;
    ByteWriter<uint16_t>::WriteBigEndian(dtmfbuffer + 2, duration);

    packet->set_packet_type(RtpPacketMediaType::kAudio);
    packet->set_allow_retransmission(true);
    result = rtp_sender_->SendToNetwork(std::move(packet));
    send_count--;
  } while (send_count > 0 && result);

  return result;
}
}  // namespace webrtc