/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/rtp_rtcp/source/rtp_sender_audio.h"

#include <string.h>

#include <memory>
#include <utility>

#include "absl/strings/match.h"
#include "absl/types/optional.h"
#include "api/audio_codecs/audio_format.h"
#include "api/rtp_headers.h"
#include "modules/audio_coding/include/audio_coding_module_typedefs.h"
#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "modules/rtp_rtcp/source/absolute_capture_time_sender.h"
#include "modules/rtp_rtcp/source/byte_io.h"
#include "modules/rtp_rtcp/source/rtp_header_extensions.h"
#include "modules/rtp_rtcp/source/rtp_packet.h"
#include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
#include "modules/rtp_rtcp/source/time_util.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/trace_event.h"
#include "system_wrappers/include/ntp_time.h"

namespace webrtc {

namespace {

#if RTC_TRACE_EVENTS_ENABLED
const char* FrameTypeToString(AudioFrameType frame_type) {
  switch (frame_type) {
    case AudioFrameType::kEmptyFrame:
      return "empty";
    case AudioFrameType::kAudioFrameSpeech:
      return "audio_speech";
    case AudioFrameType::kAudioFrameCN:
      return "audio_cn";
  }
}
#endif

}  // namespace

RTPSenderAudio::RTPSenderAudio(Clock* clock, RTPSender* rtp_sender)
    : clock_(clock),
      rtp_sender_(rtp_sender),
      absolute_capture_time_sender_(clock) {
  RTC_DCHECK(clock_);
}

RTPSenderAudio::~RTPSenderAudio() {}

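// Remembers the payload types needed when building outgoing audio packets:
// one CNG payload type per supported clock rate, the telephone-event (DTMF)
// payload type and its frequency, and the RTP timestamp frequency of the
// "audio" encoder. Returns -1 only for a CNG payload with an unsupported
// clock rate.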
int32_t RTPSenderAudio::RegisterAudioPayload(absl::string_view payload_name,
                                             const int8_t payload_type,
                                             const uint32_t frequency,
                                             const size_t channels,
                                             const uint32_t rate) {
  if (absl::EqualsIgnoreCase(payload_name, "cn")) {
    rtc::CritScope cs(&send_audio_critsect_);
    // We can have multiple CNG payload types.
    switch (frequency) {
      case 8000:
        cngnb_payload_type_ = payload_type;
        break;
      case 16000:
        cngwb_payload_type_ = payload_type;
        break;
      case 32000:
        cngswb_payload_type_ = payload_type;
        break;
      case 48000:
        cngfb_payload_type_ = payload_type;
        break;
      default:
        return -1;
    }
  } else if (absl::EqualsIgnoreCase(payload_name, "telephone-event")) {
    rtc::CritScope cs(&send_audio_critsect_);
    // Don't add it to the list; we don't want to allow sending with a DTMF
    // payload type.
    dtmf_payload_type_ = payload_type;
    dtmf_payload_freq_ = frequency;
    return 0;
  } else if (payload_name == "audio") {
    rtc::CritScope cs(&send_audio_critsect_);
    encoder_rtp_timestamp_frequency_ = frequency;
    return 0;
  }
  return 0;
}

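// Decides whether the RTP marker bit should be set for this packet: it is set
// on the first speech packet of the stream, when the payload type changes to
// a new non-CNG payload type, and on the first speech packet that follows a
// run of inband VAD/CNG frames.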
bool RTPSenderAudio::MarkerBit(AudioFrameType frame_type, int8_t payload_type) {
  rtc::CritScope cs(&send_audio_critsect_);
  // For audio, the marker bit is true for the first packet in a speech burst.
  bool marker_bit = false;
  if (last_payload_type_ != payload_type) {
    if (payload_type != -1 && (cngnb_payload_type_ == payload_type ||
                               cngwb_payload_type_ == payload_type ||
                               cngswb_payload_type_ == payload_type ||
                               cngfb_payload_type_ == payload_type)) {
      // Only set a marker bit when we change payload type to a non-CNG type.
      return false;
    }

    // The payload type differs from the previous packet.
    if (last_payload_type_ == -1) {
      if (frame_type != AudioFrameType::kAudioFrameCN) {
        // First packet and NOT CNG.
        return true;
      } else {
        // First packet and CNG.
        inband_vad_active_ = true;
        return false;
      }
    }

    // Not the first packet, not CNG, and the payload type changed:
    // set a marker bit.
    marker_bit = true;
  }

  // For G.723, G.729, AMR, etc. we can have inband VAD.
  if (frame_type == AudioFrameType::kAudioFrameCN) {
    inband_vad_active_ = true;
  } else if (inband_vad_active_) {
    inband_vad_active_ = false;
    marker_bit = true;
  }
  return marker_bit;
}

bool RTPSenderAudio::SendAudio(AudioFrameType frame_type,
                               int8_t payload_type,
                               uint32_t rtp_timestamp,
                               const uint8_t* payload_data,
                               size_t payload_size) {
  return SendAudio(frame_type, payload_type, rtp_timestamp, payload_data,
                   payload_size,
                   // TODO(bugs.webrtc.org/10739) replace once plumbed.
                   /*absolute_capture_timestamp_ms=*/0);
}

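// Builds and sends one audio RTP packet. Pending DTMF events take priority
// over coded audio: while an event is playing, telephone-event packets are
// sent instead of the audio payload. Otherwise the payload is packetized
// together with the audio-level extension and, periodically, the absolute
// capture time extension.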
bool RTPSenderAudio::SendAudio(AudioFrameType frame_type,
                               int8_t payload_type,
                               uint32_t rtp_timestamp,
                               const uint8_t* payload_data,
                               size_t payload_size,
                               int64_t absolute_capture_timestamp_ms) {
#if RTC_TRACE_EVENTS_ENABLED
  TRACE_EVENT_ASYNC_STEP1("webrtc", "Audio", rtp_timestamp, "Send", "type",
                          FrameTypeToString(frame_type));
#endif

  // From RFC 4733:
  // A source has wide latitude as to how often it sends event updates. A
  // natural interval is the spacing between non-event audio packets. [...]
  // Alternatively, a source MAY decide to use a different spacing for event
  // updates, with a value of 50 ms RECOMMENDED.
  constexpr int kDtmfIntervalTimeMs = 50;
  uint8_t audio_level_dbov = 0;
  uint32_t dtmf_payload_freq = 0;
  absl::optional<uint32_t> encoder_rtp_timestamp_frequency;
  {
    rtc::CritScope cs(&send_audio_critsect_);
    audio_level_dbov = audio_level_dbov_;
    dtmf_payload_freq = dtmf_payload_freq_;
    encoder_rtp_timestamp_frequency = encoder_rtp_timestamp_frequency_;
  }

  // Check if we have pending DTMFs to send.
  if (!dtmf_event_is_on_ && dtmf_queue_.PendingDtmf()) {
    if ((clock_->TimeInMilliseconds() - dtmf_time_last_sent_) >
        kDtmfIntervalTimeMs) {
      // New tone to play.
      dtmf_timestamp_ = rtp_timestamp;
      if (dtmf_queue_.NextDtmf(&dtmf_current_event_)) {
        dtmf_event_first_packet_sent_ = false;
        dtmf_length_samples_ =
            dtmf_current_event_.duration_ms * (dtmf_payload_freq / 1000);
        dtmf_event_is_on_ = true;
      }
    }
  }

  // A source MAY send events and coded audio packets for the same time,
  // but we don't support it.
  if (dtmf_event_is_on_) {
    if (frame_type == AudioFrameType::kEmptyFrame) {
      // kEmptyFrame is used to drive the DTMF when in CN mode; it can be
      // triggered more frequently than we want to send DTMF packets.
      const unsigned int dtmf_interval_time_rtp =
          dtmf_payload_freq * kDtmfIntervalTimeMs / 1000;
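      // E.g. with an 8000 Hz telephone-event payload, 50 ms corresponds to
      // 400 RTP timestamp units.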
      if ((rtp_timestamp - dtmf_timestamp_last_sent_) <
          dtmf_interval_time_rtp) {
        // Not time to send yet.
        return true;
      }
    }
    dtmf_timestamp_last_sent_ = rtp_timestamp;
    uint32_t dtmf_duration_samples = rtp_timestamp - dtmf_timestamp_;
    bool ended = false;
    bool send = true;

    if (dtmf_length_samples_ > dtmf_duration_samples) {
      if (dtmf_duration_samples <= 0) {
        // Skip sending the packet at the start, since we shouldn't use a
        // duration of 0.
        send = false;
      }
    } else {
      ended = true;
      dtmf_event_is_on_ = false;
      dtmf_time_last_sent_ = clock_->TimeInMilliseconds();
    }
    if (send) {
      if (dtmf_duration_samples > 0xffff) {
        // RFC 4733 2.5.2.3 Long-Duration Events.
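        // An event longer than 0xffff RTP timestamp units does not fit in the
        // 16-bit duration field, so report the maximum duration now and
        // continue the event in a new segment with a fresh start timestamp.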
        SendTelephoneEventPacket(ended, dtmf_timestamp_,
                                 static_cast<uint16_t>(0xffff), false);

        // Set a new timestamp for this segment.
        dtmf_timestamp_ = rtp_timestamp;
        dtmf_duration_samples -= 0xffff;
        dtmf_length_samples_ -= 0xffff;

        return SendTelephoneEventPacket(
            ended, dtmf_timestamp_,
            static_cast<uint16_t>(dtmf_duration_samples), false);
      } else {
        if (!SendTelephoneEventPacket(ended, dtmf_timestamp_,
                                      dtmf_duration_samples,
                                      !dtmf_event_first_packet_sent_)) {
          return false;
        }
        dtmf_event_first_packet_sent_ = true;
        return true;
      }
    }
    return true;
  }
  if (payload_size == 0 || payload_data == NULL) {
    if (frame_type == AudioFrameType::kEmptyFrame) {
      // We don't send empty audio RTP packets. This is not an error, since
      // empty frames are used to either drive DTMF when we use VAD, or to
      // enter DTX.
      return true;
    }
    return false;
  }

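  // Build the audio packet: payload type, timestamp, marker bit and the
  // audio-level header extension (if negotiated).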
  std::unique_ptr<RtpPacketToSend> packet = rtp_sender_->AllocatePacket();
  packet->SetMarker(MarkerBit(frame_type, payload_type));
  packet->SetPayloadType(payload_type);
  packet->SetTimestamp(rtp_timestamp);
  packet->set_capture_time_ms(clock_->TimeInMilliseconds());
  // Update audio level extension, if included.
  packet->SetExtension<AudioLevel>(
      frame_type == AudioFrameType::kAudioFrameSpeech, audio_level_dbov);

  // Send absolute capture time periodically in order to optimize and save
  // network traffic. Missing absolute capture times can be interpolated on the
  // receiving end if sending intervals are small enough.
  auto absolute_capture_time = absolute_capture_time_sender_.OnSendPacket(
      AbsoluteCaptureTimeSender::GetSource(packet->Ssrc(), packet->Csrcs()),
      packet->Timestamp(),
      // Replace a missing value with 0 (an invalid frequency); this will
      // trigger absolute capture time sending.
      encoder_rtp_timestamp_frequency.value_or(0),
      Int64MsToUQ32x32(absolute_capture_timestamp_ms + NtpOffsetMs()),
      /*estimated_capture_clock_offset=*/absl::nullopt);
  if (absolute_capture_time) {
    // The setter also checks that the extension was registered during SDP
    // negotiation; if not, it does nothing.
    packet->SetExtension<AbsoluteCaptureTimeExtension>(*absolute_capture_time);
  }

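  // Copy the encoded payload and assign a sequence number before handing the
  // packet to the pacer.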
  uint8_t* payload = packet->AllocatePayload(payload_size);
  if (!payload)  // Too large payload buffer.
    return false;
  memcpy(payload, payload_data, payload_size);

  if (!rtp_sender_->AssignSequenceNumber(packet.get()))
    return false;

  {
    rtc::CritScope cs(&send_audio_critsect_);
    last_payload_type_ = payload_type;
  }
  TRACE_EVENT_ASYNC_END2("webrtc", "Audio", rtp_timestamp, "timestamp",
                         packet->Timestamp(), "seqnum",
                         packet->SequenceNumber());
  packet->set_packet_type(RtpPacketMediaType::kAudio);
  packet->set_allow_retransmission(true);
  bool send_result = rtp_sender_->SendToNetwork(std::move(packet));
  if (first_packet_sent_()) {
    RTC_LOG(LS_INFO) << "First audio RTP packet sent to pacer";
  }
  return send_result;
}

// Audio level magnitude and voice activity flag are set for each RTP packet.
int32_t RTPSenderAudio::SetAudioLevel(uint8_t level_dbov) {
  if (level_dbov > 127) {
    return -1;
  }
  rtc::CritScope cs(&send_audio_critsect_);
  audio_level_dbov_ = level_dbov;
  return 0;
}

// Send a TelephoneEvent tone using RFC 2833 (4733).
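// For example, SendTelephoneEvent(1, 100, 10) queues DTMF event 1 (the digit
// "1") lasting 100 ms at a volume of 10 (i.e. -10 dBm0 per RFC 4733).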
int32_t RTPSenderAudio::SendTelephoneEvent(uint8_t key,
                                           uint16_t time_ms,
                                           uint8_t level) {
  DtmfQueue::Event event;
  {
    rtc::CritScope lock(&send_audio_critsect_);
    if (dtmf_payload_type_ < 0) {
      // TelephoneEvent payload type is not configured.
      return -1;
    }
    event.payload_type = dtmf_payload_type_;
  }
  event.key = key;
  event.duration_ms = time_ms;
  event.level = level;
  return dtmf_queue_.AddDtmf(event) ? 0 : -1;
}

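// Sends a single RFC 4733 telephone-event packet for the current DTMF event.
// `ended` marks the final packet of the event, which is transmitted three
// times for robustness against packet loss.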
bool RTPSenderAudio::SendTelephoneEventPacket(bool ended,
                                              uint32_t dtmf_timestamp,
                                              uint16_t duration,
                                              bool marker_bit) {
  uint8_t send_count = 1;
  bool result = true;

  if (ended) {
    // Resend the last packet in an event 3 times.
    send_count = 3;
  }
  do {
    // Send DTMF data.
    constexpr RtpPacketToSend::ExtensionManager* kNoExtensions = nullptr;
    constexpr size_t kDtmfSize = 4;
    std::unique_ptr<RtpPacketToSend> packet(
        new RtpPacketToSend(kNoExtensions, kRtpHeaderSize + kDtmfSize));
    packet->SetPayloadType(dtmf_current_event_.payload_type);
    packet->SetMarker(marker_bit);
    packet->SetSsrc(rtp_sender_->SSRC());
    packet->SetTimestamp(dtmf_timestamp);
    packet->set_capture_time_ms(clock_->TimeInMilliseconds());
    if (!rtp_sender_->AssignSequenceNumber(packet.get()))
      return false;

    // Create DTMF data.
    uint8_t* dtmfbuffer = packet->AllocatePayload(kDtmfSize);
    RTC_DCHECK(dtmfbuffer);
    /*    From RFC 2833:
     0                   1                   2                   3
     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    |     event     |E|R| volume    |          duration             |
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    */
    // The R bit is always cleared.
    uint8_t R = 0x00;
    uint8_t volume = dtmf_current_event_.level;

    // The E bit is set only on the final packet of the event.
    uint8_t E = ended ? 0x80 : 0x00;

    // The first byte is the event number, which equals the key number.
    dtmfbuffer[0] = dtmf_current_event_.key;
    dtmfbuffer[1] = E | R | volume;
    ByteWriter<uint16_t>::WriteBigEndian(dtmfbuffer + 2, duration);
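    // For example, a non-final packet for event 1 with volume 10 and a
    // duration of 800 timestamp units is encoded as 0x01 0x0a 0x03 0x20.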

    packet->set_packet_type(RtpPacketMediaType::kAudio);
    packet->set_allow_retransmission(true);
    result = rtp_sender_->SendToNetwork(std::move(packet));
    send_count--;
  } while (send_count > 0 && result);

  return result;
}
}  // namespace webrtc