1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/rtp_rtcp/source/rtp_sender_video.h"
12 
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include <algorithm>
17 #include <limits>
18 #include <memory>
19 #include <string>
20 #include <utility>
21 
22 #include "absl/algorithm/container.h"
23 #include "absl/memory/memory.h"
24 #include "absl/strings/match.h"
25 #include "api/crypto/frame_encryptor_interface.h"
26 #include "api/transport/rtp/dependency_descriptor.h"
27 #include "modules/remote_bitrate_estimator/test/bwe_test_logging.h"
28 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
29 #include "modules/rtp_rtcp/source/absolute_capture_time_sender.h"
30 #include "modules/rtp_rtcp/source/byte_io.h"
31 #include "modules/rtp_rtcp/source/rtp_dependency_descriptor_extension.h"
32 #include "modules/rtp_rtcp/source/rtp_descriptor_authentication.h"
33 #include "modules/rtp_rtcp/source/rtp_format.h"
34 #include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.h"
35 #include "modules/rtp_rtcp/source/rtp_header_extensions.h"
36 #include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
37 #include "modules/rtp_rtcp/source/rtp_video_layers_allocation_extension.h"
38 #include "modules/rtp_rtcp/source/time_util.h"
39 #include "rtc_base/checks.h"
40 #include "rtc_base/experiments/field_trial_parser.h"
41 #include "rtc_base/logging.h"
42 #include "rtc_base/trace_event.h"
43 
44 namespace webrtc {
45 
46 namespace {
47 constexpr size_t kRedForFecHeaderLength = 1;
48 constexpr int64_t kMaxUnretransmittableFrameIntervalMs = 33 * 4;
49 constexpr char kIncludeCaptureClockOffset[] =
50     "WebRTC-IncludeCaptureClockOffset";
51 
BuildRedPayload(const RtpPacketToSend & media_packet,RtpPacketToSend * red_packet)52 void BuildRedPayload(const RtpPacketToSend& media_packet,
53                      RtpPacketToSend* red_packet) {
54   uint8_t* red_payload = red_packet->AllocatePayload(
55       kRedForFecHeaderLength + media_packet.payload_size());
56   RTC_DCHECK(red_payload);
57   red_payload[0] = media_packet.PayloadType();
58 
59   auto media_payload = media_packet.payload();
60   memcpy(&red_payload[kRedForFecHeaderLength], media_payload.data(),
61          media_payload.size());
62 }
63 
MinimizeDescriptor(RTPVideoHeader * video_header)64 bool MinimizeDescriptor(RTPVideoHeader* video_header) {
65   if (auto* vp8 =
66           absl::get_if<RTPVideoHeaderVP8>(&video_header->video_type_header)) {
67     // Set minimum fields the RtpPacketizer is using to create vp8 packets.
68     // nonReference is the only field that doesn't require extra space.
69     bool non_reference = vp8->nonReference;
70     vp8->InitRTPVideoHeaderVP8();
71     vp8->nonReference = non_reference;
72     return true;
73   }
74   // TODO(danilchap): Reduce vp9 codec specific descriptor too.
75   return false;
76 }
77 
IsBaseLayer(const RTPVideoHeader & video_header)78 bool IsBaseLayer(const RTPVideoHeader& video_header) {
79   switch (video_header.codec) {
80     case kVideoCodecVP8: {
81       const auto& vp8 =
82           absl::get<RTPVideoHeaderVP8>(video_header.video_type_header);
83       return (vp8.temporalIdx == 0 || vp8.temporalIdx == kNoTemporalIdx);
84     }
85     case kVideoCodecVP9: {
86       const auto& vp9 =
87           absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);
88       return (vp9.temporal_idx == 0 || vp9.temporal_idx == kNoTemporalIdx);
89     }
90     case kVideoCodecH264:
91       // TODO(kron): Implement logic for H264 once WebRTC supports temporal
92       // layers for H264.
93       break;
94     default:
95       break;
96   }
97   return true;
98 }
99 
#if RTC_TRACE_EVENTS_ENABLED
// Maps a VideoFrameType to a short string used for trace-event labels.
const char* FrameTypeToString(VideoFrameType frame_type) {
  if (frame_type == VideoFrameType::kEmptyFrame)
    return "empty";
  if (frame_type == VideoFrameType::kVideoFrameKey)
    return "video_key";
  if (frame_type == VideoFrameType::kVideoFrameDelta)
    return "video_delta";
  RTC_NOTREACHED();
  return "";
}
#endif
115 
IsNoopDelay(const VideoPlayoutDelay & delay)116 bool IsNoopDelay(const VideoPlayoutDelay& delay) {
117   return delay.min_ms == -1 && delay.max_ms == -1;
118 }
119 
LoadVideoPlayoutDelayOverride(const WebRtcKeyValueConfig * key_value_config)120 absl::optional<VideoPlayoutDelay> LoadVideoPlayoutDelayOverride(
121     const WebRtcKeyValueConfig* key_value_config) {
122   RTC_DCHECK(key_value_config);
123   FieldTrialOptional<int> playout_delay_min_ms("min_ms", absl::nullopt);
124   FieldTrialOptional<int> playout_delay_max_ms("max_ms", absl::nullopt);
125   ParseFieldTrial({&playout_delay_max_ms, &playout_delay_min_ms},
126                   key_value_config->Lookup("WebRTC-ForceSendPlayoutDelay"));
127   return playout_delay_max_ms && playout_delay_min_ms
128              ? absl::make_optional<VideoPlayoutDelay>(*playout_delay_min_ms,
129                                                       *playout_delay_max_ms)
130              : absl::nullopt;
131 }
132 
133 // Some packets can be skipped and the stream can still be decoded. Those
134 // packets are less likely to be retransmitted if they are lost.
PacketWillLikelyBeRequestedForRestransmitionIfLost(const RTPVideoHeader & video_header)135 bool PacketWillLikelyBeRequestedForRestransmitionIfLost(
136     const RTPVideoHeader& video_header) {
137   return IsBaseLayer(video_header) &&
138          !(video_header.generic.has_value()
139                ? absl::c_linear_search(
140                      video_header.generic->decode_target_indications,
141                      DecodeTargetIndication::kDiscardable)
142                : false);
143 }
144 
145 }  // namespace
146 
RTPSenderVideo::RTPSenderVideo(const Config& config)
    : rtp_sender_(config.rtp_sender),
      clock_(config.clock),
      // Either retransmit every layer, or the base layer plus (conditionally)
      // higher layers, depending on configuration.
      retransmission_settings_(
          config.enable_retransmit_all_layers
              ? kRetransmitAllLayers
              : (kRetransmitBaseLayer | kConditionallyRetransmitHigherLayers)),
      last_rotation_(kVideoRotation_0),
      transmit_color_space_next_frame_(false),
      send_allocation_(SendVideoLayersAllocation::kDontSend),
      // {-1, -1} is the "no preference" delay; see IsNoopDelay().
      current_playout_delay_{-1, -1},
      playout_delay_pending_(false),
      // Optional forced delay from the WebRTC-ForceSendPlayoutDelay trial.
      forced_playout_delay_(LoadVideoPlayoutDelayOverride(config.field_trials)),
      red_payload_type_(config.red_payload_type),
      fec_type_(config.fec_type),
      fec_overhead_bytes_(config.fec_overhead_bytes),
      // 1000 ms rate window, reported in bits per second.
      packetization_overhead_bitrate_(1000, RateStatistics::kBpsScale),
      frame_encryptor_(config.frame_encryptor),
      require_frame_encryption_(config.require_frame_encryption),
      // Generic descriptor authentication is on unless the field trial is
      // explicitly "Disabled".
      generic_descriptor_auth_experiment_(!absl::StartsWith(
          config.field_trials->Lookup("WebRTC-GenericDescriptorAuth"),
          "Disabled")),
      absolute_capture_time_sender_(config.clock),
      // The delegate is only created when a frame transformer is configured;
      // it intercepts frames before packetization.
      frame_transformer_delegate_(
          config.frame_transformer
              ? new rtc::RefCountedObject<
                    RTPSenderVideoFrameTransformerDelegate>(
                    this,
                    config.frame_transformer,
                    rtp_sender_->SSRC(),
                    config.send_transport_queue)
              : nullptr),
      include_capture_clock_offset_(absl::StartsWith(
          config.field_trials->Lookup(kIncludeCaptureClockOffset),
          "Enabled")) {
  if (frame_transformer_delegate_)
    frame_transformer_delegate_->Init();
}
185 
RTPSenderVideo::~RTPSenderVideo() {
  // Detach the transformer delegate, which holds a pointer back to |this|.
  // NOTE(review): assumes Reset() severs that back-pointer so no transformed
  // frame callback can reach a destroyed sender — confirm in
  // RTPSenderVideoFrameTransformerDelegate.
  if (frame_transformer_delegate_)
    frame_transformer_delegate_->Reset();
}
190 
LogAndSendToNetwork(std::vector<std::unique_ptr<RtpPacketToSend>> packets,size_t unpacketized_payload_size)191 void RTPSenderVideo::LogAndSendToNetwork(
192     std::vector<std::unique_ptr<RtpPacketToSend>> packets,
193     size_t unpacketized_payload_size) {
194   {
195     MutexLock lock(&stats_mutex_);
196     size_t packetized_payload_size = 0;
197     for (const auto& packet : packets) {
198       if (*packet->packet_type() == RtpPacketMediaType::kVideo) {
199         packetized_payload_size += packet->payload_size();
200       }
201     }
202     // AV1 and H264 packetizers may produce less packetized bytes than
203     // unpacketized.
204     if (packetized_payload_size >= unpacketized_payload_size) {
205       packetization_overhead_bitrate_.Update(
206           packetized_payload_size - unpacketized_payload_size,
207           clock_->TimeInMilliseconds());
208     }
209   }
210 
211   rtp_sender_->EnqueuePackets(std::move(packets));
212 }
213 
FecPacketOverhead() const214 size_t RTPSenderVideo::FecPacketOverhead() const {
215   size_t overhead = fec_overhead_bytes_;
216   if (red_enabled()) {
217     // The RED overhead is due to a small header.
218     overhead += kRedForFecHeaderLength;
219 
220     if (fec_type_ == VideoFecGenerator::FecType::kUlpFec) {
221       // For ULPFEC, the overhead is the FEC headers plus RED for FEC header
222       // (see above) plus anything in RTP header beyond the 12 bytes base header
223       // (CSRC list, extensions...)
224       // This reason for the header extensions to be included here is that
225       // from an FEC viewpoint, they are part of the payload to be protected.
226       // (The base RTP header is already protected by the FEC header.)
227       overhead +=
228           rtp_sender_->FecOrPaddingPacketMaxRtpHeaderLength() - kRtpHeaderSize;
229     }
230   }
231   return overhead;
232 }
233 
SetVideoStructure(const FrameDependencyStructure * video_structure)234 void RTPSenderVideo::SetVideoStructure(
235     const FrameDependencyStructure* video_structure) {
236   if (frame_transformer_delegate_) {
237     frame_transformer_delegate_->SetVideoStructureUnderLock(video_structure);
238     return;
239   }
240   SetVideoStructureInternal(video_structure);
241 }
242 
// Applies a dependency-structure update directly; presumably invoked by the
// frame transformer delegate after routing through its queue — confirm at
// the delegate's call site.
void RTPSenderVideo::SetVideoStructureAfterTransformation(
    const FrameDependencyStructure* video_structure) {
  SetVideoStructureInternal(video_structure);
}
247 
// Stores a private copy of |video_structure|, assigning it a structure_id
// that does not collide with templates of the previously active structure.
// Passing nullptr clears the stored structure.
void RTPSenderVideo::SetVideoStructureInternal(
    const FrameDependencyStructure* video_structure) {
  RTC_DCHECK_RUNS_SERIALIZED(&send_checker_);
  if (video_structure == nullptr) {
    video_structure_ = nullptr;
    return;
  }
  // Simple sanity checks video structure is set up.
  RTC_DCHECK_GT(video_structure->num_decode_targets, 0);
  RTC_DCHECK_GT(video_structure->templates.size(), 0);

  int structure_id = 0;
  if (video_structure_) {
    if (*video_structure_ == *video_structure) {
      // Same structure (just a new key frame), no update required.
      return;
    }
    // When setting different video structure make sure structure_id is updated
    // so that templates from different structures do not collide.
    static constexpr int kMaxTemplates = 64;
    structure_id =
        (video_structure_->structure_id + video_structure_->templates.size()) %
        kMaxTemplates;
  }

  // Keep an owned copy; the caller's structure need not outlive this object.
  video_structure_ =
      std::make_unique<FrameDependencyStructure>(*video_structure);
  video_structure_->structure_id = structure_id;
}
277 
SetVideoLayersAllocation(VideoLayersAllocation allocation)278 void RTPSenderVideo::SetVideoLayersAllocation(
279     VideoLayersAllocation allocation) {
280   if (frame_transformer_delegate_) {
281     frame_transformer_delegate_->SetVideoLayersAllocationUnderLock(
282         std::move(allocation));
283     return;
284   }
285   SetVideoLayersAllocationInternal(std::move(allocation));
286 }
287 
// Applies a layers-allocation update directly; presumably invoked by the
// frame transformer delegate after routing through its queue — confirm at
// the delegate's call site.
void RTPSenderVideo::SetVideoLayersAllocationAfterTransformation(
    VideoLayersAllocation allocation) {
  SetVideoLayersAllocationInternal(std::move(allocation));
}
292 
// Stores |allocation| and schedules the VideoLayersAllocation extension for
// sending: with resolution/frame-rate info when the set of active spatial
// layers grew (or this is the first allocation), otherwise — if nothing is
// already pending — in the lighter form without them.
void RTPSenderVideo::SetVideoLayersAllocationInternal(
    VideoLayersAllocation allocation) {
  RTC_DCHECK_RUNS_SERIALIZED(&send_checker_);
  if (!allocation_ || allocation.active_spatial_layers.size() >
                          allocation_->active_spatial_layers.size()) {
    send_allocation_ = SendVideoLayersAllocation::kSendWithResolution;
  } else if (send_allocation_ == SendVideoLayersAllocation::kDontSend) {
    // A pending kSendWithResolution is not downgraded.
    send_allocation_ = SendVideoLayersAllocation::kSendWithoutResolution;
  }
  allocation_ = std::move(allocation);
}
304 
// Writes the RTP header extensions derived from |video_header| into |packet|.
// |first_packet|/|last_packet| indicate the packet's position within the
// frame, since several extensions are only attached to the first or last
// packet of a frame.
void RTPSenderVideo::AddRtpHeaderExtensions(
    const RTPVideoHeader& video_header,
    const absl::optional<AbsoluteCaptureTime>& absolute_capture_time,
    bool first_packet,
    bool last_packet,
    RtpPacketToSend* packet) const {
  // Send color space when changed or if the frame is a key frame. Keep
  // sending color space information until the first base layer frame to
  // guarantee that the information is retrieved by the receiver.
  bool set_color_space =
      video_header.color_space != last_color_space_ ||
      video_header.frame_type == VideoFrameType::kVideoFrameKey ||
      transmit_color_space_next_frame_;
  // Color space requires two-byte header extensions if HDR metadata is
  // included. Therefore, it's best to add this extension first so that the
  // other extensions in the same packet are written as two-byte headers at
  // once.
  if (last_packet && set_color_space && video_header.color_space)
    packet->SetExtension<ColorSpaceExtension>(video_header.color_space.value());

  // According to
  // http://www.etsi.org/deliver/etsi_ts/126100_126199/126114/12.07.00_60/
  // ts_126114v120700p.pdf Section 7.4.5:
  // The MTSI client shall add the payload bytes as defined in this clause
  // onto the last RTP packet in each group of packets which make up a key
  // frame (I-frame or IDR frame in H.264 (AVC), or an IRAP picture in H.265
  // (HEVC)). The MTSI client may also add the payload bytes onto the last RTP
  // packet in each group of packets which make up another type of frame
  // (e.g. a P-Frame) only if the current value is different from the previous
  // value sent.
  // Set rotation when key frame or when changed (to follow standard).
  // Or when different from 0 (to follow current receiver implementation).
  bool set_video_rotation =
      video_header.frame_type == VideoFrameType::kVideoFrameKey ||
      video_header.rotation != last_rotation_ ||
      video_header.rotation != kVideoRotation_0;
  if (last_packet && set_video_rotation)
    packet->SetExtension<VideoOrientation>(video_header.rotation);

  // Report content type only for key frames.
  if (last_packet &&
      video_header.frame_type == VideoFrameType::kVideoFrameKey &&
      video_header.content_type != VideoContentType::UNSPECIFIED)
    packet->SetExtension<VideoContentTypeExtension>(video_header.content_type);

  // Video timing goes on the last packet of the frame only.
  if (last_packet &&
      video_header.video_timing.flags != VideoSendTiming::kInvalid)
    packet->SetExtension<VideoTimingExtension>(video_header.video_timing);

  // If transmitted, add to all packets; ack logic depends on this.
  if (playout_delay_pending_) {
    packet->SetExtension<PlayoutDelayLimits>(current_playout_delay_);
  }

  // Absolute capture time goes on the first packet of the frame only.
  if (first_packet && absolute_capture_time) {
    packet->SetExtension<AbsoluteCaptureTimeExtension>(*absolute_capture_time);
  }

  if (video_header.generic) {
    bool extension_is_set = false;
    // Prefer the dependency descriptor when a structure has been configured.
    if (video_structure_ != nullptr) {
      DependencyDescriptor descriptor;
      descriptor.first_packet_in_frame = first_packet;
      descriptor.last_packet_in_frame = last_packet;
      // frame_number is a 16-bit wrap of the 64-bit frame id.
      descriptor.frame_number = video_header.generic->frame_id & 0xFFFF;
      descriptor.frame_dependencies.spatial_id =
          video_header.generic->spatial_index;
      descriptor.frame_dependencies.temporal_id =
          video_header.generic->temporal_index;
      // Dependencies are encoded as diffs relative to this frame's id.
      for (int64_t dep : video_header.generic->dependencies) {
        descriptor.frame_dependencies.frame_diffs.push_back(
            video_header.generic->frame_id - dep);
      }
      descriptor.frame_dependencies.chain_diffs =
          video_header.generic->chain_diffs;
      descriptor.frame_dependencies.decode_target_indications =
          video_header.generic->decode_target_indications;
      RTC_DCHECK_EQ(
          descriptor.frame_dependencies.decode_target_indications.size(),
          video_structure_->num_decode_targets);

      if (first_packet) {
        descriptor.active_decode_targets_bitmask =
            active_decode_targets_tracker_.ActiveDecodeTargetsBitmask();
      }
      // VP9 mark all layer frames of the first picture as kVideoFrameKey,
      // Structure should be attached to the descriptor to lowest spatial layer
      // when inter layer dependency is used, i.e. L structures; or to all
      // layers when inter layer dependency is not used, i.e. S structures.
      // Distinguish these two cases by checking if there are any dependencies.
      if (video_header.frame_type == VideoFrameType::kVideoFrameKey &&
          video_header.generic->dependencies.empty() && first_packet) {
        // To avoid extra structure copy, temporary share ownership of the
        // video_structure with the dependency descriptor.
        descriptor.attached_structure =
            absl::WrapUnique(video_structure_.get());
      }
      extension_is_set = packet->SetExtension<RtpDependencyDescriptorExtension>(
          *video_structure_,
          active_decode_targets_tracker_.ActiveChainsBitmask(), descriptor);

      // Remove the temporary shared ownership.
      // (release() without delete: |video_structure_| keeps sole ownership.)
      descriptor.attached_structure.release();
    }

    // Do not use generic frame descriptor when dependency descriptor is stored.
    if (!extension_is_set) {
      RtpGenericFrameDescriptor generic_descriptor;
      generic_descriptor.SetFirstPacketInSubFrame(first_packet);
      generic_descriptor.SetLastPacketInSubFrame(last_packet);

      // The full descriptor payload only travels on the first packet.
      if (first_packet) {
        generic_descriptor.SetFrameId(
            static_cast<uint16_t>(video_header.generic->frame_id));
        for (int64_t dep : video_header.generic->dependencies) {
          generic_descriptor.AddFrameDependencyDiff(
              video_header.generic->frame_id - dep);
        }

        uint8_t spatial_bimask = 1 << video_header.generic->spatial_index;
        generic_descriptor.SetSpatialLayersBitmask(spatial_bimask);

        generic_descriptor.SetTemporalLayer(
            video_header.generic->temporal_index);

        if (video_header.frame_type == VideoFrameType::kVideoFrameKey) {
          generic_descriptor.SetResolution(video_header.width,
                                           video_header.height);
        }
      }

      packet->SetExtension<RtpGenericFrameDescriptorExtension00>(
          generic_descriptor);
    }
  }

  // Attach a pending layers allocation to the first packet of frames that
  // are likely to arrive (key frames or frames likely to be retransmitted).
  if (first_packet &&
      send_allocation_ != SendVideoLayersAllocation::kDontSend &&
      (video_header.frame_type == VideoFrameType::kVideoFrameKey ||
       PacketWillLikelyBeRequestedForRestransmitionIfLost(video_header))) {
    VideoLayersAllocation allocation = allocation_.value();
    allocation.resolution_and_frame_rate_is_valid =
        send_allocation_ == SendVideoLayersAllocation::kSendWithResolution;
    packet->SetExtension<RtpVideoLayersAllocationExtension>(allocation);
  }

  if (first_packet && video_header.video_frame_tracking_id) {
    packet->SetExtension<VideoFrameTrackingIdExtension>(
        *video_header.video_frame_tracking_id);
  }
}
456 
SendVideo(int payload_type,absl::optional<VideoCodecType> codec_type,uint32_t rtp_timestamp,int64_t capture_time_ms,rtc::ArrayView<const uint8_t> payload,RTPVideoHeader video_header,absl::optional<int64_t> expected_retransmission_time_ms,absl::optional<int64_t> estimated_capture_clock_offset_ms)457 bool RTPSenderVideo::SendVideo(
458     int payload_type,
459     absl::optional<VideoCodecType> codec_type,
460     uint32_t rtp_timestamp,
461     int64_t capture_time_ms,
462     rtc::ArrayView<const uint8_t> payload,
463     RTPVideoHeader video_header,
464     absl::optional<int64_t> expected_retransmission_time_ms,
465     absl::optional<int64_t> estimated_capture_clock_offset_ms) {
466 #if RTC_TRACE_EVENTS_ENABLED
467   TRACE_EVENT_ASYNC_STEP1("webrtc", "Video", capture_time_ms, "Send", "type",
468                           FrameTypeToString(video_header.frame_type));
469 #endif
470   RTC_CHECK_RUNS_SERIALIZED(&send_checker_);
471 
472   if (video_header.frame_type == VideoFrameType::kEmptyFrame)
473     return true;
474 
475   if (payload.empty())
476     return false;
477 
478   int32_t retransmission_settings = retransmission_settings_;
479   if (codec_type == VideoCodecType::kVideoCodecH264) {
480     // Backward compatibility for older receivers without temporal layer logic.
481     retransmission_settings = kRetransmitBaseLayer | kRetransmitHigherLayers;
482   }
483 
484   MaybeUpdateCurrentPlayoutDelay(video_header);
485   if (video_header.frame_type == VideoFrameType::kVideoFrameKey) {
486     if (!IsNoopDelay(current_playout_delay_)) {
487       // Force playout delay on key-frames, if set.
488       playout_delay_pending_ = true;
489     }
490     if (allocation_) {
491       // Send the bitrate allocation on every key frame.
492       send_allocation_ = SendVideoLayersAllocation::kSendWithResolution;
493     }
494   }
495 
496   if (video_structure_ != nullptr && video_header.generic) {
497     active_decode_targets_tracker_.OnFrame(
498         video_structure_->decode_target_protected_by_chain,
499         video_header.generic->active_decode_targets,
500         video_header.frame_type == VideoFrameType::kVideoFrameKey,
501         video_header.generic->frame_id, video_header.generic->chain_diffs);
502   }
503 
504   const uint8_t temporal_id = GetTemporalId(video_header);
505   // No FEC protection for upper temporal layers, if used.
506   const bool use_fec = fec_type_.has_value() &&
507                        (temporal_id == 0 || temporal_id == kNoTemporalIdx);
508 
509   // Maximum size of packet including rtp headers.
510   // Extra space left in case packet will be resent using fec or rtx.
511   int packet_capacity = rtp_sender_->MaxRtpPacketSize() -
512                         (use_fec ? FecPacketOverhead() : 0) -
513                         (rtp_sender_->RtxStatus() ? kRtxHeaderSize : 0);
514 
515   std::unique_ptr<RtpPacketToSend> single_packet =
516       rtp_sender_->AllocatePacket();
517   RTC_DCHECK_LE(packet_capacity, single_packet->capacity());
518   single_packet->SetPayloadType(payload_type);
519   single_packet->SetTimestamp(rtp_timestamp);
520   single_packet->set_capture_time_ms(capture_time_ms);
521 
522   const absl::optional<AbsoluteCaptureTime> absolute_capture_time =
523       absolute_capture_time_sender_.OnSendPacket(
524           AbsoluteCaptureTimeSender::GetSource(single_packet->Ssrc(),
525                                                single_packet->Csrcs()),
526           single_packet->Timestamp(), kVideoPayloadTypeFrequency,
527           Int64MsToUQ32x32(single_packet->capture_time_ms() + NtpOffsetMs()),
528           /*estimated_capture_clock_offset=*/
529           include_capture_clock_offset_ ? estimated_capture_clock_offset_ms
530                                         : absl::nullopt);
531 
532   auto first_packet = std::make_unique<RtpPacketToSend>(*single_packet);
533   auto middle_packet = std::make_unique<RtpPacketToSend>(*single_packet);
534   auto last_packet = std::make_unique<RtpPacketToSend>(*single_packet);
535   // Simplest way to estimate how much extensions would occupy is to set them.
536   AddRtpHeaderExtensions(video_header, absolute_capture_time,
537                          /*first_packet=*/true, /*last_packet=*/true,
538                          single_packet.get());
539   AddRtpHeaderExtensions(video_header, absolute_capture_time,
540                          /*first_packet=*/true, /*last_packet=*/false,
541                          first_packet.get());
542   AddRtpHeaderExtensions(video_header, absolute_capture_time,
543                          /*first_packet=*/false, /*last_packet=*/false,
544                          middle_packet.get());
545   AddRtpHeaderExtensions(video_header, absolute_capture_time,
546                          /*first_packet=*/false, /*last_packet=*/true,
547                          last_packet.get());
548 
549   RTC_DCHECK_GT(packet_capacity, single_packet->headers_size());
550   RTC_DCHECK_GT(packet_capacity, first_packet->headers_size());
551   RTC_DCHECK_GT(packet_capacity, middle_packet->headers_size());
552   RTC_DCHECK_GT(packet_capacity, last_packet->headers_size());
553   RtpPacketizer::PayloadSizeLimits limits;
554   limits.max_payload_len = packet_capacity - middle_packet->headers_size();
555 
556   RTC_DCHECK_GE(single_packet->headers_size(), middle_packet->headers_size());
557   limits.single_packet_reduction_len =
558       single_packet->headers_size() - middle_packet->headers_size();
559 
560   RTC_DCHECK_GE(first_packet->headers_size(), middle_packet->headers_size());
561   limits.first_packet_reduction_len =
562       first_packet->headers_size() - middle_packet->headers_size();
563 
564   RTC_DCHECK_GE(last_packet->headers_size(), middle_packet->headers_size());
565   limits.last_packet_reduction_len =
566       last_packet->headers_size() - middle_packet->headers_size();
567 
568   bool has_generic_descriptor =
569       first_packet->HasExtension<RtpGenericFrameDescriptorExtension00>() ||
570       first_packet->HasExtension<RtpDependencyDescriptorExtension>();
571 
572   // Minimization of the vp8 descriptor may erase temporal_id, so use
573   // |temporal_id| rather than reference |video_header| beyond this point.
574   if (has_generic_descriptor) {
575     MinimizeDescriptor(&video_header);
576   }
577 
578   // TODO(benwright@webrtc.org) - Allocate enough to always encrypt inline.
579   rtc::Buffer encrypted_video_payload;
580   if (frame_encryptor_ != nullptr) {
581     if (!has_generic_descriptor) {
582       return false;
583     }
584 
585     const size_t max_ciphertext_size =
586         frame_encryptor_->GetMaxCiphertextByteSize(cricket::MEDIA_TYPE_VIDEO,
587                                                    payload.size());
588     encrypted_video_payload.SetSize(max_ciphertext_size);
589 
590     size_t bytes_written = 0;
591 
592     // Enable header authentication if the field trial isn't disabled.
593     std::vector<uint8_t> additional_data;
594     if (generic_descriptor_auth_experiment_) {
595       additional_data = RtpDescriptorAuthentication(video_header);
596     }
597 
598     if (frame_encryptor_->Encrypt(
599             cricket::MEDIA_TYPE_VIDEO, first_packet->Ssrc(), additional_data,
600             payload, encrypted_video_payload, &bytes_written) != 0) {
601       return false;
602     }
603 
604     encrypted_video_payload.SetSize(bytes_written);
605     payload = encrypted_video_payload;
606   } else if (require_frame_encryption_) {
607     RTC_LOG(LS_WARNING)
608         << "No FrameEncryptor is attached to this video sending stream but "
609            "one is required since require_frame_encryptor is set";
610   }
611 
612   std::unique_ptr<RtpPacketizer> packetizer =
613       RtpPacketizer::Create(codec_type, payload, limits, video_header);
614 
615   // TODO(bugs.webrtc.org/10714): retransmission_settings_ should generally be
616   // replaced by expected_retransmission_time_ms.has_value(). For now, though,
617   // only VP8 with an injected frame buffer controller actually controls it.
618   const bool allow_retransmission =
619       expected_retransmission_time_ms.has_value()
620           ? AllowRetransmission(temporal_id, retransmission_settings,
621                                 expected_retransmission_time_ms.value())
622           : false;
623   const size_t num_packets = packetizer->NumPackets();
624 
625   if (num_packets == 0)
626     return false;
627 
628   bool first_frame = first_frame_sent_();
629   std::vector<std::unique_ptr<RtpPacketToSend>> rtp_packets;
630   for (size_t i = 0; i < num_packets; ++i) {
631     std::unique_ptr<RtpPacketToSend> packet;
632     int expected_payload_capacity;
633     // Choose right packet template:
634     if (num_packets == 1) {
635       packet = std::move(single_packet);
636       expected_payload_capacity =
637           limits.max_payload_len - limits.single_packet_reduction_len;
638     } else if (i == 0) {
639       packet = std::move(first_packet);
640       expected_payload_capacity =
641           limits.max_payload_len - limits.first_packet_reduction_len;
642     } else if (i == num_packets - 1) {
643       packet = std::move(last_packet);
644       expected_payload_capacity =
645           limits.max_payload_len - limits.last_packet_reduction_len;
646     } else {
647       packet = std::make_unique<RtpPacketToSend>(*middle_packet);
648       expected_payload_capacity = limits.max_payload_len;
649     }
650 
651     packet->set_first_packet_of_frame(i == 0);
652 
653     if (!packetizer->NextPacket(packet.get()))
654       return false;
655     RTC_DCHECK_LE(packet->payload_size(), expected_payload_capacity);
656 
657     packet->set_allow_retransmission(allow_retransmission);
658     packet->set_is_key_frame(video_header.frame_type ==
659                              VideoFrameType::kVideoFrameKey);
660 
661     packet->set_is_key_frame(video_header.frame_type == VideoFrameType::kVideoFrameKey);
662 
663     // Put packetization finish timestamp into extension.
664     if (packet->HasExtension<VideoTimingExtension>()) {
665       packet->set_packetization_finish_time_ms(clock_->TimeInMilliseconds());
666     }
667 
668     packet->set_fec_protect_packet(use_fec);
669 
670     if (red_enabled()) {
671       // TODO(sprang): Consider packetizing directly into packets with the RED
672       // header already in place, to avoid this copy.
673       std::unique_ptr<RtpPacketToSend> red_packet(new RtpPacketToSend(*packet));
674       BuildRedPayload(*packet, red_packet.get());
675       red_packet->SetPayloadType(*red_payload_type_);
676       red_packet->set_is_red(true);
677 
678       // Append |red_packet| instead of |packet| to output.
679       red_packet->set_packet_type(RtpPacketMediaType::kVideo);
680       red_packet->set_allow_retransmission(packet->allow_retransmission());
681       rtp_packets.emplace_back(std::move(red_packet));
682     } else {
683       packet->set_packet_type(RtpPacketMediaType::kVideo);
684       rtp_packets.emplace_back(std::move(packet));
685     }
686 
687     if (first_frame) {
688       if (i == 0) {
689         RTC_LOG(LS_INFO)
690             << "Sent first RTP packet of the first video frame (pre-pacer)";
691       }
692       if (i == num_packets - 1) {
693         RTC_LOG(LS_INFO)
694             << "Sent last RTP packet of the first video frame (pre-pacer)";
695       }
696     }
697   }
698 
699   if (!rtp_sender_->AssignSequenceNumbersAndStoreLastPacketState(rtp_packets)) {
700     // Media not being sent.
701     return false;
702   }
703 
704   LogAndSendToNetwork(std::move(rtp_packets), payload.size());
705 
706   // Update details about the last sent frame.
707   last_rotation_ = video_header.rotation;
708 
709   if (video_header.color_space != last_color_space_) {
710     last_color_space_ = video_header.color_space;
711     transmit_color_space_next_frame_ = !IsBaseLayer(video_header);
712   } else {
713     transmit_color_space_next_frame_ =
714         transmit_color_space_next_frame_ ? !IsBaseLayer(video_header) : false;
715   }
716 
717   if (video_header.frame_type == VideoFrameType::kVideoFrameKey ||
718       PacketWillLikelyBeRequestedForRestransmitionIfLost(video_header)) {
719     // This frame will likely be delivered, no need to populate playout
720     // delay extensions until it changes again.
721     playout_delay_pending_ = false;
722     send_allocation_ = SendVideoLayersAllocation::kDontSend;
723   }
724 
725   TRACE_EVENT_ASYNC_END1("webrtc", "Video", capture_time_ms, "timestamp",
726                          rtp_timestamp);
727   return true;
728 }
729 
SendEncodedImage(int payload_type,absl::optional<VideoCodecType> codec_type,uint32_t rtp_timestamp,const EncodedImage & encoded_image,RTPVideoHeader video_header,absl::optional<int64_t> expected_retransmission_time_ms)730 bool RTPSenderVideo::SendEncodedImage(
731     int payload_type,
732     absl::optional<VideoCodecType> codec_type,
733     uint32_t rtp_timestamp,
734     const EncodedImage& encoded_image,
735     RTPVideoHeader video_header,
736     absl::optional<int64_t> expected_retransmission_time_ms) {
737   if (frame_transformer_delegate_) {
738     // The frame will be sent async once transformed.
739     return frame_transformer_delegate_->TransformFrame(
740         payload_type, codec_type, rtp_timestamp, encoded_image, video_header,
741         expected_retransmission_time_ms);
742   }
743   return SendVideo(payload_type, codec_type, rtp_timestamp,
744                    encoded_image.capture_time_ms_, encoded_image, video_header,
745                    expected_retransmission_time_ms);
746 }
747 
PacketizationOverheadBps() const748 uint32_t RTPSenderVideo::PacketizationOverheadBps() const {
749   MutexLock lock(&stats_mutex_);
750   return packetization_overhead_bitrate_.Rate(clock_->TimeInMilliseconds())
751       .value_or(0);
752 }
753 
AllowRetransmission(uint8_t temporal_id,int32_t retransmission_settings,int64_t expected_retransmission_time_ms)754 bool RTPSenderVideo::AllowRetransmission(
755     uint8_t temporal_id,
756     int32_t retransmission_settings,
757     int64_t expected_retransmission_time_ms) {
758   if (retransmission_settings == kRetransmitOff)
759     return false;
760 
761   MutexLock lock(&stats_mutex_);
762   // Media packet storage.
763   if ((retransmission_settings & kConditionallyRetransmitHigherLayers) &&
764       UpdateConditionalRetransmit(temporal_id,
765                                   expected_retransmission_time_ms)) {
766     retransmission_settings |= kRetransmitHigherLayers;
767   }
768 
769   if (temporal_id == kNoTemporalIdx)
770     return true;
771 
772   if ((retransmission_settings & kRetransmitBaseLayer) && temporal_id == 0)
773     return true;
774 
775   if ((retransmission_settings & kRetransmitHigherLayers) && temporal_id > 0)
776     return true;
777 
778   return false;
779 }
780 
GetTemporalId(const RTPVideoHeader & header)781 uint8_t RTPSenderVideo::GetTemporalId(const RTPVideoHeader& header) {
782   struct TemporalIdGetter {
783     uint8_t operator()(const RTPVideoHeaderVP8& vp8) { return vp8.temporalIdx; }
784     uint8_t operator()(const RTPVideoHeaderVP9& vp9) {
785       return vp9.temporal_idx;
786     }
787     uint8_t operator()(const RTPVideoHeaderH264&) { return kNoTemporalIdx; }
788     uint8_t operator()(const RTPVideoHeaderLegacyGeneric&) {
789       return kNoTemporalIdx;
790     }
791 #ifndef DISABLE_H265
792     uint8_t operator()(const RTPVideoHeaderH265&) { return kNoTemporalIdx; }
793 #endif
794     uint8_t operator()(const absl::monostate&) { return kNoTemporalIdx; }
795   };
796   return absl::visit(TemporalIdGetter(), header.video_type_header);
797 }
798 
UpdateConditionalRetransmit(uint8_t temporal_id,int64_t expected_retransmission_time_ms)799 bool RTPSenderVideo::UpdateConditionalRetransmit(
800     uint8_t temporal_id,
801     int64_t expected_retransmission_time_ms) {
802   int64_t now_ms = clock_->TimeInMilliseconds();
803   // Update stats for any temporal layer.
804   TemporalLayerStats* current_layer_stats =
805       &frame_stats_by_temporal_layer_[temporal_id];
806   current_layer_stats->frame_rate_fp1000s.Update(1, now_ms);
807   int64_t tl_frame_interval = now_ms - current_layer_stats->last_frame_time_ms;
808   current_layer_stats->last_frame_time_ms = now_ms;
809 
810   // Conditional retransmit only applies to upper layers.
811   if (temporal_id != kNoTemporalIdx && temporal_id > 0) {
812     if (tl_frame_interval >= kMaxUnretransmittableFrameIntervalMs) {
813       // Too long since a retransmittable frame in this layer, enable NACK
814       // protection.
815       return true;
816     } else {
817       // Estimate when the next frame of any lower layer will be sent.
818       const int64_t kUndefined = std::numeric_limits<int64_t>::max();
819       int64_t expected_next_frame_time = kUndefined;
820       for (int i = temporal_id - 1; i >= 0; --i) {
821         TemporalLayerStats* stats = &frame_stats_by_temporal_layer_[i];
822         absl::optional<uint32_t> rate = stats->frame_rate_fp1000s.Rate(now_ms);
823         if (rate) {
824           int64_t tl_next = stats->last_frame_time_ms + 1000000 / *rate;
825           if (tl_next - now_ms > -expected_retransmission_time_ms &&
826               tl_next < expected_next_frame_time) {
827             expected_next_frame_time = tl_next;
828           }
829         }
830       }
831 
832       if (expected_next_frame_time == kUndefined ||
833           expected_next_frame_time - now_ms > expected_retransmission_time_ms) {
834         // The next frame in a lower layer is expected at a later time (or
835         // unable to tell due to lack of data) than a retransmission is
836         // estimated to be able to arrive, so allow this packet to be nacked.
837         return true;
838       }
839     }
840   }
841 
842   return false;
843 }
844 
MaybeUpdateCurrentPlayoutDelay(const RTPVideoHeader & header)845 void RTPSenderVideo::MaybeUpdateCurrentPlayoutDelay(
846     const RTPVideoHeader& header) {
847   VideoPlayoutDelay requested_delay =
848       forced_playout_delay_.value_or(header.playout_delay);
849 
850   if (IsNoopDelay(requested_delay)) {
851     return;
852   }
853 
854   if (requested_delay.min_ms > PlayoutDelayLimits::kMaxMs ||
855       requested_delay.max_ms > PlayoutDelayLimits::kMaxMs) {
856     RTC_DLOG(LS_ERROR)
857         << "Requested playout delay values out of range, ignored";
858     return;
859   }
860   if (requested_delay.max_ms != -1 &&
861       requested_delay.min_ms > requested_delay.max_ms) {
862     RTC_DLOG(LS_ERROR) << "Requested playout delay values out of order";
863     return;
864   }
865 
866   if (!playout_delay_pending_) {
867     current_playout_delay_ = requested_delay;
868     playout_delay_pending_ = true;
869     return;
870   }
871 
872   if ((requested_delay.min_ms == -1 ||
873        requested_delay.min_ms == current_playout_delay_.min_ms) &&
874       (requested_delay.max_ms == -1 ||
875        requested_delay.max_ms == current_playout_delay_.max_ms)) {
876     // No change, ignore.
877     return;
878   }
879 
880   if (requested_delay.min_ms == -1) {
881     RTC_DCHECK_GE(requested_delay.max_ms, 0);
882     requested_delay.min_ms =
883         std::min(current_playout_delay_.min_ms, requested_delay.max_ms);
884   }
885   if (requested_delay.max_ms == -1) {
886     requested_delay.max_ms =
887         std::max(current_playout_delay_.max_ms, requested_delay.min_ms);
888   }
889 
890   current_playout_delay_ = requested_delay;
891   playout_delay_pending_ = true;
892 }
893 
894 }  // namespace webrtc
895