1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/video_coding/session_info.h"
12 
13 #include <assert.h>
14 #include <string.h>
15 
16 #include <vector>
17 
18 #include "absl/types/variant.h"
19 #include "modules/include/module_common_types.h"
20 #include "modules/include/module_common_types_public.h"
21 #include "modules/video_coding/codecs/interface/common_constants.h"
22 #include "modules/video_coding/codecs/vp8/include/vp8_globals.h"
23 #include "modules/video_coding/jitter_buffer_common.h"
24 #include "modules/video_coding/packet.h"
25 #include "rtc_base/logging.h"
26 
27 namespace webrtc {
28 
29 namespace {
30 
// Combines the two bytes at |dataBuffer| into one 16-bit value, with
// dataBuffer[0] supplying the upper eight bits and dataBuffer[1] the lower
// eight bits.
uint16_t BufferToUWord16(const uint8_t* dataBuffer) {
  const uint16_t high_byte = dataBuffer[0];
  const uint16_t low_byte = dataBuffer[1];
  return static_cast<uint16_t>((high_byte << 8) | low_byte);
}
34 
35 }  // namespace
36 
VCMSessionInfo()37 VCMSessionInfo::VCMSessionInfo()
38     : complete_(false),
39       frame_type_(VideoFrameType::kVideoFrameDelta),
40       packets_(),
41       empty_seq_num_low_(-1),
42       empty_seq_num_high_(-1),
43       first_packet_seq_num_(-1),
44       last_packet_seq_num_(-1) {}
45 
~VCMSessionInfo()46 VCMSessionInfo::~VCMSessionInfo() {}
47 
UpdateDataPointers(const uint8_t * old_base_ptr,const uint8_t * new_base_ptr)48 void VCMSessionInfo::UpdateDataPointers(const uint8_t* old_base_ptr,
49                                         const uint8_t* new_base_ptr) {
50   for (PacketIterator it = packets_.begin(); it != packets_.end(); ++it)
51     if ((*it).dataPtr != NULL) {
52       assert(old_base_ptr != NULL && new_base_ptr != NULL);
53       (*it).dataPtr = new_base_ptr + ((*it).dataPtr - old_base_ptr);
54     }
55 }
56 
LowSequenceNumber() const57 int VCMSessionInfo::LowSequenceNumber() const {
58   if (packets_.empty())
59     return empty_seq_num_low_;
60   return packets_.front().seqNum;
61 }
62 
HighSequenceNumber() const63 int VCMSessionInfo::HighSequenceNumber() const {
64   if (packets_.empty())
65     return empty_seq_num_high_;
66   if (empty_seq_num_high_ == -1)
67     return packets_.back().seqNum;
68   return LatestSequenceNumber(packets_.back().seqNum, empty_seq_num_high_);
69 }
70 
PictureId() const71 int VCMSessionInfo::PictureId() const {
72   if (packets_.empty())
73     return kNoPictureId;
74   if (packets_.front().video_header.codec == kVideoCodecVP8) {
75     return absl::get<RTPVideoHeaderVP8>(
76                packets_.front().video_header.video_type_header)
77         .pictureId;
78   } else if (packets_.front().video_header.codec == kVideoCodecVP9) {
79     return absl::get<RTPVideoHeaderVP9>(
80                packets_.front().video_header.video_type_header)
81         .picture_id;
82   } else {
83     return kNoPictureId;
84   }
85 }
86 
TemporalId() const87 int VCMSessionInfo::TemporalId() const {
88   if (packets_.empty())
89     return kNoTemporalIdx;
90   if (packets_.front().video_header.codec == kVideoCodecVP8) {
91     return absl::get<RTPVideoHeaderVP8>(
92                packets_.front().video_header.video_type_header)
93         .temporalIdx;
94   } else if (packets_.front().video_header.codec == kVideoCodecVP9) {
95     return absl::get<RTPVideoHeaderVP9>(
96                packets_.front().video_header.video_type_header)
97         .temporal_idx;
98   } else {
99     return kNoTemporalIdx;
100   }
101 }
102 
LayerSync() const103 bool VCMSessionInfo::LayerSync() const {
104   if (packets_.empty())
105     return false;
106   if (packets_.front().video_header.codec == kVideoCodecVP8) {
107     return absl::get<RTPVideoHeaderVP8>(
108                packets_.front().video_header.video_type_header)
109         .layerSync;
110   } else if (packets_.front().video_header.codec == kVideoCodecVP9) {
111     return absl::get<RTPVideoHeaderVP9>(
112                packets_.front().video_header.video_type_header)
113         .temporal_up_switch;
114   } else {
115     return false;
116   }
117 }
118 
Tl0PicId() const119 int VCMSessionInfo::Tl0PicId() const {
120   if (packets_.empty())
121     return kNoTl0PicIdx;
122   if (packets_.front().video_header.codec == kVideoCodecVP8) {
123     return absl::get<RTPVideoHeaderVP8>(
124                packets_.front().video_header.video_type_header)
125         .tl0PicIdx;
126   } else if (packets_.front().video_header.codec == kVideoCodecVP9) {
127     return absl::get<RTPVideoHeaderVP9>(
128                packets_.front().video_header.video_type_header)
129         .tl0_pic_idx;
130   } else {
131     return kNoTl0PicIdx;
132   }
133 }
134 
GetNaluInfos() const135 std::vector<NaluInfo> VCMSessionInfo::GetNaluInfos() const {
136   if (packets_.empty() ||
137       packets_.front().video_header.codec != kVideoCodecH264)
138     return std::vector<NaluInfo>();
139   std::vector<NaluInfo> nalu_infos;
140   for (const VCMPacket& packet : packets_) {
141     const auto& h264 =
142         absl::get<RTPVideoHeaderH264>(packet.video_header.video_type_header);
143     for (size_t i = 0; i < h264.nalus_length; ++i) {
144       nalu_infos.push_back(h264.nalus[i]);
145     }
146   }
147   return nalu_infos;
148 }
149 
SetGofInfo(const GofInfoVP9 & gof_info,size_t idx)150 void VCMSessionInfo::SetGofInfo(const GofInfoVP9& gof_info, size_t idx) {
151   if (packets_.empty())
152     return;
153 
154   auto* vp9_header = absl::get_if<RTPVideoHeaderVP9>(
155       &packets_.front().video_header.video_type_header);
156   if (!vp9_header || vp9_header->flexible_mode)
157     return;
158 
159   vp9_header->temporal_idx = gof_info.temporal_idx[idx];
160   vp9_header->temporal_up_switch = gof_info.temporal_up_switch[idx];
161   vp9_header->num_ref_pics = gof_info.num_ref_pics[idx];
162   for (uint8_t i = 0; i < gof_info.num_ref_pics[idx]; ++i) {
163     vp9_header->pid_diff[i] = gof_info.pid_diff[idx][i];
164   }
165 }
166 
Reset()167 void VCMSessionInfo::Reset() {
168   complete_ = false;
169   frame_type_ = VideoFrameType::kVideoFrameDelta;
170   packets_.clear();
171   empty_seq_num_low_ = -1;
172   empty_seq_num_high_ = -1;
173   first_packet_seq_num_ = -1;
174   last_packet_seq_num_ = -1;
175 }
176 
SessionLength() const177 size_t VCMSessionInfo::SessionLength() const {
178   size_t length = 0;
179   for (PacketIteratorConst it = packets_.begin(); it != packets_.end(); ++it)
180     length += (*it).sizeBytes;
181   return length;
182 }
183 
NumPackets() const184 int VCMSessionInfo::NumPackets() const {
185   return packets_.size();
186 }
187 
InsertBuffer(uint8_t * frame_buffer,PacketIterator packet_it)188 size_t VCMSessionInfo::InsertBuffer(uint8_t* frame_buffer,
189                                     PacketIterator packet_it) {
190   VCMPacket& packet = *packet_it;
191   PacketIterator it;
192 
193   // Calculate the offset into the frame buffer for this packet.
194   size_t offset = 0;
195   for (it = packets_.begin(); it != packet_it; ++it)
196     offset += (*it).sizeBytes;
197 
198   // Set the data pointer to pointing to the start of this packet in the
199   // frame buffer.
200   const uint8_t* packet_buffer = packet.dataPtr;
201   packet.dataPtr = frame_buffer + offset;
202 
203   // We handle H.264 STAP-A packets in a special way as we need to remove the
204   // two length bytes between each NAL unit, and potentially add start codes.
205   // TODO(pbos): Remove H264 parsing from this step and use a fragmentation
206   // header supplied by the H264 depacketizer.
207   const size_t kH264NALHeaderLengthInBytes = 1;
208   const size_t kLengthFieldLength = 2;
209   const auto* h264 =
210       absl::get_if<RTPVideoHeaderH264>(&packet.video_header.video_type_header);
211   if (h264 && h264->packetization_type == kH264StapA) {
212     size_t required_length = 0;
213     const uint8_t* nalu_ptr = packet_buffer + kH264NALHeaderLengthInBytes;
214     // Must check that incoming data length doesn't extend past end of buffer.
215     // We allow for 100 bytes of expansion due to startcodes being longer than
216     // length fields.
217     while (nalu_ptr + kLengthFieldLength <= packet_buffer + packet.sizeBytes) {
218       size_t length = BufferToUWord16(nalu_ptr);
219       if (nalu_ptr + kLengthFieldLength + length <= packet_buffer + packet.sizeBytes) {
220         required_length +=
221           length + (packet.insertStartCode ? kH264StartCodeLengthBytes : 0);
222         nalu_ptr += kLengthFieldLength + length;
223       } else {
224         // Something is very wrong!
225         RTC_LOG(LS_ERROR) << "Failed to insert packet due to corrupt H264 STAP-A";
226         return 0;
227       }
228     }
229     ShiftSubsequentPackets(packet_it, required_length);
230     nalu_ptr = packet_buffer + kH264NALHeaderLengthInBytes;
231     uint8_t* frame_buffer_ptr = frame_buffer + offset;
232     // we already know we won't go past end-of-buffer
233     while (nalu_ptr + kLengthFieldLength <= packet_buffer + packet.sizeBytes) {
234       size_t length = BufferToUWord16(nalu_ptr);
235       nalu_ptr += kLengthFieldLength;
236       frame_buffer_ptr += Insert(nalu_ptr, length, packet.insertStartCode,
237                                  const_cast<uint8_t*>(frame_buffer_ptr));
238       nalu_ptr += length;
239     }
240     packet.sizeBytes = required_length;
241     return packet.sizeBytes;
242   }
243   ShiftSubsequentPackets(
244       packet_it, packet.sizeBytes +
245                      (packet.insertStartCode ? kH264StartCodeLengthBytes : 0));
246 
247   packet.sizeBytes =
248       Insert(packet_buffer, packet.sizeBytes, packet.insertStartCode,
249              const_cast<uint8_t*>(packet.dataPtr));
250   return packet.sizeBytes;
251 }
252 
Insert(const uint8_t * buffer,size_t length,bool insert_start_code,uint8_t * frame_buffer)253 size_t VCMSessionInfo::Insert(const uint8_t* buffer,
254                               size_t length,
255                               bool insert_start_code,
256                               uint8_t* frame_buffer) {
257   if (insert_start_code) {
258     const unsigned char startCode[] = {0, 0, 0, 1};
259     memcpy(frame_buffer, startCode, kH264StartCodeLengthBytes);
260   }
261   memcpy(frame_buffer + (insert_start_code ? kH264StartCodeLengthBytes : 0),
262          buffer, length);
263   length += (insert_start_code ? kH264StartCodeLengthBytes : 0);
264 
265   return length;
266 }
267 
ShiftSubsequentPackets(PacketIterator it,int steps_to_shift)268 void VCMSessionInfo::ShiftSubsequentPackets(PacketIterator it,
269                                             int steps_to_shift) {
270   ++it;
271   if (it == packets_.end())
272     return;
273   uint8_t* first_packet_ptr = const_cast<uint8_t*>((*it).dataPtr);
274   int shift_length = 0;
275   // Calculate the total move length and move the data pointers in advance.
276   for (; it != packets_.end(); ++it) {
277     shift_length += (*it).sizeBytes;
278     if ((*it).dataPtr != NULL)
279       (*it).dataPtr += steps_to_shift;
280   }
281   memmove(first_packet_ptr + steps_to_shift, first_packet_ptr, shift_length);
282 }
283 
UpdateCompleteSession()284 void VCMSessionInfo::UpdateCompleteSession() {
285   if (HaveFirstPacket() && HaveLastPacket()) {
286     // Do we have all the packets in this session?
287     bool complete_session = true;
288     PacketIterator it = packets_.begin();
289     PacketIterator prev_it = it;
290     ++it;
291     for (; it != packets_.end(); ++it) {
292       if (!InSequence(it, prev_it)) {
293         complete_session = false;
294         break;
295       }
296       prev_it = it;
297     }
298     complete_ = complete_session;
299   }
300 }
301 
complete() const302 bool VCMSessionInfo::complete() const {
303   return complete_;
304 }
305 
306 // Find the end of the NAL unit which the packet pointed to by |packet_it|
307 // belongs to. Returns an iterator to the last packet of the frame if the end
308 // of the NAL unit wasn't found.
FindNaluEnd(PacketIterator packet_it) const309 VCMSessionInfo::PacketIterator VCMSessionInfo::FindNaluEnd(
310     PacketIterator packet_it) const {
311   if ((*packet_it).completeNALU == kNaluEnd ||
312       (*packet_it).completeNALU == kNaluComplete) {
313     return packet_it;
314   }
315   // Find the end of the NAL unit.
316   for (; packet_it != packets_.end(); ++packet_it) {
317     if (((*packet_it).completeNALU == kNaluComplete &&
318          (*packet_it).sizeBytes > 0) ||
319         // Found next NALU.
320         (*packet_it).completeNALU == kNaluStart)
321       return --packet_it;
322     if ((*packet_it).completeNALU == kNaluEnd)
323       return packet_it;
324   }
325   // The end wasn't found.
326   return --packet_it;
327 }
328 
DeletePacketData(PacketIterator start,PacketIterator end)329 size_t VCMSessionInfo::DeletePacketData(PacketIterator start,
330                                         PacketIterator end) {
331   size_t bytes_to_delete = 0;  // The number of bytes to delete.
332   PacketIterator packet_after_end = end;
333   ++packet_after_end;
334 
335   // Get the number of bytes to delete.
336   // Clear the size of these packets.
337   for (PacketIterator it = start; it != packet_after_end; ++it) {
338     bytes_to_delete += (*it).sizeBytes;
339     (*it).sizeBytes = 0;
340     (*it).dataPtr = NULL;
341   }
342   if (bytes_to_delete > 0)
343     ShiftSubsequentPackets(end, -static_cast<int>(bytes_to_delete));
344   return bytes_to_delete;
345 }
346 
FindNextPartitionBeginning(PacketIterator it) const347 VCMSessionInfo::PacketIterator VCMSessionInfo::FindNextPartitionBeginning(
348     PacketIterator it) const {
349   while (it != packets_.end()) {
350     if (absl::get<RTPVideoHeaderVP8>((*it).video_header.video_type_header)
351             .beginningOfPartition) {
352       return it;
353     }
354     ++it;
355   }
356   return it;
357 }
358 
FindPartitionEnd(PacketIterator it) const359 VCMSessionInfo::PacketIterator VCMSessionInfo::FindPartitionEnd(
360     PacketIterator it) const {
361   assert((*it).codec() == kVideoCodecVP8);
362   PacketIterator prev_it = it;
363   const int partition_id =
364       absl::get<RTPVideoHeaderVP8>((*it).video_header.video_type_header)
365           .partitionId;
366   while (it != packets_.end()) {
367     bool beginning =
368         absl::get<RTPVideoHeaderVP8>((*it).video_header.video_type_header)
369             .beginningOfPartition;
370     int current_partition_id =
371         absl::get<RTPVideoHeaderVP8>((*it).video_header.video_type_header)
372             .partitionId;
373     bool packet_loss_found = (!beginning && !InSequence(it, prev_it));
374     if (packet_loss_found ||
375         (beginning && current_partition_id != partition_id)) {
376       // Missing packet, the previous packet was the last in sequence.
377       return prev_it;
378     }
379     prev_it = it;
380     ++it;
381   }
382   return prev_it;
383 }
384 
InSequence(const PacketIterator & packet_it,const PacketIterator & prev_packet_it)385 bool VCMSessionInfo::InSequence(const PacketIterator& packet_it,
386                                 const PacketIterator& prev_packet_it) {
387   // If the two iterators are pointing to the same packet they are considered
388   // to be in sequence.
389   return (packet_it == prev_packet_it ||
390           (static_cast<uint16_t>((*prev_packet_it).seqNum + 1) ==
391            (*packet_it).seqNum));
392 }
393 
MakeDecodable()394 size_t VCMSessionInfo::MakeDecodable() {
395   size_t return_length = 0;
396   if (packets_.empty()) {
397     return 0;
398   }
399   PacketIterator it = packets_.begin();
400   // Make sure we remove the first NAL unit if it's not decodable.
401   if ((*it).completeNALU == kNaluIncomplete || (*it).completeNALU == kNaluEnd) {
402     PacketIterator nalu_end = FindNaluEnd(it);
403     return_length += DeletePacketData(it, nalu_end);
404     it = nalu_end;
405   }
406   PacketIterator prev_it = it;
407   // Take care of the rest of the NAL units.
408   for (; it != packets_.end(); ++it) {
409     bool start_of_nalu = ((*it).completeNALU == kNaluStart ||
410                           (*it).completeNALU == kNaluComplete);
411     if (!start_of_nalu && !InSequence(it, prev_it)) {
412       // Found a sequence number gap due to packet loss.
413       PacketIterator nalu_end = FindNaluEnd(it);
414       return_length += DeletePacketData(it, nalu_end);
415       it = nalu_end;
416     }
417     prev_it = it;
418   }
419   return return_length;
420 }
421 
HaveFirstPacket() const422 bool VCMSessionInfo::HaveFirstPacket() const {
423   return !packets_.empty() && (first_packet_seq_num_ != -1);
424 }
425 
HaveLastPacket() const426 bool VCMSessionInfo::HaveLastPacket() const {
427   return !packets_.empty() && (last_packet_seq_num_ != -1);
428 }
429 
InsertPacket(const VCMPacket & packet,uint8_t * frame_buffer,const FrameData & frame_data)430 int VCMSessionInfo::InsertPacket(const VCMPacket& packet,
431                                  uint8_t* frame_buffer,
432                                  const FrameData& frame_data) {
433   if (packet.video_header.frame_type == VideoFrameType::kEmptyFrame) {
434     // Update sequence number of an empty packet.
435     // Only media packets are inserted into the packet list.
436     InformOfEmptyPacket(packet.seqNum);
437     return 0;
438   }
439 
440   if (packets_.size() == kMaxPacketsInSession) {
441     RTC_LOG(LS_ERROR) << "Max number of packets per frame has been reached.";
442     return -1;
443   }
444 
445   // Find the position of this packet in the packet list in sequence number
446   // order and insert it. Loop over the list in reverse order.
447   ReversePacketIterator rit = packets_.rbegin();
448   for (; rit != packets_.rend(); ++rit)
449     if (LatestSequenceNumber(packet.seqNum, (*rit).seqNum) == packet.seqNum)
450       break;
451 
452   // Check for duplicate packets.
453   if (rit != packets_.rend() && (*rit).seqNum == packet.seqNum &&
454       (*rit).sizeBytes > 0)
455     return -2;
456 
457   if (packet.codec() == kVideoCodecH264) {
458     // H.264 can have leading or trailing non-VCL (Video Coding Layer)
459     // NALUs, such as SPS/PPS/SEI and others.  Also, the RTP marker bit is
460     // not reliable for the last packet of a frame (RFC 6184 5.1 - "Decoders
461     // [] MUST NOT rely on this property"), so allow out-of-order packets to
462     // update the first and last seq# range.  Also mark as a key frame if
463     // any packet is of that type.
464     if (frame_type_ != VideoFrameType::kVideoFrameKey) {
465       frame_type_ = packet.video_header.frame_type;
466     }
467     if (packet.is_first_packet_in_frame() &&
468         (first_packet_seq_num_ == -1 ||
469          IsNewerSequenceNumber(first_packet_seq_num_, packet.seqNum))) {
470       first_packet_seq_num_ = packet.seqNum;
471     }
472     // Note: the code does *not* currently handle the Marker bit being totally
473     // absent from a frame.  It does not, however, depend on it being on the last
474     // packet of the 'frame'/'session'.
475     if (packet.markerBit &&
476         (last_packet_seq_num_ == -1 ||
477          IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_))) {
478       last_packet_seq_num_ = packet.seqNum;
479     }
480   } else {
481     // Only insert media packets between first and last packets (when
482     // available).
483     // Placing check here, as to properly account for duplicate packets.
484     // Check if this is first packet (only valid for some codecs)
485     // Should only be set for one packet per session.
486     if (packet.is_first_packet_in_frame() && first_packet_seq_num_ == -1) {
487       // The first packet in a frame signals the frame type.
488       frame_type_ = packet.video_header.frame_type;
489       // Store the sequence number for the first packet.
490       first_packet_seq_num_ = static_cast<int>(packet.seqNum);
491     } else if (first_packet_seq_num_ != -1 &&
492                IsNewerSequenceNumber(first_packet_seq_num_, packet.seqNum)) {
493       RTC_LOG(LS_WARNING)
494           << "Received packet with a sequence number which is out "
495              "of frame boundaries";
496       return -3;
497     } else if (frame_type_ == VideoFrameType::kEmptyFrame &&
498                packet.video_header.frame_type != VideoFrameType::kEmptyFrame) {
499       // Update the frame type with the type of the first media packet.
500       // TODO(mikhal): Can this trigger?
501       frame_type_ = packet.video_header.frame_type;
502     }
503 
504     // Track the marker bit, should only be set for one packet per session.
505     if (packet.markerBit && last_packet_seq_num_ == -1) {
506       last_packet_seq_num_ = static_cast<int>(packet.seqNum);
507     } else if (last_packet_seq_num_ != -1 &&
508                IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_)) {
509       RTC_LOG(LS_WARNING)
510           << "Received packet with a sequence number which is out "
511              "of frame boundaries";
512       return -3;
513     }
514   }
515 
516   // The insert operation invalidates the iterator |rit|.
517   PacketIterator packet_list_it = packets_.insert(rit.base(), packet);
518 
519   size_t returnLength = InsertBuffer(frame_buffer, packet_list_it);
520   UpdateCompleteSession();
521   return static_cast<int>(returnLength);
522 }
523 
InformOfEmptyPacket(uint16_t seq_num)524 void VCMSessionInfo::InformOfEmptyPacket(uint16_t seq_num) {
525   // Empty packets may be FEC or filler packets. They are sequential and
526   // follow the data packets, therefore, we should only keep track of the high
527   // and low sequence numbers and may assume that the packets in between are
528   // empty packets belonging to the same frame (timestamp).
529   if (empty_seq_num_high_ == -1)
530     empty_seq_num_high_ = seq_num;
531   else
532     empty_seq_num_high_ = LatestSequenceNumber(seq_num, empty_seq_num_high_);
533   if (empty_seq_num_low_ == -1 ||
534       IsNewerSequenceNumber(empty_seq_num_low_, seq_num))
535     empty_seq_num_low_ = seq_num;
536 }
537 
538 }  // namespace webrtc
539