1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/video_coding/session_info.h"
12 
13 #include "modules/video_coding/packet.h"
14 #include "rtc_base/logging.h"
15 
16 namespace webrtc {
17 
18 namespace {
19 
// Reads the two bytes at |dataBuffer| as one unsigned 16-bit value, most
// significant byte first. The caller must provide at least two readable bytes.
uint16_t BufferToUWord16(const uint8_t* dataBuffer) {
  const uint16_t high_byte = dataBuffer[0];
  const uint16_t low_byte = dataBuffer[1];
  return static_cast<uint16_t>((high_byte << 8) | low_byte);
}
23 
24 }  // namespace
25 
VCMSessionInfo()26 VCMSessionInfo::VCMSessionInfo()
27     : session_nack_(false),
28       complete_(false),
29       decodable_(false),
30       frame_type_(kVideoFrameDelta),
31       packets_(),
32       empty_seq_num_low_(-1),
33       empty_seq_num_high_(-1),
34       first_packet_seq_num_(-1),
35       last_packet_seq_num_(-1) {}
36 
UpdateDataPointers(const uint8_t * old_base_ptr,const uint8_t * new_base_ptr)37 void VCMSessionInfo::UpdateDataPointers(const uint8_t* old_base_ptr,
38                                         const uint8_t* new_base_ptr) {
39   for (PacketIterator it = packets_.begin(); it != packets_.end(); ++it)
40     if ((*it).dataPtr != NULL) {
41       assert(old_base_ptr != NULL && new_base_ptr != NULL);
42       (*it).dataPtr = new_base_ptr + ((*it).dataPtr - old_base_ptr);
43     }
44 }
45 
LowSequenceNumber() const46 int VCMSessionInfo::LowSequenceNumber() const {
47   if (packets_.empty())
48     return empty_seq_num_low_;
49   return packets_.front().seqNum;
50 }
51 
HighSequenceNumber() const52 int VCMSessionInfo::HighSequenceNumber() const {
53   if (packets_.empty())
54     return empty_seq_num_high_;
55   if (empty_seq_num_high_ == -1)
56     return packets_.back().seqNum;
57   return LatestSequenceNumber(packets_.back().seqNum, empty_seq_num_high_);
58 }
59 
PictureId() const60 int VCMSessionInfo::PictureId() const {
61   if (packets_.empty())
62     return kNoPictureId;
63   if (packets_.front().video_header.codec == kRtpVideoVp8) {
64     return packets_.front().video_header.codecHeader.VP8.pictureId;
65   } else if (packets_.front().video_header.codec == kRtpVideoVp9) {
66     return packets_.front().video_header.codecHeader.VP9.picture_id;
67   } else {
68     return kNoPictureId;
69   }
70 }
71 
TemporalId() const72 int VCMSessionInfo::TemporalId() const {
73   if (packets_.empty())
74     return kNoTemporalIdx;
75   if (packets_.front().video_header.codec == kRtpVideoVp8) {
76     return packets_.front().video_header.codecHeader.VP8.temporalIdx;
77   } else if (packets_.front().video_header.codec == kRtpVideoVp9) {
78     return packets_.front().video_header.codecHeader.VP9.temporal_idx;
79   } else {
80     return kNoTemporalIdx;
81   }
82 }
83 
LayerSync() const84 bool VCMSessionInfo::LayerSync() const {
85   if (packets_.empty())
86     return false;
87   if (packets_.front().video_header.codec == kRtpVideoVp8) {
88     return packets_.front().video_header.codecHeader.VP8.layerSync;
89   } else if (packets_.front().video_header.codec == kRtpVideoVp9) {
90     return packets_.front().video_header.codecHeader.VP9.temporal_up_switch;
91   } else {
92     return false;
93   }
94 }
95 
Tl0PicId() const96 int VCMSessionInfo::Tl0PicId() const {
97   if (packets_.empty())
98     return kNoTl0PicIdx;
99   if (packets_.front().video_header.codec == kRtpVideoVp8) {
100     return packets_.front().video_header.codecHeader.VP8.tl0PicIdx;
101   } else if (packets_.front().video_header.codec == kRtpVideoVp9) {
102     return packets_.front().video_header.codecHeader.VP9.tl0_pic_idx;
103   } else {
104     return kNoTl0PicIdx;
105   }
106 }
107 
NonReference() const108 bool VCMSessionInfo::NonReference() const {
109   if (packets_.empty() || packets_.front().video_header.codec != kRtpVideoVp8)
110     return false;
111   return packets_.front().video_header.codecHeader.VP8.nonReference;
112 }
113 
GetNaluInfos() const114 std::vector<NaluInfo> VCMSessionInfo::GetNaluInfos() const {
115   if (packets_.empty() || packets_.front().video_header.codec != kRtpVideoH264)
116     return std::vector<NaluInfo>();
117   std::vector<NaluInfo> nalu_infos;
118   for (const VCMPacket& packet : packets_) {
119     for (size_t i = 0; i < packet.video_header.codecHeader.H264.nalus_length;
120          ++i) {
121       nalu_infos.push_back(packet.video_header.codecHeader.H264.nalus[i]);
122     }
123   }
124   return nalu_infos;
125 }
126 
SetGofInfo(const GofInfoVP9 & gof_info,size_t idx)127 void VCMSessionInfo::SetGofInfo(const GofInfoVP9& gof_info, size_t idx) {
128   if (packets_.empty() || packets_.front().video_header.codec != kRtpVideoVp9 ||
129       packets_.front().video_header.codecHeader.VP9.flexible_mode) {
130     return;
131   }
132   packets_.front().video_header.codecHeader.VP9.temporal_idx =
133       gof_info.temporal_idx[idx];
134   packets_.front().video_header.codecHeader.VP9.temporal_up_switch =
135       gof_info.temporal_up_switch[idx];
136   packets_.front().video_header.codecHeader.VP9.num_ref_pics =
137       gof_info.num_ref_pics[idx];
138   for (uint8_t i = 0; i < gof_info.num_ref_pics[idx]; ++i) {
139     packets_.front().video_header.codecHeader.VP9.pid_diff[i] =
140         gof_info.pid_diff[idx][i];
141   }
142 }
143 
Reset()144 void VCMSessionInfo::Reset() {
145   session_nack_ = false;
146   complete_ = false;
147   decodable_ = false;
148   frame_type_ = kVideoFrameDelta;
149   packets_.clear();
150   empty_seq_num_low_ = -1;
151   empty_seq_num_high_ = -1;
152   first_packet_seq_num_ = -1;
153   last_packet_seq_num_ = -1;
154 }
155 
SessionLength() const156 size_t VCMSessionInfo::SessionLength() const {
157   size_t length = 0;
158   for (PacketIteratorConst it = packets_.begin(); it != packets_.end(); ++it)
159     length += (*it).sizeBytes;
160   return length;
161 }
162 
NumPackets() const163 int VCMSessionInfo::NumPackets() const {
164   return packets_.size();
165 }
166 
InsertBuffer(uint8_t * frame_buffer,PacketIterator packet_it)167 size_t VCMSessionInfo::InsertBuffer(uint8_t* frame_buffer,
168                                     PacketIterator packet_it) {
169   VCMPacket& packet = *packet_it;
170   PacketIterator it;
171 
172   // Calculate the offset into the frame buffer for this packet.
173   size_t offset = 0;
174   for (it = packets_.begin(); it != packet_it; ++it)
175     offset += (*it).sizeBytes;
176 
177   // Set the data pointer to pointing to the start of this packet in the
178   // frame buffer.
179   const uint8_t* packet_buffer = packet.dataPtr;
180   packet.dataPtr = frame_buffer + offset;
181 
182   // We handle H.264 STAP-A packets in a special way as we need to remove the
183   // two length bytes between each NAL unit, and potentially add start codes.
184   // TODO(pbos): Remove H264 parsing from this step and use a fragmentation
185   // header supplied by the H264 depacketizer.
186   const size_t kH264NALHeaderLengthInBytes = 1;
187   const size_t kLengthFieldLength = 2;
188   if (packet.video_header.codec == kRtpVideoH264 &&
189       packet.video_header.codecHeader.H264.packetization_type == kH264StapA) {
190     size_t required_length = 0;
191     const uint8_t* nalu_ptr = packet_buffer + kH264NALHeaderLengthInBytes;
192     // Must check that incoming data length doesn't extend past end of buffer.
193     // We allow for 100 bytes of expansion due to startcodes being longer than
194     // length fields.
195     while (nalu_ptr + kLengthFieldLength <= packet_buffer + packet.sizeBytes) {
196       size_t length = BufferToUWord16(nalu_ptr);
197       if (nalu_ptr + kLengthFieldLength + length <= packet_buffer + packet.sizeBytes) {
198         required_length +=
199           length + (packet.insertStartCode ? kH264StartCodeLengthBytes : 0);
200         nalu_ptr += kLengthFieldLength + length;
201       } else {
202         // Something is very wrong!
203         RTC_LOG(LS_ERROR) << "Failed to insert packet due to corrupt H264 STAP-A";
204         return 0;
205       }
206     }
207     ShiftSubsequentPackets(packet_it, required_length);
208     nalu_ptr = packet_buffer + kH264NALHeaderLengthInBytes;
209     uint8_t* frame_buffer_ptr = frame_buffer + offset;
210     // we already know we won't go past end-of-buffer
211     while (nalu_ptr + kLengthFieldLength <= packet_buffer + packet.sizeBytes) {
212       size_t length = BufferToUWord16(nalu_ptr);
213       nalu_ptr += kLengthFieldLength;
214       frame_buffer_ptr += Insert(nalu_ptr, length, packet.insertStartCode,
215                                  const_cast<uint8_t*>(frame_buffer_ptr));
216       nalu_ptr += length;
217     }
218     packet.sizeBytes = required_length;
219     return packet.sizeBytes;
220   }
221   ShiftSubsequentPackets(
222       packet_it, packet.sizeBytes +
223                      (packet.insertStartCode ? kH264StartCodeLengthBytes : 0));
224 
225   packet.sizeBytes =
226       Insert(packet_buffer, packet.sizeBytes, packet.insertStartCode,
227              const_cast<uint8_t*>(packet.dataPtr));
228   return packet.sizeBytes;
229 }
230 
Insert(const uint8_t * buffer,size_t length,bool insert_start_code,uint8_t * frame_buffer)231 size_t VCMSessionInfo::Insert(const uint8_t* buffer,
232                               size_t length,
233                               bool insert_start_code,
234                               uint8_t* frame_buffer) {
235   if (insert_start_code) {
236     const unsigned char startCode[] = {0, 0, 0, 1};
237     memcpy(frame_buffer, startCode, kH264StartCodeLengthBytes);
238   }
239   memcpy(frame_buffer + (insert_start_code ? kH264StartCodeLengthBytes : 0),
240          buffer, length);
241   length += (insert_start_code ? kH264StartCodeLengthBytes : 0);
242 
243   return length;
244 }
245 
ShiftSubsequentPackets(PacketIterator it,int steps_to_shift)246 void VCMSessionInfo::ShiftSubsequentPackets(PacketIterator it,
247                                             int steps_to_shift) {
248   ++it;
249   if (it == packets_.end())
250     return;
251   uint8_t* first_packet_ptr = const_cast<uint8_t*>((*it).dataPtr);
252   int shift_length = 0;
253   // Calculate the total move length and move the data pointers in advance.
254   for (; it != packets_.end(); ++it) {
255     shift_length += (*it).sizeBytes;
256     if ((*it).dataPtr != NULL)
257       (*it).dataPtr += steps_to_shift;
258   }
259   memmove(first_packet_ptr + steps_to_shift, first_packet_ptr, shift_length);
260 }
261 
UpdateCompleteSession()262 void VCMSessionInfo::UpdateCompleteSession() {
263   if (HaveFirstPacket() && HaveLastPacket()) {
264     // Do we have all the packets in this session?
265     bool complete_session = true;
266     PacketIterator it = packets_.begin();
267     PacketIterator prev_it = it;
268     ++it;
269     for (; it != packets_.end(); ++it) {
270       if (!InSequence(it, prev_it)) {
271         complete_session = false;
272         break;
273       }
274       prev_it = it;
275     }
276     complete_ = complete_session;
277   }
278 }
279 
UpdateDecodableSession(const FrameData & frame_data)280 void VCMSessionInfo::UpdateDecodableSession(const FrameData& frame_data) {
281   // Irrelevant if session is already complete or decodable
282   if (complete_ || decodable_)
283     return;
284   // TODO(agalusza): Account for bursty loss.
285   // TODO(agalusza): Refine these values to better approximate optimal ones.
286   // Do not decode frames if the RTT is lower than this.
287   const int64_t kRttThreshold = 100;
288   // Do not decode frames if the number of packets is between these two
289   // thresholds.
290   const float kLowPacketPercentageThreshold = 0.2f;
291   const float kHighPacketPercentageThreshold = 0.8f;
292   if (frame_data.rtt_ms < kRttThreshold || frame_type_ == kVideoFrameKey ||
293       !HaveFirstPacket() ||
294       (NumPackets() <= kHighPacketPercentageThreshold *
295                            frame_data.rolling_average_packets_per_frame &&
296        NumPackets() > kLowPacketPercentageThreshold *
297                           frame_data.rolling_average_packets_per_frame))
298     return;
299 
300   decodable_ = true;
301 }
302 
complete() const303 bool VCMSessionInfo::complete() const {
304   return complete_;
305 }
306 
decodable() const307 bool VCMSessionInfo::decodable() const {
308   return decodable_;
309 }
310 
311 // Find the end of the NAL unit which the packet pointed to by |packet_it|
312 // belongs to. Returns an iterator to the last packet of the frame if the end
313 // of the NAL unit wasn't found.
FindNaluEnd(PacketIterator packet_it) const314 VCMSessionInfo::PacketIterator VCMSessionInfo::FindNaluEnd(
315     PacketIterator packet_it) const {
316   if ((*packet_it).completeNALU == kNaluEnd ||
317       (*packet_it).completeNALU == kNaluComplete) {
318     return packet_it;
319   }
320   // Find the end of the NAL unit.
321   for (; packet_it != packets_.end(); ++packet_it) {
322     if (((*packet_it).completeNALU == kNaluComplete &&
323          (*packet_it).sizeBytes > 0) ||
324         // Found next NALU.
325         (*packet_it).completeNALU == kNaluStart)
326       return --packet_it;
327     if ((*packet_it).completeNALU == kNaluEnd)
328       return packet_it;
329   }
330   // The end wasn't found.
331   return --packet_it;
332 }
333 
DeletePacketData(PacketIterator start,PacketIterator end)334 size_t VCMSessionInfo::DeletePacketData(PacketIterator start,
335                                         PacketIterator end) {
336   size_t bytes_to_delete = 0;  // The number of bytes to delete.
337   PacketIterator packet_after_end = end;
338   ++packet_after_end;
339 
340   // Get the number of bytes to delete.
341   // Clear the size of these packets.
342   for (PacketIterator it = start; it != packet_after_end; ++it) {
343     bytes_to_delete += (*it).sizeBytes;
344     (*it).sizeBytes = 0;
345     (*it).dataPtr = NULL;
346   }
347   if (bytes_to_delete > 0)
348     ShiftSubsequentPackets(end, -static_cast<int>(bytes_to_delete));
349   return bytes_to_delete;
350 }
351 
FindNextPartitionBeginning(PacketIterator it) const352 VCMSessionInfo::PacketIterator VCMSessionInfo::FindNextPartitionBeginning(
353     PacketIterator it) const {
354   while (it != packets_.end()) {
355     if ((*it).video_header.codecHeader.VP8.beginningOfPartition) {
356       return it;
357     }
358     ++it;
359   }
360   return it;
361 }
362 
FindPartitionEnd(PacketIterator it) const363 VCMSessionInfo::PacketIterator VCMSessionInfo::FindPartitionEnd(
364     PacketIterator it) const {
365   assert((*it).codec == kVideoCodecVP8);
366   PacketIterator prev_it = it;
367   const int partition_id = (*it).video_header.codecHeader.VP8.partitionId;
368   while (it != packets_.end()) {
369     bool beginning = (*it).video_header.codecHeader.VP8.beginningOfPartition;
370     int current_partition_id = (*it).video_header.codecHeader.VP8.partitionId;
371     bool packet_loss_found = (!beginning && !InSequence(it, prev_it));
372     if (packet_loss_found ||
373         (beginning && current_partition_id != partition_id)) {
374       // Missing packet, the previous packet was the last in sequence.
375       return prev_it;
376     }
377     prev_it = it;
378     ++it;
379   }
380   return prev_it;
381 }
382 
InSequence(const PacketIterator & packet_it,const PacketIterator & prev_packet_it)383 bool VCMSessionInfo::InSequence(const PacketIterator& packet_it,
384                                 const PacketIterator& prev_packet_it) {
385   // If the two iterators are pointing to the same packet they are considered
386   // to be in sequence.
387   return (packet_it == prev_packet_it ||
388           (static_cast<uint16_t>((*prev_packet_it).seqNum + 1) ==
389            (*packet_it).seqNum));
390 }
391 
MakeDecodable()392 size_t VCMSessionInfo::MakeDecodable() {
393   size_t return_length = 0;
394   if (packets_.empty()) {
395     return 0;
396   }
397   PacketIterator it = packets_.begin();
398   // Make sure we remove the first NAL unit if it's not decodable.
399   if ((*it).completeNALU == kNaluIncomplete || (*it).completeNALU == kNaluEnd) {
400     PacketIterator nalu_end = FindNaluEnd(it);
401     return_length += DeletePacketData(it, nalu_end);
402     it = nalu_end;
403   }
404   PacketIterator prev_it = it;
405   // Take care of the rest of the NAL units.
406   for (; it != packets_.end(); ++it) {
407     bool start_of_nalu = ((*it).completeNALU == kNaluStart ||
408                           (*it).completeNALU == kNaluComplete);
409     if (!start_of_nalu && !InSequence(it, prev_it)) {
410       // Found a sequence number gap due to packet loss.
411       PacketIterator nalu_end = FindNaluEnd(it);
412       return_length += DeletePacketData(it, nalu_end);
413       it = nalu_end;
414     }
415     prev_it = it;
416   }
417   return return_length;
418 }
419 
SetNotDecodableIfIncomplete()420 void VCMSessionInfo::SetNotDecodableIfIncomplete() {
421   // We don't need to check for completeness first because the two are
422   // orthogonal. If complete_ is true, decodable_ is irrelevant.
423   decodable_ = false;
424 }
425 
HaveFirstPacket() const426 bool VCMSessionInfo::HaveFirstPacket() const {
427   return !packets_.empty() && (first_packet_seq_num_ != -1);
428 }
429 
HaveLastPacket() const430 bool VCMSessionInfo::HaveLastPacket() const {
431   return !packets_.empty() && (last_packet_seq_num_ != -1);
432 }
433 
session_nack() const434 bool VCMSessionInfo::session_nack() const {
435   return session_nack_;
436 }
437 
InsertPacket(const VCMPacket & packet,uint8_t * frame_buffer,VCMDecodeErrorMode decode_error_mode,const FrameData & frame_data)438 int VCMSessionInfo::InsertPacket(const VCMPacket& packet,
439                                  uint8_t* frame_buffer,
440                                  VCMDecodeErrorMode decode_error_mode,
441                                  const FrameData& frame_data) {
442   if (packet.frameType == kEmptyFrame) {
443     // Update sequence number of an empty packet.
444     // Only media packets are inserted into the packet list.
445     InformOfEmptyPacket(packet.seqNum);
446     return 0;
447   }
448 
449   if (packets_.size() == kMaxPacketsInSession) {
450     RTC_LOG(LS_ERROR) << "Max number of packets per frame has been reached.";
451     return -1;
452   }
453 
454   // Find the position of this packet in the packet list in sequence number
455   // order and insert it. Loop over the list in reverse order.
456   ReversePacketIterator rit = packets_.rbegin();
457   for (; rit != packets_.rend(); ++rit)
458     if (LatestSequenceNumber(packet.seqNum, (*rit).seqNum) == packet.seqNum)
459       break;
460 
461   // Check for duplicate packets.
462   if (rit != packets_.rend() && (*rit).seqNum == packet.seqNum &&
463       (*rit).sizeBytes > 0)
464     return -2;
465 
466   if (packet.codec == kVideoCodecH264) {
467     // H.264 can have leading or trailing non-VCL (Video Coding Layer)
468     // NALUs, such as SPS/PPS/SEI and others.  Also, the RTP marker bit is
469     // not reliable for the last packet of a frame (RFC 6184 5.1 - "Decoders
470     // [] MUST NOT rely on this property"), so allow out-of-order packets to
471     // update the first and last seq# range.  Also mark as a key frame if
472     // any packet is of that type.
473     if (frame_type_ != kVideoFrameKey) {
474       frame_type_ = packet.frameType;
475     }
476     if (packet.is_first_packet_in_frame &&
477         (first_packet_seq_num_ == -1 ||
478          IsNewerSequenceNumber(first_packet_seq_num_, packet.seqNum))) {
479       first_packet_seq_num_ = packet.seqNum;
480     }
481     // Note: the code does *not* currently handle the Marker bit being totally
482     // absent from a frame.  It does not, however, depend on it being on the last
483     // packet of the 'frame'/'session'.
484     if (packet.markerBit &&
485         (last_packet_seq_num_ == -1 ||
486          IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_))) {
487       last_packet_seq_num_ = packet.seqNum;
488     }
489   } else {
490     // Only insert media packets between first and last packets (when
491     // available).
492     // Placing check here, as to properly account for duplicate packets.
493     // Check if this is first packet (only valid for some codecs)
494     // Should only be set for one packet per session.
495     if (packet.is_first_packet_in_frame && first_packet_seq_num_ == -1) {
496       // The first packet in a frame signals the frame type.
497       frame_type_ = packet.frameType;
498       // Store the sequence number for the first packet.
499       first_packet_seq_num_ = static_cast<int>(packet.seqNum);
500     } else if (first_packet_seq_num_ != -1 &&
501                IsNewerSequenceNumber(first_packet_seq_num_, packet.seqNum)) {
502       RTC_LOG(LS_WARNING)
503           << "Received packet with a sequence number which is out "
504              "of frame boundaries";
505       return -3;
506     } else if (frame_type_ == kEmptyFrame && packet.frameType != kEmptyFrame) {
507       // Update the frame type with the type of the first media packet.
508       // TODO(mikhal): Can this trigger?
509       frame_type_ = packet.frameType;
510     }
511 
512     // Track the marker bit, should only be set for one packet per session.
513     if (packet.markerBit && last_packet_seq_num_ == -1) {
514       last_packet_seq_num_ = static_cast<int>(packet.seqNum);
515     } else if (last_packet_seq_num_ != -1 &&
516                IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_)) {
517       RTC_LOG(LS_WARNING)
518           << "Received packet with a sequence number which is out "
519              "of frame boundaries";
520       return -3;
521     }
522   }
523 
524   // The insert operation invalidates the iterator |rit|.
525   PacketIterator packet_list_it = packets_.insert(rit.base(), packet);
526 
527   size_t returnLength = InsertBuffer(frame_buffer, packet_list_it);
528   UpdateCompleteSession();
529   // We call MakeDecodable() before decoding, which removes packets after a loss
530   // (and which means h.264 mode 1 frames with a loss in the first packet will be
531   // totally removed)
532   if (decode_error_mode == kWithErrors)
533     decodable_ = true;
534   else if (decode_error_mode == kSelectiveErrors)
535     UpdateDecodableSession(frame_data);
536   return static_cast<int>(returnLength);
537 }
538 
InformOfEmptyPacket(uint16_t seq_num)539 void VCMSessionInfo::InformOfEmptyPacket(uint16_t seq_num) {
540   // Empty packets may be FEC or filler packets. They are sequential and
541   // follow the data packets, therefore, we should only keep track of the high
542   // and low sequence numbers and may assume that the packets in between are
543   // empty packets belonging to the same frame (timestamp).
544   if (empty_seq_num_high_ == -1)
545     empty_seq_num_high_ = seq_num;
546   else
547     empty_seq_num_high_ = LatestSequenceNumber(seq_num, empty_seq_num_high_);
548   if (empty_seq_num_low_ == -1 ||
549       IsNewerSequenceNumber(empty_seq_num_low_, seq_num))
550     empty_seq_num_low_ = seq_num;
551 }
552 
553 }  // namespace webrtc
554