1 /*
2  *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/video_coding/rtp_frame_reference_finder.h"
12 
13 #include <algorithm>
14 #include <limits>
15 
16 #include "absl/base/macros.h"
17 #include "absl/types/variant.h"
18 #include "modules/video_coding/frame_object.h"
19 #include "modules/video_coding/packet_buffer.h"
20 #include "rtc_base/checks.h"
21 #include "rtc_base/logging.h"
22 
23 namespace webrtc {
24 namespace video_coding {
25 
RtpFrameReferenceFinder(OnCompleteFrameCallback * frame_callback)26 RtpFrameReferenceFinder::RtpFrameReferenceFinder(
27     OnCompleteFrameCallback* frame_callback)
28     : RtpFrameReferenceFinder(frame_callback, 0) {}
29 
RtpFrameReferenceFinder(OnCompleteFrameCallback * frame_callback,int64_t picture_id_offset)30 RtpFrameReferenceFinder::RtpFrameReferenceFinder(
31     OnCompleteFrameCallback* frame_callback,
32     int64_t picture_id_offset)
33     : last_picture_id_(-1),
34       current_ss_idx_(0),
35       cleared_to_seq_num_(-1),
36       frame_callback_(frame_callback),
37       picture_id_offset_(picture_id_offset) {}
38 
39 RtpFrameReferenceFinder::~RtpFrameReferenceFinder() = default;
40 
ManageFrame(std::unique_ptr<RtpFrameObject> frame)41 void RtpFrameReferenceFinder::ManageFrame(
42     std::unique_ptr<RtpFrameObject> frame) {
43   // If we have cleared past this frame, drop it.
44   if (cleared_to_seq_num_ != -1 &&
45       AheadOf<uint16_t>(cleared_to_seq_num_, frame->first_seq_num())) {
46     return;
47   }
48 
49   FrameDecision decision = ManageFrameInternal(frame.get());
50 
51   switch (decision) {
52     case kStash:
53       if (stashed_frames_.size() > kMaxStashedFrames)
54         stashed_frames_.pop_back();
55       stashed_frames_.push_front(std::move(frame));
56       break;
57     case kHandOff:
58       HandOffFrame(std::move(frame));
59       RetryStashedFrames();
60       break;
61     case kDrop:
62       break;
63   }
64 }
65 
RetryStashedFrames()66 void RtpFrameReferenceFinder::RetryStashedFrames() {
67   bool complete_frame = false;
68   do {
69     complete_frame = false;
70     for (auto frame_it = stashed_frames_.begin();
71          frame_it != stashed_frames_.end();) {
72       FrameDecision decision = ManageFrameInternal(frame_it->get());
73 
74       switch (decision) {
75         case kStash:
76           ++frame_it;
77           break;
78         case kHandOff:
79           complete_frame = true;
80           HandOffFrame(std::move(*frame_it));
81           ABSL_FALLTHROUGH_INTENDED;
82         case kDrop:
83           frame_it = stashed_frames_.erase(frame_it);
84       }
85     }
86   } while (complete_frame);
87 }
88 
HandOffFrame(std::unique_ptr<RtpFrameObject> frame)89 void RtpFrameReferenceFinder::HandOffFrame(
90     std::unique_ptr<RtpFrameObject> frame) {
91   frame->id.picture_id += picture_id_offset_;
92   for (size_t i = 0; i < frame->num_references; ++i) {
93     frame->references[i] += picture_id_offset_;
94   }
95 
96   frame_callback_->OnCompleteFrame(std::move(frame));
97 }
98 
99 RtpFrameReferenceFinder::FrameDecision
ManageFrameInternal(RtpFrameObject * frame)100 RtpFrameReferenceFinder::ManageFrameInternal(RtpFrameObject* frame) {
101   if (const absl::optional<RTPVideoHeader::GenericDescriptorInfo>&
102           generic_descriptor = frame->GetRtpVideoHeader().generic) {
103     return ManageFrameGeneric(frame, *generic_descriptor);
104   }
105 
106   switch (frame->codec_type()) {
107     case kVideoCodecVP8:
108       return ManageFrameVp8(frame);
109     case kVideoCodecVP9:
110       return ManageFrameVp9(frame);
111     case kVideoCodecGeneric:
112       if (auto* generic_header = absl::get_if<RTPVideoHeaderLegacyGeneric>(
113               &frame->GetRtpVideoHeader().video_type_header)) {
114         return ManageFramePidOrSeqNum(frame, generic_header->picture_id);
115       }
116       ABSL_FALLTHROUGH_INTENDED;
117     default:
118       return ManageFramePidOrSeqNum(frame, kNoPictureId);
119   }
120 }
121 
PaddingReceived(uint16_t seq_num)122 void RtpFrameReferenceFinder::PaddingReceived(uint16_t seq_num) {
123   auto clean_padding_to =
124       stashed_padding_.lower_bound(seq_num - kMaxPaddingAge);
125   stashed_padding_.erase(stashed_padding_.begin(), clean_padding_to);
126   stashed_padding_.insert(seq_num);
127   UpdateLastPictureIdWithPadding(seq_num);
128   RetryStashedFrames();
129 }
130 
ClearTo(uint16_t seq_num)131 void RtpFrameReferenceFinder::ClearTo(uint16_t seq_num) {
132   cleared_to_seq_num_ = seq_num;
133 
134   auto it = stashed_frames_.begin();
135   while (it != stashed_frames_.end()) {
136     if (AheadOf<uint16_t>(cleared_to_seq_num_, (*it)->first_seq_num())) {
137       it = stashed_frames_.erase(it);
138     } else {
139       ++it;
140     }
141   }
142 }
143 
UpdateLastPictureIdWithPadding(uint16_t seq_num)144 void RtpFrameReferenceFinder::UpdateLastPictureIdWithPadding(uint16_t seq_num) {
145   auto gop_seq_num_it = last_seq_num_gop_.upper_bound(seq_num);
146 
147   // If this padding packet "belongs" to a group of pictures that we don't track
148   // anymore, do nothing.
149   if (gop_seq_num_it == last_seq_num_gop_.begin())
150     return;
151   --gop_seq_num_it;
152 
153   // Calculate the next contiuous sequence number and search for it in
154   // the padding packets we have stashed.
155   uint16_t next_seq_num_with_padding = gop_seq_num_it->second.second + 1;
156   auto padding_seq_num_it =
157       stashed_padding_.lower_bound(next_seq_num_with_padding);
158 
159   // While there still are padding packets and those padding packets are
160   // continuous, then advance the "last-picture-id-with-padding" and remove
161   // the stashed padding packet.
162   while (padding_seq_num_it != stashed_padding_.end() &&
163          *padding_seq_num_it == next_seq_num_with_padding) {
164     gop_seq_num_it->second.second = next_seq_num_with_padding;
165     ++next_seq_num_with_padding;
166     padding_seq_num_it = stashed_padding_.erase(padding_seq_num_it);
167   }
168 
169   // In the case where the stream has been continuous without any new keyframes
170   // for a while there is a risk that new frames will appear to be older than
171   // the keyframe they belong to due to wrapping sequence number. In order
172   // to prevent this we advance the picture id of the keyframe every so often.
173   if (ForwardDiff(gop_seq_num_it->first, seq_num) > 10000) {
174     auto save = gop_seq_num_it->second;
175     last_seq_num_gop_.clear();
176     last_seq_num_gop_[seq_num] = save;
177   }
178 }
179 
180 RtpFrameReferenceFinder::FrameDecision
ManageFrameGeneric(RtpFrameObject * frame,const RTPVideoHeader::GenericDescriptorInfo & descriptor)181 RtpFrameReferenceFinder::ManageFrameGeneric(
182     RtpFrameObject* frame,
183     const RTPVideoHeader::GenericDescriptorInfo& descriptor) {
184   frame->id.picture_id = descriptor.frame_id;
185   frame->id.spatial_layer = descriptor.spatial_index;
186 
187   if (EncodedFrame::kMaxFrameReferences < descriptor.dependencies.size()) {
188     RTC_LOG(LS_WARNING) << "Too many dependencies in generic descriptor.";
189     return kDrop;
190   }
191 
192   frame->num_references = descriptor.dependencies.size();
193   for (size_t i = 0; i < descriptor.dependencies.size(); ++i)
194     frame->references[i] = descriptor.dependencies[i];
195 
196   return kHandOff;
197 }
198 
199 RtpFrameReferenceFinder::FrameDecision
ManageFramePidOrSeqNum(RtpFrameObject * frame,int picture_id)200 RtpFrameReferenceFinder::ManageFramePidOrSeqNum(RtpFrameObject* frame,
201                                                 int picture_id) {
202   // If |picture_id| is specified then we use that to set the frame references,
203   // otherwise we use sequence number.
204   if (picture_id != kNoPictureId) {
205     frame->id.picture_id = unwrapper_.Unwrap(picture_id & 0x7FFF);
206     frame->num_references =
207         frame->frame_type() == VideoFrameType::kVideoFrameKey ? 0 : 1;
208     frame->references[0] = frame->id.picture_id - 1;
209     return kHandOff;
210   }
211 
212   if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
213     last_seq_num_gop_.insert(std::make_pair(
214         frame->last_seq_num(),
215         std::make_pair(frame->last_seq_num(), frame->last_seq_num())));
216   }
217 
218   // We have received a frame but not yet a keyframe, stash this frame.
219   if (last_seq_num_gop_.empty())
220     return kStash;
221 
222   // Clean up info for old keyframes but make sure to keep info
223   // for the last keyframe.
224   auto clean_to = last_seq_num_gop_.lower_bound(frame->last_seq_num() - 100);
225   for (auto it = last_seq_num_gop_.begin();
226        it != clean_to && last_seq_num_gop_.size() > 1;) {
227     it = last_seq_num_gop_.erase(it);
228   }
229 
230   // Find the last sequence number of the last frame for the keyframe
231   // that this frame indirectly references.
232   auto seq_num_it = last_seq_num_gop_.upper_bound(frame->last_seq_num());
233   if (seq_num_it == last_seq_num_gop_.begin()) {
234     RTC_LOG(LS_WARNING) << "Generic frame with packet range ["
235                         << frame->first_seq_num() << ", "
236                         << frame->last_seq_num()
237                         << "] has no GoP, dropping frame.";
238     return kDrop;
239   }
240   seq_num_it--;
241 
242   // Make sure the packet sequence numbers are continuous, otherwise stash
243   // this frame.
244   uint16_t last_picture_id_gop = seq_num_it->second.first;
245   uint16_t last_picture_id_with_padding_gop = seq_num_it->second.second;
246   if (frame->frame_type() == VideoFrameType::kVideoFrameDelta) {
247     uint16_t prev_seq_num = frame->first_seq_num() - 1;
248 
249     if (prev_seq_num != last_picture_id_with_padding_gop)
250       return kStash;
251   }
252 
253   RTC_DCHECK(AheadOrAt(frame->last_seq_num(), seq_num_it->first));
254 
255   // Since keyframes can cause reordering we can't simply assign the
256   // picture id according to some incrementing counter.
257   frame->id.picture_id = frame->last_seq_num();
258   frame->num_references =
259       frame->frame_type() == VideoFrameType::kVideoFrameDelta;
260   frame->references[0] = rtp_seq_num_unwrapper_.Unwrap(last_picture_id_gop);
261   if (AheadOf<uint16_t>(frame->id.picture_id, last_picture_id_gop)) {
262     seq_num_it->second.first = frame->id.picture_id;
263     seq_num_it->second.second = frame->id.picture_id;
264   }
265 
266   UpdateLastPictureIdWithPadding(frame->id.picture_id);
267   frame->id.picture_id = rtp_seq_num_unwrapper_.Unwrap(frame->id.picture_id);
268   return kHandOff;
269 }
270 
ManageFrameVp8(RtpFrameObject * frame)271 RtpFrameReferenceFinder::FrameDecision RtpFrameReferenceFinder::ManageFrameVp8(
272     RtpFrameObject* frame) {
273   const RTPVideoHeader& video_header = frame->GetRtpVideoHeader();
274   const RTPVideoHeaderVP8& codec_header =
275       absl::get<RTPVideoHeaderVP8>(video_header.video_type_header);
276 
277   if (codec_header.pictureId == kNoPictureId ||
278       codec_header.temporalIdx == kNoTemporalIdx ||
279       codec_header.tl0PicIdx == kNoTl0PicIdx) {
280     return ManageFramePidOrSeqNum(frame, codec_header.pictureId);
281   }
282 
283   // Protect against corrupted packets with arbitrary large temporal idx.
284   if (codec_header.temporalIdx >= kMaxTemporalLayers)
285     return kDrop;
286 
287   frame->id.picture_id = codec_header.pictureId & 0x7FFF;
288 
289   if (last_picture_id_ == -1)
290     last_picture_id_ = frame->id.picture_id;
291 
292   // Clean up info about not yet received frames that are too old.
293   uint16_t old_picture_id =
294       Subtract<kPicIdLength>(frame->id.picture_id, kMaxNotYetReceivedFrames);
295   auto clean_frames_to = not_yet_received_frames_.lower_bound(old_picture_id);
296   not_yet_received_frames_.erase(not_yet_received_frames_.begin(),
297                                  clean_frames_to);
298   // Avoid re-adding picture ids that were just erased.
299   if (AheadOf<uint16_t, kPicIdLength>(old_picture_id, last_picture_id_)) {
300     last_picture_id_ = old_picture_id;
301   }
302   // Find if there has been a gap in fully received frames and save the picture
303   // id of those frames in |not_yet_received_frames_|.
304   if (AheadOf<uint16_t, kPicIdLength>(frame->id.picture_id, last_picture_id_)) {
305     do {
306       last_picture_id_ = Add<kPicIdLength>(last_picture_id_, 1);
307       not_yet_received_frames_.insert(last_picture_id_);
308     } while (last_picture_id_ != frame->id.picture_id);
309   }
310 
311   int64_t unwrapped_tl0 = tl0_unwrapper_.Unwrap(codec_header.tl0PicIdx & 0xFF);
312 
313   // Clean up info for base layers that are too old.
314   int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxLayerInfo;
315   auto clean_layer_info_to = layer_info_.lower_bound(old_tl0_pic_idx);
316   layer_info_.erase(layer_info_.begin(), clean_layer_info_to);
317 
318   if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
319     if (codec_header.temporalIdx != 0) {
320       return kDrop;
321     }
322     frame->num_references = 0;
323     layer_info_[unwrapped_tl0].fill(-1);
324     UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx);
325     return kHandOff;
326   }
327 
328   auto layer_info_it = layer_info_.find(
329       codec_header.temporalIdx == 0 ? unwrapped_tl0 - 1 : unwrapped_tl0);
330 
331   // If we don't have the base layer frame yet, stash this frame.
332   if (layer_info_it == layer_info_.end())
333     return kStash;
334 
335   // A non keyframe base layer frame has been received, copy the layer info
336   // from the previous base layer frame and set a reference to the previous
337   // base layer frame.
338   if (codec_header.temporalIdx == 0) {
339     layer_info_it =
340         layer_info_.emplace(unwrapped_tl0, layer_info_it->second).first;
341     frame->num_references = 1;
342     int64_t last_pid_on_layer = layer_info_it->second[0];
343 
344     // Is this an old frame that has already been used to update the state? If
345     // so, drop it.
346     if (AheadOrAt<uint16_t, kPicIdLength>(last_pid_on_layer,
347                                           frame->id.picture_id)) {
348       return kDrop;
349     }
350 
351     frame->references[0] = last_pid_on_layer;
352     UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx);
353     return kHandOff;
354   }
355 
356   // Layer sync frame, this frame only references its base layer frame.
357   if (codec_header.layerSync) {
358     frame->num_references = 1;
359     int64_t last_pid_on_layer = layer_info_it->second[codec_header.temporalIdx];
360 
361     // Is this an old frame that has already been used to update the state? If
362     // so, drop it.
363     if (last_pid_on_layer != -1 &&
364         AheadOrAt<uint16_t, kPicIdLength>(last_pid_on_layer,
365                                           frame->id.picture_id)) {
366       return kDrop;
367     }
368 
369     frame->references[0] = layer_info_it->second[0];
370     UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx);
371     return kHandOff;
372   }
373 
374   // Find all references for this frame.
375   frame->num_references = 0;
376   for (uint8_t layer = 0; layer <= codec_header.temporalIdx; ++layer) {
377     // If we have not yet received a previous frame on this temporal layer,
378     // stash this frame.
379     if (layer_info_it->second[layer] == -1)
380       return kStash;
381 
382     // If the last frame on this layer is ahead of this frame it means that
383     // a layer sync frame has been received after this frame for the same
384     // base layer frame, drop this frame.
385     if (AheadOf<uint16_t, kPicIdLength>(layer_info_it->second[layer],
386                                         frame->id.picture_id)) {
387       return kDrop;
388     }
389 
390     // If we have not yet received a frame between this frame and the referenced
391     // frame then we have to wait for that frame to be completed first.
392     auto not_received_frame_it =
393         not_yet_received_frames_.upper_bound(layer_info_it->second[layer]);
394     if (not_received_frame_it != not_yet_received_frames_.end() &&
395         AheadOf<uint16_t, kPicIdLength>(frame->id.picture_id,
396                                         *not_received_frame_it)) {
397       return kStash;
398     }
399 
400     if (!(AheadOf<uint16_t, kPicIdLength>(frame->id.picture_id,
401                                           layer_info_it->second[layer]))) {
402       RTC_LOG(LS_WARNING) << "Frame with picture id " << frame->id.picture_id
403                           << " and packet range [" << frame->first_seq_num()
404                           << ", " << frame->last_seq_num()
405                           << "] already received, "
406                              " dropping frame.";
407       return kDrop;
408     }
409 
410     ++frame->num_references;
411     frame->references[layer] = layer_info_it->second[layer];
412   }
413 
414   UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx);
415   return kHandOff;
416 }
417 
UpdateLayerInfoVp8(RtpFrameObject * frame,int64_t unwrapped_tl0,uint8_t temporal_idx)418 void RtpFrameReferenceFinder::UpdateLayerInfoVp8(RtpFrameObject* frame,
419                                                  int64_t unwrapped_tl0,
420                                                  uint8_t temporal_idx) {
421   auto layer_info_it = layer_info_.find(unwrapped_tl0);
422 
423   // Update this layer info and newer.
424   while (layer_info_it != layer_info_.end()) {
425     if (layer_info_it->second[temporal_idx] != -1 &&
426         AheadOf<uint16_t, kPicIdLength>(layer_info_it->second[temporal_idx],
427                                         frame->id.picture_id)) {
428       // The frame was not newer, then no subsequent layer info have to be
429       // update.
430       break;
431     }
432 
433     layer_info_it->second[temporal_idx] = frame->id.picture_id;
434     ++unwrapped_tl0;
435     layer_info_it = layer_info_.find(unwrapped_tl0);
436   }
437   not_yet_received_frames_.erase(frame->id.picture_id);
438 
439   UnwrapPictureIds(frame);
440 }
441 
ManageFrameVp9(RtpFrameObject * frame)442 RtpFrameReferenceFinder::FrameDecision RtpFrameReferenceFinder::ManageFrameVp9(
443     RtpFrameObject* frame) {
444   const RTPVideoHeader& video_header = frame->GetRtpVideoHeader();
445   const RTPVideoHeaderVP9& codec_header =
446       absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);
447 
448   if (codec_header.picture_id == kNoPictureId ||
449       codec_header.temporal_idx == kNoTemporalIdx) {
450     return ManageFramePidOrSeqNum(frame, codec_header.picture_id);
451   }
452 
453   // Protect against corrupted packets with arbitrary large temporal idx.
454   if (codec_header.temporal_idx >= kMaxTemporalLayers ||
455       codec_header.spatial_idx >= kMaxSpatialLayers)
456     return kDrop;
457 
458   frame->id.spatial_layer = codec_header.spatial_idx;
459   frame->inter_layer_predicted = codec_header.inter_layer_predicted;
460   frame->id.picture_id = codec_header.picture_id & 0x7FFF;
461 
462   if (last_picture_id_ == -1)
463     last_picture_id_ = frame->id.picture_id;
464 
465   if (codec_header.flexible_mode) {
466     if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
467       return kDrop;
468     }
469     frame->num_references = codec_header.num_ref_pics;
470     for (size_t i = 0; i < frame->num_references; ++i) {
471       frame->references[i] = Subtract<kPicIdLength>(frame->id.picture_id,
472                                                     codec_header.pid_diff[i]);
473     }
474 
475     UnwrapPictureIds(frame);
476     return kHandOff;
477   }
478 
479   if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
480     RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
481                            "non-flexible mode.";
482     return kDrop;
483   }
484 
485   GofInfo* info;
486   int64_t unwrapped_tl0 =
487       tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
488   if (codec_header.ss_data_available) {
489     if (codec_header.temporal_idx != 0) {
490       RTC_LOG(LS_WARNING) << "Received scalability structure on a non base "
491                              "layer frame. Scalability structure ignored.";
492     } else {
493       if (codec_header.gof.num_frames_in_gof > kMaxVp9FramesInGof) {
494         return kDrop;
495       }
496 
497       for (size_t i = 0; i < codec_header.gof.num_frames_in_gof; ++i) {
498         if (codec_header.gof.num_ref_pics[i] > kMaxVp9RefPics) {
499           return kDrop;
500         }
501       }
502 
503       GofInfoVP9 gof = codec_header.gof;
504       if (gof.num_frames_in_gof == 0) {
505         RTC_LOG(LS_WARNING) << "Number of frames in GOF is zero. Assume "
506                                "that stream has only one temporal layer.";
507         gof.SetGofInfoVP9(kTemporalStructureMode1);
508       }
509 
510       current_ss_idx_ = Add<kMaxGofSaved>(current_ss_idx_, 1);
511       scalability_structures_[current_ss_idx_] = gof;
512       scalability_structures_[current_ss_idx_].pid_start = frame->id.picture_id;
513       gof_info_.emplace(unwrapped_tl0,
514                         GofInfo(&scalability_structures_[current_ss_idx_],
515                                 frame->id.picture_id));
516     }
517 
518     const auto gof_info_it = gof_info_.find(unwrapped_tl0);
519     if (gof_info_it == gof_info_.end())
520       return kStash;
521 
522     info = &gof_info_it->second;
523 
524     if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
525       frame->num_references = 0;
526       FrameReceivedVp9(frame->id.picture_id, info);
527       UnwrapPictureIds(frame);
528       return kHandOff;
529     }
530   } else if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
531     if (frame->id.spatial_layer == 0) {
532       RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure";
533       return kDrop;
534     }
535     const auto gof_info_it = gof_info_.find(unwrapped_tl0);
536     if (gof_info_it == gof_info_.end())
537       return kStash;
538 
539     info = &gof_info_it->second;
540 
541     if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
542       frame->num_references = 0;
543       FrameReceivedVp9(frame->id.picture_id, info);
544       UnwrapPictureIds(frame);
545       return kHandOff;
546     }
547   } else {
548     auto gof_info_it = gof_info_.find(
549         (codec_header.temporal_idx == 0) ? unwrapped_tl0 - 1 : unwrapped_tl0);
550 
551     // Gof info for this frame is not available yet, stash this frame.
552     if (gof_info_it == gof_info_.end())
553       return kStash;
554 
555     if (codec_header.temporal_idx == 0) {
556       gof_info_it = gof_info_
557                         .emplace(unwrapped_tl0, GofInfo(gof_info_it->second.gof,
558                                                         frame->id.picture_id))
559                         .first;
560     }
561 
562     info = &gof_info_it->second;
563   }
564 
565   // Clean up info for base layers that are too old.
566   int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxGofSaved;
567   auto clean_gof_info_to = gof_info_.lower_bound(old_tl0_pic_idx);
568   gof_info_.erase(gof_info_.begin(), clean_gof_info_to);
569 
570   FrameReceivedVp9(frame->id.picture_id, info);
571 
572   // Make sure we don't miss any frame that could potentially have the
573   // up switch flag set.
574   if (MissingRequiredFrameVp9(frame->id.picture_id, *info))
575     return kStash;
576 
577   if (codec_header.temporal_up_switch)
578     up_switch_.emplace(frame->id.picture_id, codec_header.temporal_idx);
579 
580   // Clean out old info about up switch frames.
581   uint16_t old_picture_id = Subtract<kPicIdLength>(frame->id.picture_id, 50);
582   auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id);
583   up_switch_.erase(up_switch_.begin(), up_switch_erase_to);
584 
585   size_t diff = ForwardDiff<uint16_t, kPicIdLength>(info->gof->pid_start,
586                                                     frame->id.picture_id);
587   size_t gof_idx = diff % info->gof->num_frames_in_gof;
588 
589   if (info->gof->num_ref_pics[gof_idx] > EncodedFrame::kMaxFrameReferences) {
590     return kDrop;
591   }
592   // Populate references according to the scalability structure.
593   frame->num_references = info->gof->num_ref_pics[gof_idx];
594   for (size_t i = 0; i < frame->num_references; ++i) {
595     frame->references[i] = Subtract<kPicIdLength>(
596         frame->id.picture_id, info->gof->pid_diff[gof_idx][i]);
597 
598     // If this is a reference to a frame earlier than the last up switch point,
599     // then ignore this reference.
600     if (UpSwitchInIntervalVp9(frame->id.picture_id, codec_header.temporal_idx,
601                               frame->references[i])) {
602       --frame->num_references;
603     }
604   }
605 
606   // Override GOF references.
607   if (!codec_header.inter_pic_predicted) {
608     frame->num_references = 0;
609   }
610 
611   UnwrapPictureIds(frame);
612   return kHandOff;
613 }
614 
MissingRequiredFrameVp9(uint16_t picture_id,const GofInfo & info)615 bool RtpFrameReferenceFinder::MissingRequiredFrameVp9(uint16_t picture_id,
616                                                       const GofInfo& info) {
617   size_t diff =
618       ForwardDiff<uint16_t, kPicIdLength>(info.gof->pid_start, picture_id);
619   size_t gof_idx = diff % info.gof->num_frames_in_gof;
620   size_t temporal_idx = info.gof->temporal_idx[gof_idx];
621 
622   if (temporal_idx >= kMaxTemporalLayers) {
623     RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
624                         << " temporal "
625                            "layers are supported.";
626     return true;
627   }
628 
629   // For every reference this frame has, check if there is a frame missing in
630   // the interval (|ref_pid|, |picture_id|) in any of the lower temporal
631   // layers. If so, we are missing a required frame.
632   uint8_t num_references = info.gof->num_ref_pics[gof_idx];
633   for (size_t i = 0; i < num_references; ++i) {
634     uint16_t ref_pid =
635         Subtract<kPicIdLength>(picture_id, info.gof->pid_diff[gof_idx][i]);
636     for (size_t l = 0; l < temporal_idx; ++l) {
637       auto missing_frame_it = missing_frames_for_layer_[l].lower_bound(ref_pid);
638       if (missing_frame_it != missing_frames_for_layer_[l].end() &&
639           AheadOf<uint16_t, kPicIdLength>(picture_id, *missing_frame_it)) {
640         return true;
641       }
642     }
643   }
644   return false;
645 }
646 
FrameReceivedVp9(uint16_t picture_id,GofInfo * info)647 void RtpFrameReferenceFinder::FrameReceivedVp9(uint16_t picture_id,
648                                                GofInfo* info) {
649   int last_picture_id = info->last_picture_id;
650   size_t gof_size = std::min(info->gof->num_frames_in_gof, kMaxVp9FramesInGof);
651 
652   // If there is a gap, find which temporal layer the missing frames
653   // belong to and add the frame as missing for that temporal layer.
654   // Otherwise, remove this frame from the set of missing frames.
655   if (AheadOf<uint16_t, kPicIdLength>(picture_id, last_picture_id)) {
656     size_t diff = ForwardDiff<uint16_t, kPicIdLength>(info->gof->pid_start,
657                                                       last_picture_id);
658     size_t gof_idx = diff % gof_size;
659 
660     last_picture_id = Add<kPicIdLength>(last_picture_id, 1);
661     while (last_picture_id != picture_id) {
662       gof_idx = (gof_idx + 1) % gof_size;
663       RTC_CHECK(gof_idx < kMaxVp9FramesInGof);
664 
665       size_t temporal_idx = info->gof->temporal_idx[gof_idx];
666       if (temporal_idx >= kMaxTemporalLayers) {
667         RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
668                             << " temporal "
669                                "layers are supported.";
670         return;
671       }
672 
673       missing_frames_for_layer_[temporal_idx].insert(last_picture_id);
674       last_picture_id = Add<kPicIdLength>(last_picture_id, 1);
675     }
676 
677     info->last_picture_id = last_picture_id;
678   } else {
679     size_t diff =
680         ForwardDiff<uint16_t, kPicIdLength>(info->gof->pid_start, picture_id);
681     size_t gof_idx = diff % gof_size;
682     RTC_CHECK(gof_idx < kMaxVp9FramesInGof);
683 
684     size_t temporal_idx = info->gof->temporal_idx[gof_idx];
685     if (temporal_idx >= kMaxTemporalLayers) {
686       RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
687                           << " temporal "
688                              "layers are supported.";
689       return;
690     }
691 
692     missing_frames_for_layer_[temporal_idx].erase(picture_id);
693   }
694 }
695 
UpSwitchInIntervalVp9(uint16_t picture_id,uint8_t temporal_idx,uint16_t pid_ref)696 bool RtpFrameReferenceFinder::UpSwitchInIntervalVp9(uint16_t picture_id,
697                                                     uint8_t temporal_idx,
698                                                     uint16_t pid_ref) {
699   for (auto up_switch_it = up_switch_.upper_bound(pid_ref);
700        up_switch_it != up_switch_.end() &&
701        AheadOf<uint16_t, kPicIdLength>(picture_id, up_switch_it->first);
702        ++up_switch_it) {
703     if (up_switch_it->second < temporal_idx)
704       return true;
705   }
706 
707   return false;
708 }
709 
UnwrapPictureIds(RtpFrameObject * frame)710 void RtpFrameReferenceFinder::UnwrapPictureIds(RtpFrameObject* frame) {
711   for (size_t i = 0; i < frame->num_references; ++i)
712     frame->references[i] = unwrapper_.Unwrap(frame->references[i]);
713   frame->id.picture_id = unwrapper_.Unwrap(frame->id.picture_id);
714 }
715 
UpdateLastPictureIdWithPaddingH264()716 void RtpFrameReferenceFinder::UpdateLastPictureIdWithPaddingH264() {
717   auto seq_num_it = last_seq_num_gop_.begin();
718 
719   // Check if next sequence number is in a stashed padding packet.
720   uint16_t next_padded_seq_num = seq_num_it->second.second + 1;
721   auto padding_seq_num_it = stashed_padding_.lower_bound(next_padded_seq_num);
722 
723   // Check for more consecutive padding packets to increment
724   // the "last-picture-id-with-padding" and remove the stashed packets.
725   while (padding_seq_num_it != stashed_padding_.end() &&
726          *padding_seq_num_it == next_padded_seq_num) {
727     seq_num_it->second.second = next_padded_seq_num;
728     ++next_padded_seq_num;
729     padding_seq_num_it = stashed_padding_.erase(padding_seq_num_it);
730   }
731 }
732 
UpdateLayerInfoH264(RtpFrameObject * frame,int64_t unwrapped_tl0,uint8_t temporal_idx)733 void RtpFrameReferenceFinder::UpdateLayerInfoH264(RtpFrameObject* frame,
734                                                   int64_t unwrapped_tl0,
735                                                   uint8_t temporal_idx) {
736   auto layer_info_it = layer_info_.find(unwrapped_tl0);
737 
738   // Update this layer info and newer.
739   while (layer_info_it != layer_info_.end()) {
740     if (layer_info_it->second[temporal_idx] != -1 &&
741         AheadOf<uint16_t>(layer_info_it->second[temporal_idx],
742                           frame->id.picture_id)) {
743       // Not a newer frame. No subsequent layer info needs update.
744       break;
745     }
746 
747     layer_info_it->second[temporal_idx] = frame->id.picture_id;
748     ++unwrapped_tl0;
749     layer_info_it = layer_info_.find(unwrapped_tl0);
750   }
751 
752   for (size_t i = 0; i < frame->num_references; ++i)
753     frame->references[i] = rtp_seq_num_unwrapper_.Unwrap(frame->references[i]);
754   frame->id.picture_id = rtp_seq_num_unwrapper_.Unwrap(frame->id.picture_id);
755 }
756 
UpdateDataH264(RtpFrameObject * frame,int64_t unwrapped_tl0,uint8_t temporal_idx)757 void RtpFrameReferenceFinder::UpdateDataH264(RtpFrameObject* frame,
758                                              int64_t unwrapped_tl0,
759                                              uint8_t temporal_idx) {
760   // Update last_seq_num_gop_ entry for last picture id.
761   auto seq_num_it = last_seq_num_gop_.begin();
762   uint16_t last_pic_id = seq_num_it->second.first;
763   if (AheadOf<uint16_t>(frame->id.picture_id, last_pic_id)) {
764     seq_num_it->second.first = frame->id.picture_id;
765     seq_num_it->second.second = frame->id.picture_id;
766   }
767   UpdateLastPictureIdWithPaddingH264();
768 
769   UpdateLayerInfoH264(frame, unwrapped_tl0, temporal_idx);
770 
771   // Remove any current packets from |not_yet_received_seq_num_|.
772   uint16_t last_seq_num_padded = seq_num_it->second.second;
773   for (uint16_t n = frame->first_seq_num(); AheadOrAt(last_seq_num_padded, n);
774        ++n) {
775     not_yet_received_seq_num_.erase(n);
776   }
777 }
778 
779 }  // namespace video_coding
780 }  // namespace webrtc
781