/*
 *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/video_coding/rtp_vp9_ref_finder.h"

#include <algorithm>
#include <utility>

#include "rtc_base/logging.h"

namespace webrtc {

RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame(
    std::unique_ptr<RtpFrameObject> frame) {
  FrameDecision decision = ManageFrameInternal(frame.get());

  RtpFrameReferenceFinder::ReturnVector res;
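  // A frame is either handed off with fully resolved references, stashed
  // until the information needed to resolve them arrives, or dropped.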
  switch (decision) {
    case kStash:
      if (stashed_frames_.size() > kMaxStashedFrames)
        stashed_frames_.pop_back();
      stashed_frames_.push_front(std::move(frame));
      return res;
    case kHandOff:
      res.push_back(std::move(frame));
      RetryStashedFrames(res);
      return res;
    case kDrop:
      return res;
  }

  return res;
}

RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameInternal(
    RtpFrameObject* frame) {
  const RTPVideoHeader& video_header = frame->GetRtpVideoHeader();
  const RTPVideoHeaderVP9& codec_header =
      absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);

  // Protect against corrupted packets with arbitrary large temporal idx.
  if (codec_header.temporal_idx >= kMaxTemporalLayers ||
      codec_header.spatial_idx >= kMaxSpatialLayers)
    return kDrop;

  frame->SetSpatialIndex(codec_header.spatial_idx);
  frame->SetId(codec_header.picture_id & (kFrameIdLength - 1));

  if (last_picture_id_ == -1)
    last_picture_id_ = frame->Id();

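  // In flexible mode the references are signaled explicitly as picture id
  // differences, so they can be resolved without a scalability structure.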
  if (codec_header.flexible_mode) {
    if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
      return kDrop;
    }
    frame->num_references = codec_header.num_ref_pics;
    for (size_t i = 0; i < frame->num_references; ++i) {
      frame->references[i] =
          Subtract<kFrameIdLength>(frame->Id(), codec_header.pid_diff[i]);
    }

    FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
    return kHandOff;
  }

  if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
    RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
                           "non-flexible mode.";
    return kDrop;
  }

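  // Non-flexible mode: references are derived from the group of frames (GOF)
  // structure that was signaled in the scalability structure (SS) data.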
  GofInfo* info;
  int64_t unwrapped_tl0 =
      tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
  if (codec_header.ss_data_available) {
    if (codec_header.temporal_idx != 0) {
      RTC_LOG(LS_WARNING) << "Received scalability structure on a non base "
                             "layer frame. Scalability structure ignored.";
    } else {
      if (codec_header.gof.num_frames_in_gof > kMaxVp9FramesInGof) {
        return kDrop;
      }

      for (size_t i = 0; i < codec_header.gof.num_frames_in_gof; ++i) {
        if (codec_header.gof.num_ref_pics[i] > kMaxVp9RefPics) {
          return kDrop;
        }
      }

      GofInfoVP9 gof = codec_header.gof;
      if (gof.num_frames_in_gof == 0) {
        RTC_LOG(LS_WARNING) << "Number of frames in GOF is zero. Assume "
                               "that stream has only one temporal layer.";
        gof.SetGofInfoVP9(kTemporalStructureMode1);
      }

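      // Save the new GOF among the stored scalability structures and
      // register it in |gof_info_| keyed by this frame's unwrapped TL0PICIDX.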
      current_ss_idx_ = Add<kMaxGofSaved>(current_ss_idx_, 1);
      scalability_structures_[current_ss_idx_] = gof;
      scalability_structures_[current_ss_idx_].pid_start = frame->Id();
      gof_info_.emplace(
          unwrapped_tl0,
          GofInfo(&scalability_structures_[current_ss_idx_], frame->Id()));
    }

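    // Look up the GOF associated with this frame's TL0PICIDX. If it has not
    // been received yet the references cannot be resolved; stash the frame.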
    const auto gof_info_it = gof_info_.find(unwrapped_tl0);
    if (gof_info_it == gof_info_.end())
      return kStash;

    info = &gof_info_it->second;

    if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
      frame->num_references = 0;
      FrameReceivedVp9(frame->Id(), info);
      FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
      return kHandOff;
    }
  } else if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
    if (frame->SpatialIndex() == 0) {
      RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure";
      return kDrop;
    }
    const auto gof_info_it = gof_info_.find(unwrapped_tl0);
    if (gof_info_it == gof_info_.end())
      return kStash;

    info = &gof_info_it->second;

    frame->num_references = 0;
    FrameReceivedVp9(frame->Id(), info);
    FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
    return kHandOff;
  } else {
    auto gof_info_it = gof_info_.find(
        (codec_header.temporal_idx == 0) ? unwrapped_tl0 - 1 : unwrapped_tl0);

    // Gof info for this frame is not available yet, stash this frame.
    if (gof_info_it == gof_info_.end())
      return kStash;

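    // A base layer frame continues the previously signaled GOF; register the
    // GOF under this frame's TL0PICIDX with this frame as the last received
    // picture.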
    if (codec_header.temporal_idx == 0) {
      gof_info_it = gof_info_
                        .emplace(unwrapped_tl0,
                                 GofInfo(gof_info_it->second.gof, frame->Id()))
                        .first;
    }

    info = &gof_info_it->second;
  }

  // Clean up info for base layers that are too old.
  int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxGofSaved;
  auto clean_gof_info_to = gof_info_.lower_bound(old_tl0_pic_idx);
  gof_info_.erase(gof_info_.begin(), clean_gof_info_to);

  FrameReceivedVp9(frame->Id(), info);

  // Make sure we don't miss any frame that could potentially have the
  // up switch flag set.
  if (MissingRequiredFrameVp9(frame->Id(), *info))
    return kStash;

  if (codec_header.temporal_up_switch)
    up_switch_.emplace(frame->Id(), codec_header.temporal_idx);

  // Clean out old info about up switch frames.
  uint16_t old_picture_id = Subtract<kFrameIdLength>(frame->Id(), 50);
  auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id);
  up_switch_.erase(up_switch_.begin(), up_switch_erase_to);

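  // Determine this frame's position in the GOF from its distance to the
  // picture id at which the GOF started.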
  size_t diff =
      ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start, frame->Id());
  size_t gof_idx = diff % info->gof->num_frames_in_gof;

  if (info->gof->num_ref_pics[gof_idx] > EncodedFrame::kMaxFrameReferences) {
    return kDrop;
  }
  // Populate references according to the scalability structure.
  frame->num_references = info->gof->num_ref_pics[gof_idx];
  for (size_t i = 0; i < frame->num_references; ++i) {
    frame->references[i] =
        Subtract<kFrameIdLength>(frame->Id(), info->gof->pid_diff[gof_idx][i]);

    // If this is a reference to a frame earlier than the last up switch point,
    // then ignore this reference.
    if (UpSwitchInIntervalVp9(frame->Id(), codec_header.temporal_idx,
                              frame->references[i])) {
      --frame->num_references;
    }
  }

  // Override GOF references.
  if (!codec_header.inter_pic_predicted) {
    frame->num_references = 0;
  }

  FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
  return kHandOff;
}

bool RtpVp9RefFinder::MissingRequiredFrameVp9(uint16_t picture_id,
                                              const GofInfo& info) {
  size_t diff =
      ForwardDiff<uint16_t, kFrameIdLength>(info.gof->pid_start, picture_id);
  size_t gof_idx = diff % info.gof->num_frames_in_gof;
  size_t temporal_idx = info.gof->temporal_idx[gof_idx];

  if (temporal_idx >= kMaxTemporalLayers) {
    RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers << " temporal "
                           "layers are supported.";
    return true;
  }

  // For every reference this frame has, check if there is a frame missing in
  // the interval (|ref_pid|, |picture_id|) in any of the lower temporal
  // layers. If so, we are missing a required frame.
  uint8_t num_references = info.gof->num_ref_pics[gof_idx];
  for (size_t i = 0; i < num_references; ++i) {
    uint16_t ref_pid =
        Subtract<kFrameIdLength>(picture_id, info.gof->pid_diff[gof_idx][i]);
    for (size_t l = 0; l < temporal_idx; ++l) {
      auto missing_frame_it = missing_frames_for_layer_[l].lower_bound(ref_pid);
      if (missing_frame_it != missing_frames_for_layer_[l].end() &&
          AheadOf<uint16_t, kFrameIdLength>(picture_id, *missing_frame_it)) {
        return true;
      }
    }
  }
  return false;
}

void RtpVp9RefFinder::FrameReceivedVp9(uint16_t picture_id, GofInfo* info) {
  int last_picture_id = info->last_picture_id;
  size_t gof_size = std::min(info->gof->num_frames_in_gof, kMaxVp9FramesInGof);

  // If there is a gap, find which temporal layer the missing frames
  // belong to and add the frame as missing for that temporal layer.
  // Otherwise, remove this frame from the set of missing frames.
  if (AheadOf<uint16_t, kFrameIdLength>(picture_id, last_picture_id)) {
    size_t diff = ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start,
                                                        last_picture_id);
    size_t gof_idx = diff % gof_size;

    last_picture_id = Add<kFrameIdLength>(last_picture_id, 1);
    while (last_picture_id != picture_id) {
      gof_idx = (gof_idx + 1) % gof_size;
      RTC_CHECK(gof_idx < kMaxVp9FramesInGof);

      size_t temporal_idx = info->gof->temporal_idx[gof_idx];
      if (temporal_idx >= kMaxTemporalLayers) {
        RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers << " temporal "
                               "layers are supported.";
        return;
      }

      missing_frames_for_layer_[temporal_idx].insert(last_picture_id);
      last_picture_id = Add<kFrameIdLength>(last_picture_id, 1);
    }

    info->last_picture_id = last_picture_id;
  } else {
    size_t diff =
        ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start, picture_id);
    size_t gof_idx = diff % gof_size;
    RTC_CHECK(gof_idx < kMaxVp9FramesInGof);

    size_t temporal_idx = info->gof->temporal_idx[gof_idx];
    if (temporal_idx >= kMaxTemporalLayers) {
      RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers << " temporal "
                             "layers are supported.";
      return;
    }

    missing_frames_for_layer_[temporal_idx].erase(picture_id);
  }
}

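// Returns true if a frame with the temporal up switch flag set and a temporal
// index lower than |temporal_idx| was received in the interval
// (|pid_ref|, |picture_id|).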
bool RtpVp9RefFinder::UpSwitchInIntervalVp9(uint16_t picture_id,
                                            uint8_t temporal_idx,
                                            uint16_t pid_ref) {
  for (auto up_switch_it = up_switch_.upper_bound(pid_ref);
       up_switch_it != up_switch_.end() &&
       AheadOf<uint16_t, kFrameIdLength>(picture_id, up_switch_it->first);
       ++up_switch_it) {
    if (up_switch_it->second < temporal_idx)
      return true;
  }

  return false;
}

void RtpVp9RefFinder::RetryStashedFrames(
    RtpFrameReferenceFinder::ReturnVector& res) {
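  // Keep iterating over the stashed frames as long as at least one of them
  // was handed off, since a completed frame may in turn resolve the
  // references of other stashed frames.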
  bool complete_frame = false;
  do {
    complete_frame = false;
    for (auto frame_it = stashed_frames_.begin();
         frame_it != stashed_frames_.end();) {
      FrameDecision decision = ManageFrameInternal(frame_it->get());

      switch (decision) {
        case kStash:
          ++frame_it;
          break;
        case kHandOff:
          complete_frame = true;
          res.push_back(std::move(*frame_it));
          ABSL_FALLTHROUGH_INTENDED;
        case kDrop:
          frame_it = stashed_frames_.erase(frame_it);
      }
    }
  } while (complete_frame);
}

void RtpVp9RefFinder::FlattenFrameIdAndRefs(RtpFrameObject* frame,
                                            bool inter_layer_predicted) {
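  // Map each (unwrapped picture id, spatial index) pair to a unique frame id
  // by scaling the picture id with the maximum number of spatial layers and
  // adding the spatial index.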
  for (size_t i = 0; i < frame->num_references; ++i) {
    frame->references[i] =
        unwrapper_.Unwrap(frame->references[i]) * kMaxSpatialLayers +
        *frame->SpatialIndex();
  }
  frame->SetId(unwrapper_.Unwrap(frame->Id()) * kMaxSpatialLayers +
               *frame->SpatialIndex());

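  // An inter-layer predicted frame additionally references the frame of the
  // spatial layer directly below it in the same superframe, which with the
  // mapping above has id Id() - 1.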
  if (inter_layer_predicted &&
      frame->num_references + 1 <= EncodedFrame::kMaxFrameReferences) {
    frame->references[frame->num_references] = frame->Id() - 1;
    ++frame->num_references;
  }
}

void RtpVp9RefFinder::ClearTo(uint16_t seq_num) {
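  // Drop all stashed frames whose first packet is older than |seq_num|.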
  auto it = stashed_frames_.begin();
  while (it != stashed_frames_.end()) {
    if (AheadOf<uint16_t>(seq_num, (*it)->first_seq_num())) {
      it = stashed_frames_.erase(it);
    } else {
      ++it;
    }
  }
}

}  // namespace webrtc