/*
 * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/video_coding/rtp_vp9_ref_finder.h"

#include <algorithm>
#include <utility>

#include "rtc_base/logging.h"

namespace webrtc {
19
ManageFrame(std::unique_ptr<RtpFrameObject> frame)20 RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame(
21 std::unique_ptr<RtpFrameObject> frame) {
22 FrameDecision decision = ManageFrameInternal(frame.get());
23
24 RtpFrameReferenceFinder::ReturnVector res;
25 switch (decision) {
26 case kStash:
27 if (stashed_frames_.size() > kMaxStashedFrames)
28 stashed_frames_.pop_back();
29 stashed_frames_.push_front(std::move(frame));
30 return res;
31 case kHandOff:
32 res.push_back(std::move(frame));
33 RetryStashedFrames(res);
34 return res;
35 case kDrop:
36 return res;
37 }
38
39 return res;
40 }
41
// Computes the picture id and reference list of `frame` and decides whether
// it can be handed off to the decoder (kHandOff), must wait for missing
// information (kStash), or is unusable/corrupt (kDrop).
RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameInternal(
    RtpFrameObject* frame) {
  const RTPVideoHeader& video_header = frame->GetRtpVideoHeader();
  const RTPVideoHeaderVP9& codec_header =
      absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);

  // Protect against corrupted packets with arbitrary large temporal idx.
  if (codec_header.temporal_idx >= kMaxTemporalLayers ||
      codec_header.spatial_idx >= kMaxSpatialLayers)
    return kDrop;

  frame->SetSpatialIndex(codec_header.spatial_idx);
  // Picture ids wrap modulo kFrameIdLength; all arithmetic below uses the
  // wrap-aware Add/Subtract/ForwardDiff helpers.
  frame->SetId(codec_header.picture_id & (kFrameIdLength - 1));

  if (last_picture_id_ == -1)
    last_picture_id_ = frame->Id();

  // Flexible mode: references are signaled explicitly in the RTP header as
  // picture-id diffs, so no GOF bookkeeping is needed.
  if (codec_header.flexible_mode) {
    if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
      return kDrop;
    }
    frame->num_references = codec_header.num_ref_pics;
    for (size_t i = 0; i < frame->num_references; ++i) {
      frame->references[i] =
          Subtract<kFrameIdLength>(frame->Id(), codec_header.pid_diff[i]);
    }

    FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
    return kHandOff;
  }

  // Non-flexible mode: references are derived from the group-of-frames (GOF)
  // structure, which is keyed by TL0PICIDX.
  if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
    RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
                           "non-flexible mode.";
    return kDrop;
  }

  GofInfo* info;
  // TL0PICIDX is 8 bits on the wire; unwrap it to a monotonic 64-bit index.
  int64_t unwrapped_tl0 =
      tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
  if (codec_header.ss_data_available) {
    if (codec_header.temporal_idx != 0) {
      RTC_LOG(LS_WARNING) << "Received scalability structure on a non base "
                             "layer frame. Scalability structure ignored.";
    } else {
      // Validate the signaled GOF before storing it.
      if (codec_header.gof.num_frames_in_gof > kMaxVp9FramesInGof) {
        return kDrop;
      }

      for (size_t i = 0; i < codec_header.gof.num_frames_in_gof; ++i) {
        if (codec_header.gof.num_ref_pics[i] > kMaxVp9RefPics) {
          return kDrop;
        }
      }

      GofInfoVP9 gof = codec_header.gof;
      if (gof.num_frames_in_gof == 0) {
        RTC_LOG(LS_WARNING) << "Number of frames in GOF is zero. Assume "
                               "that stream has only one temporal layer.";
        gof.SetGofInfoVP9(kTemporalStructureMode1);
      }

      // Store the structure in the kMaxGofSaved-entry ring buffer and
      // associate it with this TL0PICIDX.
      current_ss_idx_ = Add<kMaxGofSaved>(current_ss_idx_, 1);
      scalability_structures_[current_ss_idx_] = gof;
      scalability_structures_[current_ss_idx_].pid_start = frame->Id();
      gof_info_.emplace(
          unwrapped_tl0,
          GofInfo(&scalability_structures_[current_ss_idx_], frame->Id()));
    }

    const auto gof_info_it = gof_info_.find(unwrapped_tl0);
    if (gof_info_it == gof_info_.end())
      return kStash;

    info = &gof_info_it->second;

    // A keyframe has no references and can be handed off immediately.
    if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
      frame->num_references = 0;
      FrameReceivedVp9(frame->Id(), info);
      FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
      return kHandOff;
    }
  } else if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
    // Keyframe without its own scalability structure: only acceptable on an
    // upper spatial layer whose structure arrived with the base layer.
    if (frame->SpatialIndex() == 0) {
      RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure";
      return kDrop;
    }
    const auto gof_info_it = gof_info_.find(unwrapped_tl0);
    if (gof_info_it == gof_info_.end())
      return kStash;

    info = &gof_info_it->second;

    frame->num_references = 0;
    FrameReceivedVp9(frame->Id(), info);
    FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
    return kHandOff;
  } else {
    // Delta frame: a base-layer (TL0) frame continues the GOF that was active
    // for the previous TL0PICIDX; higher layers use the current one.
    auto gof_info_it = gof_info_.find(
        (codec_header.temporal_idx == 0) ? unwrapped_tl0 - 1 : unwrapped_tl0);

    // Gof info for this frame is not available yet, stash this frame.
    if (gof_info_it == gof_info_.end())
      return kStash;

    if (codec_header.temporal_idx == 0) {
      // Start tracking this TL0 period, reusing the same GOF structure.
      gof_info_it = gof_info_
                        .emplace(unwrapped_tl0,
                                 GofInfo(gof_info_it->second.gof, frame->Id()))
                        .first;
    }

    info = &gof_info_it->second;
  }

  // Clean up info for base layers that are too old.
  int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxGofSaved;
  auto clean_gof_info_to = gof_info_.lower_bound(old_tl0_pic_idx);
  gof_info_.erase(gof_info_.begin(), clean_gof_info_to);

  FrameReceivedVp9(frame->Id(), info);

  // Make sure we don't miss any frame that could potentially have the
  // up switch flag set.
  if (MissingRequiredFrameVp9(frame->Id(), *info))
    return kStash;

  if (codec_header.temporal_up_switch)
    up_switch_.emplace(frame->Id(), codec_header.temporal_idx);

  // Clean out old info about up switch frames.
  uint16_t old_picture_id = Subtract<kFrameIdLength>(frame->Id(), 50);
  auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id);
  up_switch_.erase(up_switch_.begin(), up_switch_erase_to);

  size_t diff =
      ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start, frame->Id());
  size_t gof_idx = diff % info->gof->num_frames_in_gof;

  if (info->gof->num_ref_pics[gof_idx] > EncodedFrame::kMaxFrameReferences) {
    return kDrop;
  }
  // Populate references according to the scalability structure.
  frame->num_references = info->gof->num_ref_pics[gof_idx];
  for (size_t i = 0; i < frame->num_references; ++i) {
    frame->references[i] =
        Subtract<kFrameIdLength>(frame->Id(), info->gof->pid_diff[gof_idx][i]);

    // If this is a reference to a frame earlier than the last up switch point,
    // then ignore this reference.
    if (UpSwitchInIntervalVp9(frame->Id(), codec_header.temporal_idx,
                              frame->references[i])) {
      --frame->num_references;
    }
  }

  // Override GOF references.
  if (!codec_header.inter_pic_predicted) {
    frame->num_references = 0;
  }

  FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
  return kHandOff;
}

MissingRequiredFrameVp9(uint16_t picture_id,const GofInfo & info)207 bool RtpVp9RefFinder::MissingRequiredFrameVp9(uint16_t picture_id,
208 const GofInfo& info) {
209 size_t diff =
210 ForwardDiff<uint16_t, kFrameIdLength>(info.gof->pid_start, picture_id);
211 size_t gof_idx = diff % info.gof->num_frames_in_gof;
212 size_t temporal_idx = info.gof->temporal_idx[gof_idx];
213
214 if (temporal_idx >= kMaxTemporalLayers) {
215 RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
216 << " temporal "
217 "layers are supported.";
218 return true;
219 }
220
221 // For every reference this frame has, check if there is a frame missing in
222 // the interval (|ref_pid|, |picture_id|) in any of the lower temporal
223 // layers. If so, we are missing a required frame.
224 uint8_t num_references = info.gof->num_ref_pics[gof_idx];
225 for (size_t i = 0; i < num_references; ++i) {
226 uint16_t ref_pid =
227 Subtract<kFrameIdLength>(picture_id, info.gof->pid_diff[gof_idx][i]);
228 for (size_t l = 0; l < temporal_idx; ++l) {
229 auto missing_frame_it = missing_frames_for_layer_[l].lower_bound(ref_pid);
230 if (missing_frame_it != missing_frames_for_layer_[l].end() &&
231 AheadOf<uint16_t, kFrameIdLength>(picture_id, *missing_frame_it)) {
232 return true;
233 }
234 }
235 }
236 return false;
237 }
238
// Updates the per-temporal-layer bookkeeping of missing frames when
// `picture_id` is received. `info->last_picture_id` tracks the newest
// picture id seen for this GOF.
void RtpVp9RefFinder::FrameReceivedVp9(uint16_t picture_id, GofInfo* info) {
  int last_picture_id = info->last_picture_id;
  size_t gof_size = std::min(info->gof->num_frames_in_gof, kMaxVp9FramesInGof);

  // If there is a gap, find which temporal layer the missing frames
  // belong to and add the frame as missing for that temporal layer.
  // Otherwise, remove this frame from the set of missing frames.
  if (AheadOf<uint16_t, kFrameIdLength>(picture_id, last_picture_id)) {
    size_t diff = ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start,
                                                        last_picture_id);
    size_t gof_idx = diff % gof_size;

    last_picture_id = Add<kFrameIdLength>(last_picture_id, 1);
    // Walk every picture id in the gap (exclusive of `picture_id` itself),
    // recording each as missing for its temporal layer per the GOF.
    while (last_picture_id != picture_id) {
      gof_idx = (gof_idx + 1) % gof_size;
      RTC_CHECK(gof_idx < kMaxVp9FramesInGof);

      size_t temporal_idx = info->gof->temporal_idx[gof_idx];
      if (temporal_idx >= kMaxTemporalLayers) {
        RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
                            << " temporal "
                               "layers are supported.";
        return;
      }

      missing_frames_for_layer_[temporal_idx].insert(last_picture_id);
      last_picture_id = Add<kFrameIdLength>(last_picture_id, 1);
    }

    info->last_picture_id = last_picture_id;
  } else {
    // `picture_id` is not newer than the last seen one: it fills in a
    // previously recorded gap, so clear it from the missing set.
    size_t diff =
        ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start, picture_id);
    size_t gof_idx = diff % gof_size;
    RTC_CHECK(gof_idx < kMaxVp9FramesInGof);

    size_t temporal_idx = info->gof->temporal_idx[gof_idx];
    if (temporal_idx >= kMaxTemporalLayers) {
      RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
                          << " temporal "
                             "layers are supported.";
      return;
    }

    missing_frames_for_layer_[temporal_idx].erase(picture_id);
  }
}

UpSwitchInIntervalVp9(uint16_t picture_id,uint8_t temporal_idx,uint16_t pid_ref)287 bool RtpVp9RefFinder::UpSwitchInIntervalVp9(uint16_t picture_id,
288 uint8_t temporal_idx,
289 uint16_t pid_ref) {
290 for (auto up_switch_it = up_switch_.upper_bound(pid_ref);
291 up_switch_it != up_switch_.end() &&
292 AheadOf<uint16_t, kFrameIdLength>(picture_id, up_switch_it->first);
293 ++up_switch_it) {
294 if (up_switch_it->second < temporal_idx)
295 return true;
296 }
297
298 return false;
299 }
300
RetryStashedFrames(RtpFrameReferenceFinder::ReturnVector & res)301 void RtpVp9RefFinder::RetryStashedFrames(
302 RtpFrameReferenceFinder::ReturnVector& res) {
303 bool complete_frame = false;
304 do {
305 complete_frame = false;
306 for (auto frame_it = stashed_frames_.begin();
307 frame_it != stashed_frames_.end();) {
308 FrameDecision decision = ManageFrameInternal(frame_it->get());
309
310 switch (decision) {
311 case kStash:
312 ++frame_it;
313 break;
314 case kHandOff:
315 complete_frame = true;
316 res.push_back(std::move(*frame_it));
317 ABSL_FALLTHROUGH_INTENDED;
318 case kDrop:
319 frame_it = stashed_frames_.erase(frame_it);
320 }
321 }
322 } while (complete_frame);
323 }
324
FlattenFrameIdAndRefs(RtpFrameObject * frame,bool inter_layer_predicted)325 void RtpVp9RefFinder::FlattenFrameIdAndRefs(RtpFrameObject* frame,
326 bool inter_layer_predicted) {
327 for (size_t i = 0; i < frame->num_references; ++i) {
328 frame->references[i] =
329 unwrapper_.Unwrap(frame->references[i]) * kMaxSpatialLayers +
330 *frame->SpatialIndex();
331 }
332 frame->SetId(unwrapper_.Unwrap(frame->Id()) * kMaxSpatialLayers +
333 *frame->SpatialIndex());
334
335 if (inter_layer_predicted &&
336 frame->num_references + 1 <= EncodedFrame::kMaxFrameReferences) {
337 frame->references[frame->num_references] = frame->Id() - 1;
338 ++frame->num_references;
339 }
340 }
341
ClearTo(uint16_t seq_num)342 void RtpVp9RefFinder::ClearTo(uint16_t seq_num) {
343 auto it = stashed_frames_.begin();
344 while (it != stashed_frames_.end()) {
345 if (AheadOf<uint16_t>(seq_num, (*it)->first_seq_num())) {
346 it = stashed_frames_.erase(it);
347 } else {
348 ++it;
349 }
350 }
351 }

}  // namespace webrtc