1 /*
2 * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10 #include "modules/video_coding/svc/scalability_structure_key_svc.h"
11
12 #include <bitset>
13 #include <utility>
14 #include <vector>
15
16 #include "absl/types/optional.h"
17 #include "api/transport/rtp/dependency_descriptor.h"
18 #include "api/video/video_bitrate_allocation.h"
19 #include "common_video/generic_frame_descriptor/generic_frame_info.h"
20 #include "modules/video_coding/svc/scalable_video_controller.h"
21 #include "rtc_base/checks.h"
22 #include "rtc_base/logging.h"
23
24 namespace webrtc {
25 namespace {
26 // Values to use as LayerFrameConfig::Id
27 enum : int { kKey, kDelta };
28
29 DecodeTargetIndication
Dti(int sid,int tid,const ScalableVideoController::LayerFrameConfig & config)30 Dti(int sid, int tid, const ScalableVideoController::LayerFrameConfig& config) {
31 if (config.IsKeyframe() || config.Id() == kKey) {
32 RTC_DCHECK_EQ(config.TemporalId(), 0);
33 return sid < config.SpatialId() ? DecodeTargetIndication::kNotPresent
34 : DecodeTargetIndication::kSwitch;
35 }
36
37 if (sid != config.SpatialId() || tid < config.TemporalId()) {
38 return DecodeTargetIndication::kNotPresent;
39 }
40 if (tid == config.TemporalId() && tid > 0) {
41 return DecodeTargetIndication::kDiscardable;
42 }
43 return DecodeTargetIndication::kSwitch;
44 }
45
46 } // namespace
47
// Namespace-scope definitions of the static constexpr class members. No
// initializer is given here, so the values presumably come from the class
// declaration in the header. Before C++17 such definitions are required when
// the constants are odr-used.
constexpr int ScalabilityStructureKeySvc::kMaxNumSpatialLayers;
constexpr int ScalabilityStructureKeySvc::kMaxNumTemporalLayers;
50
// Creates a structure with `num_spatial_layers` spatial and
// `num_temporal_layers` temporal layers. All decode targets start out active.
ScalabilityStructureKeySvc::ScalabilityStructureKeySvc(int num_spatial_layers,
                                                       int num_temporal_layers)
    : num_spatial_layers_(num_spatial_layers),
      num_temporal_layers_(num_temporal_layers),
      // Bit mask with the lowest (spatial layers * temporal layers) bits set:
      // one bit per decode target.
      active_decode_targets_(
          (uint32_t{1} << (num_spatial_layers * num_temporal_layers)) - 1) {
  // There is no point in using this structure without spatial scalability.
  RTC_DCHECK_GT(num_spatial_layers, 1);
  RTC_DCHECK_LE(num_spatial_layers, kMaxNumSpatialLayers);
  RTC_DCHECK_LE(num_temporal_layers, kMaxNumTemporalLayers);
  // NOTE(review): no lower bound is checked for `num_temporal_layers`;
  // presumably every caller passes at least 1 - confirm.
}
62
// The destructor is defaulted here, in the implementation file, rather than
// inside the class declaration.
ScalabilityStructureKeySvc::~ScalabilityStructureKeySvc() = default;
64
65 ScalableVideoController::StreamLayersConfig
StreamConfig() const66 ScalabilityStructureKeySvc::StreamConfig() const {
67 StreamLayersConfig result;
68 result.num_spatial_layers = num_spatial_layers_;
69 result.num_temporal_layers = num_temporal_layers_;
70 result.scaling_factor_num[num_spatial_layers_ - 1] = 1;
71 result.scaling_factor_den[num_spatial_layers_ - 1] = 1;
72 for (int sid = num_spatial_layers_ - 1; sid > 0; --sid) {
73 result.scaling_factor_num[sid - 1] = 1;
74 result.scaling_factor_den[sid - 1] = 2 * result.scaling_factor_den[sid];
75 }
76 return result;
77 }
78
TemporalLayerIsActive(int tid) const79 bool ScalabilityStructureKeySvc::TemporalLayerIsActive(int tid) const {
80 if (tid >= num_temporal_layers_) {
81 return false;
82 }
83 for (int sid = 0; sid < num_spatial_layers_; ++sid) {
84 if (DecodeTargetIsActive(sid, tid)) {
85 return true;
86 }
87 }
88 return false;
89 }
90
91 std::vector<ScalableVideoController::LayerFrameConfig>
KeyframeConfig()92 ScalabilityStructureKeySvc::KeyframeConfig() {
93 std::vector<LayerFrameConfig> configs;
94 configs.reserve(num_spatial_layers_);
95 absl::optional<int> spatial_dependency_buffer_id;
96 spatial_id_is_enabled_.reset();
97 // Disallow temporal references cross T0 on higher temporal layers.
98 can_reference_t1_frame_for_spatial_id_.reset();
99 for (int sid = 0; sid < num_spatial_layers_; ++sid) {
100 if (!DecodeTargetIsActive(sid, /*tid=*/0)) {
101 continue;
102 }
103 configs.emplace_back();
104 ScalableVideoController::LayerFrameConfig& config = configs.back();
105 config.Id(kKey).S(sid).T(0);
106
107 if (spatial_dependency_buffer_id) {
108 config.Reference(*spatial_dependency_buffer_id);
109 } else {
110 config.Keyframe();
111 }
112 config.Update(BufferIndex(sid, /*tid=*/0));
113
114 spatial_id_is_enabled_.set(sid);
115 spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/0);
116 }
117 return configs;
118 }
119
120 std::vector<ScalableVideoController::LayerFrameConfig>
T0Config()121 ScalabilityStructureKeySvc::T0Config() {
122 std::vector<LayerFrameConfig> configs;
123 configs.reserve(num_spatial_layers_);
124 // Disallow temporal references cross T0 on higher temporal layers.
125 can_reference_t1_frame_for_spatial_id_.reset();
126 for (int sid = 0; sid < num_spatial_layers_; ++sid) {
127 if (!DecodeTargetIsActive(sid, /*tid=*/0)) {
128 spatial_id_is_enabled_.reset(sid);
129 continue;
130 }
131 configs.emplace_back();
132 configs.back().Id(kDelta).S(sid).T(0).ReferenceAndUpdate(
133 BufferIndex(sid, /*tid=*/0));
134 }
135 return configs;
136 }
137
138 std::vector<ScalableVideoController::LayerFrameConfig>
T1Config()139 ScalabilityStructureKeySvc::T1Config() {
140 std::vector<LayerFrameConfig> configs;
141 configs.reserve(num_spatial_layers_);
142 for (int sid = 0; sid < num_spatial_layers_; ++sid) {
143 if (!DecodeTargetIsActive(sid, /*tid=*/1)) {
144 continue;
145 }
146 configs.emplace_back();
147 ScalableVideoController::LayerFrameConfig& config = configs.back();
148 config.Id(kDelta).S(sid).T(1).Reference(BufferIndex(sid, /*tid=*/0));
149 if (num_temporal_layers_ > 2) {
150 config.Update(BufferIndex(sid, /*tid=*/1));
151 can_reference_t1_frame_for_spatial_id_.set(sid);
152 }
153 }
154 return configs;
155 }
156
157 std::vector<ScalableVideoController::LayerFrameConfig>
T2Config()158 ScalabilityStructureKeySvc::T2Config() {
159 std::vector<LayerFrameConfig> configs;
160 configs.reserve(num_spatial_layers_);
161 for (int sid = 0; sid < num_spatial_layers_; ++sid) {
162 if (!DecodeTargetIsActive(sid, /*tid=*/2)) {
163 continue;
164 }
165 configs.emplace_back();
166 ScalableVideoController::LayerFrameConfig& config = configs.back();
167 config.Id(kDelta).S(sid).T(2);
168 if (can_reference_t1_frame_for_spatial_id_[sid]) {
169 config.Reference(BufferIndex(sid, /*tid=*/1));
170 } else {
171 config.Reference(BufferIndex(sid, /*tid=*/0));
172 }
173 }
174 return configs;
175 }
176
177 std::vector<ScalableVideoController::LayerFrameConfig>
NextFrameConfig(bool restart)178 ScalabilityStructureKeySvc::NextFrameConfig(bool restart) {
179 if (active_decode_targets_.none()) {
180 last_pattern_ = kNone;
181 return {};
182 }
183
184 if (restart) {
185 last_pattern_ = kNone;
186 }
187
188 switch (last_pattern_) {
189 case kNone:
190 last_pattern_ = kDeltaT0;
191 return KeyframeConfig();
192 case kDeltaT2B:
193 last_pattern_ = kDeltaT0;
194 return T0Config();
195 case kDeltaT2A:
196 if (TemporalLayerIsActive(1)) {
197 last_pattern_ = kDeltaT1;
198 return T1Config();
199 }
200 last_pattern_ = kDeltaT0;
201 return T0Config();
202 case kDeltaT1:
203 if (TemporalLayerIsActive(2)) {
204 last_pattern_ = kDeltaT2B;
205 return T2Config();
206 }
207 last_pattern_ = kDeltaT0;
208 return T0Config();
209 case kDeltaT0:
210 if (TemporalLayerIsActive(2)) {
211 last_pattern_ = kDeltaT2A;
212 return T2Config();
213 } else if (TemporalLayerIsActive(1)) {
214 last_pattern_ = kDeltaT1;
215 return T1Config();
216 }
217 last_pattern_ = kDeltaT0;
218 return T0Config();
219 }
220 RTC_NOTREACHED();
221 return {};
222 }
223
OnEncodeDone(const LayerFrameConfig & config)224 GenericFrameInfo ScalabilityStructureKeySvc::OnEncodeDone(
225 const LayerFrameConfig& config) {
226 GenericFrameInfo frame_info;
227 frame_info.spatial_id = config.SpatialId();
228 frame_info.temporal_id = config.TemporalId();
229 frame_info.encoder_buffers = config.Buffers();
230 frame_info.decode_target_indications.reserve(num_spatial_layers_ *
231 num_temporal_layers_);
232 for (int sid = 0; sid < num_spatial_layers_; ++sid) {
233 for (int tid = 0; tid < num_temporal_layers_; ++tid) {
234 frame_info.decode_target_indications.push_back(Dti(sid, tid, config));
235 }
236 }
237 frame_info.part_of_chain.assign(num_spatial_layers_, false);
238 if (config.IsKeyframe() || config.Id() == kKey) {
239 RTC_DCHECK_EQ(config.TemporalId(), 0);
240 for (int sid = config.SpatialId(); sid < num_spatial_layers_; ++sid) {
241 frame_info.part_of_chain[sid] = true;
242 }
243 } else if (config.TemporalId() == 0) {
244 frame_info.part_of_chain[config.SpatialId()] = true;
245 }
246 frame_info.active_decode_targets = active_decode_targets_;
247 return frame_info;
248 }
249
OnRatesUpdated(const VideoBitrateAllocation & bitrates)250 void ScalabilityStructureKeySvc::OnRatesUpdated(
251 const VideoBitrateAllocation& bitrates) {
252 for (int sid = 0; sid < num_spatial_layers_; ++sid) {
253 // Enable/disable spatial layers independetely.
254 bool active = bitrates.GetBitrate(sid, /*tid=*/0) > 0;
255 SetDecodeTargetIsActive(sid, /*tid=*/0, active);
256 if (!spatial_id_is_enabled_[sid] && active) {
257 // Key frame is required to reenable any spatial layer.
258 last_pattern_ = kNone;
259 }
260
261 for (int tid = 1; tid < num_temporal_layers_; ++tid) {
262 // To enable temporal layer, require bitrates for lower temporal layers.
263 active = active && bitrates.GetBitrate(sid, tid) > 0;
264 SetDecodeTargetIsActive(sid, tid, active);
265 }
266 }
267 }
268
// The destructor is defaulted here, in the implementation file, rather than
// inside the class declaration.
ScalabilityStructureL2T1Key::~ScalabilityStructureL2T1Key() = default;
270
DependencyStructure() const271 FrameDependencyStructure ScalabilityStructureL2T1Key::DependencyStructure()
272 const {
273 FrameDependencyStructure structure;
274 structure.num_decode_targets = 2;
275 structure.num_chains = 2;
276 structure.decode_target_protected_by_chain = {0, 1};
277 structure.templates.resize(4);
278 structure.templates[0].S(0).Dtis("S-").ChainDiffs({2, 1}).FrameDiffs({2});
279 structure.templates[1].S(0).Dtis("SS").ChainDiffs({0, 0});
280 structure.templates[2].S(1).Dtis("-S").ChainDiffs({1, 2}).FrameDiffs({2});
281 structure.templates[3].S(1).Dtis("-S").ChainDiffs({1, 1}).FrameDiffs({1});
282 return structure;
283 }
284
// The destructor is defaulted here, in the implementation file, rather than
// inside the class declaration.
ScalabilityStructureL2T2Key::~ScalabilityStructureL2T2Key() = default;
286
DependencyStructure() const287 FrameDependencyStructure ScalabilityStructureL2T2Key::DependencyStructure()
288 const {
289 FrameDependencyStructure structure;
290 structure.num_decode_targets = 4;
291 structure.num_chains = 2;
292 structure.decode_target_protected_by_chain = {0, 0, 1, 1};
293 structure.templates.resize(6);
294 auto& templates = structure.templates;
295 templates[0].S(0).T(0).Dtis("SSSS").ChainDiffs({0, 0});
296 templates[1].S(0).T(0).Dtis("SS--").ChainDiffs({4, 3}).FrameDiffs({4});
297 templates[2].S(0).T(1).Dtis("-D--").ChainDiffs({2, 1}).FrameDiffs({2});
298 templates[3].S(1).T(0).Dtis("--SS").ChainDiffs({1, 1}).FrameDiffs({1});
299 templates[4].S(1).T(0).Dtis("--SS").ChainDiffs({1, 4}).FrameDiffs({4});
300 templates[5].S(1).T(1).Dtis("---D").ChainDiffs({3, 2}).FrameDiffs({2});
301 return structure;
302 }
303
// The destructor is defaulted here, in the implementation file, rather than
// inside the class declaration.
ScalabilityStructureL3T3Key::~ScalabilityStructureL3T3Key() = default;
305
// Returns the frame dependency structure for three spatial layers with three
// temporal layers each: nine decode targets and one chain per spatial layer.
FrameDependencyStructure ScalabilityStructureL3T3Key::DependencyStructure()
    const {
  FrameDependencyStructure structure;
  structure.num_decode_targets = 9;
  structure.num_chains = 3;
  // Decode targets 0-2 belong to spatial layer 0, 3-5 to layer 1 and 6-8 to
  // layer 2.
  structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1, 2, 2, 2};
  auto& t = structure.templates;
  t.resize(15);
  // Templates are listed below in the order in which the frames that use them
  // appear in the stream. Inside the `structure.templates` array they are
  // sorted by (`spatial_id`, `temporal_id`), since that is a dependency
  // descriptor requirement. Indexes are written in hex for nicer alignment.
  // Key temporal unit (T0).
  t[0x0].S(0).T(0).Dtis("SSSSSSSSS").ChainDiffs({0, 0, 0});
  t[0x5].S(1).T(0).Dtis("---SSSSSS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
  t[0xA].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 1}).FrameDiffs({1});
  // First T2 temporal unit.
  t[0x3].S(0).T(2).Dtis("--D------").ChainDiffs({3, 2, 1}).FrameDiffs({3});
  t[0x8].S(1).T(2).Dtis("-----D---").ChainDiffs({4, 3, 2}).FrameDiffs({3});
  t[0xD].S(2).T(2).Dtis("--------D").ChainDiffs({5, 4, 3}).FrameDiffs({3});
  // T1 temporal unit.
  t[0x2].S(0).T(1).Dtis("-DS------").ChainDiffs({6, 5, 4}).FrameDiffs({6});
  t[0x7].S(1).T(1).Dtis("----DS---").ChainDiffs({7, 6, 5}).FrameDiffs({6});
  t[0xC].S(2).T(1).Dtis("-------DS").ChainDiffs({8, 7, 6}).FrameDiffs({6});
  // Second T2 temporal unit.
  t[0x4].S(0).T(2).Dtis("--D------").ChainDiffs({9, 8, 7}).FrameDiffs({3});
  t[0x9].S(1).T(2).Dtis("-----D---").ChainDiffs({10, 9, 8}).FrameDiffs({3});
  t[0xE].S(2).T(2).Dtis("--------D").ChainDiffs({11, 10, 9}).FrameDiffs({3});
  // Delta T0 temporal unit.
  t[0x1].S(0).T(0).Dtis("SSS------").ChainDiffs({12, 11, 10}).FrameDiffs({12});
  t[0x6].S(1).T(0).Dtis("---SSS---").ChainDiffs({1, 12, 11}).FrameDiffs({12});
  t[0xB].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 12}).FrameDiffs({12});
  return structure;
}
335
336 } // namespace webrtc
337