1 /*
2  *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 #include "modules/video_coding/svc/scalability_structure_key_svc.h"
11 
12 #include <bitset>
13 #include <utility>
14 #include <vector>
15 
16 #include "absl/types/optional.h"
17 #include "api/transport/rtp/dependency_descriptor.h"
18 #include "api/video/video_bitrate_allocation.h"
19 #include "common_video/generic_frame_descriptor/generic_frame_info.h"
20 #include "modules/video_coding/svc/scalable_video_controller.h"
21 #include "rtc_base/checks.h"
22 #include "rtc_base/logging.h"
23 
24 namespace webrtc {
25 namespace {
// Tags stored in LayerFrameConfig::Id. Frames that belong to the key picture
// are tagged kKey; all other frames are tagged kDelta.
enum : int {
  kKey = 0,
  kDelta = 1,
};
28 
29 DecodeTargetIndication
Dti(int sid,int tid,const ScalableVideoController::LayerFrameConfig & config)30 Dti(int sid, int tid, const ScalableVideoController::LayerFrameConfig& config) {
31   if (config.IsKeyframe() || config.Id() == kKey) {
32     RTC_DCHECK_EQ(config.TemporalId(), 0);
33     return sid < config.SpatialId() ? DecodeTargetIndication::kNotPresent
34                                     : DecodeTargetIndication::kSwitch;
35   }
36 
37   if (sid != config.SpatialId() || tid < config.TemporalId()) {
38     return DecodeTargetIndication::kNotPresent;
39   }
40   if (tid == config.TemporalId() && tid > 0) {
41     return DecodeTargetIndication::kDiscardable;
42   }
43   return DecodeTargetIndication::kSwitch;
44 }
45 
46 }  // namespace
47 
48 constexpr int ScalabilityStructureKeySvc::kMaxNumSpatialLayers;
49 constexpr int ScalabilityStructureKeySvc::kMaxNumTemporalLayers;
50 
ScalabilityStructureKeySvc(int num_spatial_layers,int num_temporal_layers)51 ScalabilityStructureKeySvc::ScalabilityStructureKeySvc(int num_spatial_layers,
52                                                        int num_temporal_layers)
53     : num_spatial_layers_(num_spatial_layers),
54       num_temporal_layers_(num_temporal_layers),
55       active_decode_targets_(
56           (uint32_t{1} << (num_spatial_layers * num_temporal_layers)) - 1) {
57   // There is no point to use this structure without spatial scalability.
58   RTC_DCHECK_GT(num_spatial_layers, 1);
59   RTC_DCHECK_LE(num_spatial_layers, kMaxNumSpatialLayers);
60   RTC_DCHECK_LE(num_temporal_layers, kMaxNumTemporalLayers);
61 }
62 
63 ScalabilityStructureKeySvc::~ScalabilityStructureKeySvc() = default;
64 
65 ScalableVideoController::StreamLayersConfig
StreamConfig() const66 ScalabilityStructureKeySvc::StreamConfig() const {
67   StreamLayersConfig result;
68   result.num_spatial_layers = num_spatial_layers_;
69   result.num_temporal_layers = num_temporal_layers_;
70   result.scaling_factor_num[num_spatial_layers_ - 1] = 1;
71   result.scaling_factor_den[num_spatial_layers_ - 1] = 1;
72   for (int sid = num_spatial_layers_ - 1; sid > 0; --sid) {
73     result.scaling_factor_num[sid - 1] = 1;
74     result.scaling_factor_den[sid - 1] = 2 * result.scaling_factor_den[sid];
75   }
76   return result;
77 }
78 
TemporalLayerIsActive(int tid) const79 bool ScalabilityStructureKeySvc::TemporalLayerIsActive(int tid) const {
80   if (tid >= num_temporal_layers_) {
81     return false;
82   }
83   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
84     if (DecodeTargetIsActive(sid, tid)) {
85       return true;
86     }
87   }
88   return false;
89 }
90 
91 std::vector<ScalableVideoController::LayerFrameConfig>
KeyframeConfig()92 ScalabilityStructureKeySvc::KeyframeConfig() {
93   std::vector<LayerFrameConfig> configs;
94   configs.reserve(num_spatial_layers_);
95   absl::optional<int> spatial_dependency_buffer_id;
96   spatial_id_is_enabled_.reset();
97   // Disallow temporal references cross T0 on higher temporal layers.
98   can_reference_t1_frame_for_spatial_id_.reset();
99   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
100     if (!DecodeTargetIsActive(sid, /*tid=*/0)) {
101       continue;
102     }
103     configs.emplace_back();
104     ScalableVideoController::LayerFrameConfig& config = configs.back();
105     config.Id(kKey).S(sid).T(0);
106 
107     if (spatial_dependency_buffer_id) {
108       config.Reference(*spatial_dependency_buffer_id);
109     } else {
110       config.Keyframe();
111     }
112     config.Update(BufferIndex(sid, /*tid=*/0));
113 
114     spatial_id_is_enabled_.set(sid);
115     spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/0);
116   }
117   return configs;
118 }
119 
120 std::vector<ScalableVideoController::LayerFrameConfig>
T0Config()121 ScalabilityStructureKeySvc::T0Config() {
122   std::vector<LayerFrameConfig> configs;
123   configs.reserve(num_spatial_layers_);
124   // Disallow temporal references cross T0 on higher temporal layers.
125   can_reference_t1_frame_for_spatial_id_.reset();
126   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
127     if (!DecodeTargetIsActive(sid, /*tid=*/0)) {
128       spatial_id_is_enabled_.reset(sid);
129       continue;
130     }
131     configs.emplace_back();
132     configs.back().Id(kDelta).S(sid).T(0).ReferenceAndUpdate(
133         BufferIndex(sid, /*tid=*/0));
134   }
135   return configs;
136 }
137 
138 std::vector<ScalableVideoController::LayerFrameConfig>
T1Config()139 ScalabilityStructureKeySvc::T1Config() {
140   std::vector<LayerFrameConfig> configs;
141   configs.reserve(num_spatial_layers_);
142   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
143     if (!DecodeTargetIsActive(sid, /*tid=*/1)) {
144       continue;
145     }
146     configs.emplace_back();
147     ScalableVideoController::LayerFrameConfig& config = configs.back();
148     config.Id(kDelta).S(sid).T(1).Reference(BufferIndex(sid, /*tid=*/0));
149     if (num_temporal_layers_ > 2) {
150       config.Update(BufferIndex(sid, /*tid=*/1));
151       can_reference_t1_frame_for_spatial_id_.set(sid);
152     }
153   }
154   return configs;
155 }
156 
157 std::vector<ScalableVideoController::LayerFrameConfig>
T2Config()158 ScalabilityStructureKeySvc::T2Config() {
159   std::vector<LayerFrameConfig> configs;
160   configs.reserve(num_spatial_layers_);
161   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
162     if (!DecodeTargetIsActive(sid, /*tid=*/2)) {
163       continue;
164     }
165     configs.emplace_back();
166     ScalableVideoController::LayerFrameConfig& config = configs.back();
167     config.Id(kDelta).S(sid).T(2);
168     if (can_reference_t1_frame_for_spatial_id_[sid]) {
169       config.Reference(BufferIndex(sid, /*tid=*/1));
170     } else {
171       config.Reference(BufferIndex(sid, /*tid=*/0));
172     }
173   }
174   return configs;
175 }
176 
177 std::vector<ScalableVideoController::LayerFrameConfig>
NextFrameConfig(bool restart)178 ScalabilityStructureKeySvc::NextFrameConfig(bool restart) {
179   if (active_decode_targets_.none()) {
180     last_pattern_ = kNone;
181     return {};
182   }
183 
184   if (restart) {
185     last_pattern_ = kNone;
186   }
187 
188   switch (last_pattern_) {
189     case kNone:
190       last_pattern_ = kDeltaT0;
191       return KeyframeConfig();
192     case kDeltaT2B:
193       last_pattern_ = kDeltaT0;
194       return T0Config();
195     case kDeltaT2A:
196       if (TemporalLayerIsActive(1)) {
197         last_pattern_ = kDeltaT1;
198         return T1Config();
199       }
200       last_pattern_ = kDeltaT0;
201       return T0Config();
202     case kDeltaT1:
203       if (TemporalLayerIsActive(2)) {
204         last_pattern_ = kDeltaT2B;
205         return T2Config();
206       }
207       last_pattern_ = kDeltaT0;
208       return T0Config();
209     case kDeltaT0:
210       if (TemporalLayerIsActive(2)) {
211         last_pattern_ = kDeltaT2A;
212         return T2Config();
213       } else if (TemporalLayerIsActive(1)) {
214         last_pattern_ = kDeltaT1;
215         return T1Config();
216       }
217       last_pattern_ = kDeltaT0;
218       return T0Config();
219   }
220   RTC_NOTREACHED();
221   return {};
222 }
223 
OnEncodeDone(const LayerFrameConfig & config)224 GenericFrameInfo ScalabilityStructureKeySvc::OnEncodeDone(
225     const LayerFrameConfig& config) {
226   GenericFrameInfo frame_info;
227   frame_info.spatial_id = config.SpatialId();
228   frame_info.temporal_id = config.TemporalId();
229   frame_info.encoder_buffers = config.Buffers();
230   frame_info.decode_target_indications.reserve(num_spatial_layers_ *
231                                                num_temporal_layers_);
232   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
233     for (int tid = 0; tid < num_temporal_layers_; ++tid) {
234       frame_info.decode_target_indications.push_back(Dti(sid, tid, config));
235     }
236   }
237   frame_info.part_of_chain.assign(num_spatial_layers_, false);
238   if (config.IsKeyframe() || config.Id() == kKey) {
239     RTC_DCHECK_EQ(config.TemporalId(), 0);
240     for (int sid = config.SpatialId(); sid < num_spatial_layers_; ++sid) {
241       frame_info.part_of_chain[sid] = true;
242     }
243   } else if (config.TemporalId() == 0) {
244     frame_info.part_of_chain[config.SpatialId()] = true;
245   }
246   frame_info.active_decode_targets = active_decode_targets_;
247   return frame_info;
248 }
249 
OnRatesUpdated(const VideoBitrateAllocation & bitrates)250 void ScalabilityStructureKeySvc::OnRatesUpdated(
251     const VideoBitrateAllocation& bitrates) {
252   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
253     // Enable/disable spatial layers independetely.
254     bool active = bitrates.GetBitrate(sid, /*tid=*/0) > 0;
255     SetDecodeTargetIsActive(sid, /*tid=*/0, active);
256     if (!spatial_id_is_enabled_[sid] && active) {
257       // Key frame is required to reenable any spatial layer.
258       last_pattern_ = kNone;
259     }
260 
261     for (int tid = 1; tid < num_temporal_layers_; ++tid) {
262       // To enable temporal layer, require bitrates for lower temporal layers.
263       active = active && bitrates.GetBitrate(sid, tid) > 0;
264       SetDecodeTargetIsActive(sid, tid, active);
265     }
266   }
267 }
268 
269 ScalabilityStructureL2T1Key::~ScalabilityStructureL2T1Key() = default;
270 
DependencyStructure() const271 FrameDependencyStructure ScalabilityStructureL2T1Key::DependencyStructure()
272     const {
273   FrameDependencyStructure structure;
274   structure.num_decode_targets = 2;
275   structure.num_chains = 2;
276   structure.decode_target_protected_by_chain = {0, 1};
277   structure.templates.resize(4);
278   structure.templates[0].S(0).Dtis("S-").ChainDiffs({2, 1}).FrameDiffs({2});
279   structure.templates[1].S(0).Dtis("SS").ChainDiffs({0, 0});
280   structure.templates[2].S(1).Dtis("-S").ChainDiffs({1, 2}).FrameDiffs({2});
281   structure.templates[3].S(1).Dtis("-S").ChainDiffs({1, 1}).FrameDiffs({1});
282   return structure;
283 }
284 
285 ScalabilityStructureL2T2Key::~ScalabilityStructureL2T2Key() = default;
286 
DependencyStructure() const287 FrameDependencyStructure ScalabilityStructureL2T2Key::DependencyStructure()
288     const {
289   FrameDependencyStructure structure;
290   structure.num_decode_targets = 4;
291   structure.num_chains = 2;
292   structure.decode_target_protected_by_chain = {0, 0, 1, 1};
293   structure.templates.resize(6);
294   auto& templates = structure.templates;
295   templates[0].S(0).T(0).Dtis("SSSS").ChainDiffs({0, 0});
296   templates[1].S(0).T(0).Dtis("SS--").ChainDiffs({4, 3}).FrameDiffs({4});
297   templates[2].S(0).T(1).Dtis("-D--").ChainDiffs({2, 1}).FrameDiffs({2});
298   templates[3].S(1).T(0).Dtis("--SS").ChainDiffs({1, 1}).FrameDiffs({1});
299   templates[4].S(1).T(0).Dtis("--SS").ChainDiffs({1, 4}).FrameDiffs({4});
300   templates[5].S(1).T(1).Dtis("---D").ChainDiffs({3, 2}).FrameDiffs({2});
301   return structure;
302 }
303 
304 ScalabilityStructureL3T3Key::~ScalabilityStructureL3T3Key() = default;
305 
DependencyStructure() const306 FrameDependencyStructure ScalabilityStructureL3T3Key::DependencyStructure()
307     const {
308   FrameDependencyStructure structure;
309   structure.num_decode_targets = 9;
310   structure.num_chains = 3;
311   structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1, 2, 2, 2};
312   auto& t = structure.templates;
313   t.resize(15);
314   // Templates are shown in the order frames following them appear in the
315   // stream, but in `structure.templates` array templates are sorted by
316   // (`spatial_id`, `temporal_id`) since that is a dependency descriptor
317   // requirement. Indexes are written in hex for nicer alignment.
318   t[0x0].S(0).T(0).Dtis("SSSSSSSSS").ChainDiffs({0, 0, 0});
319   t[0x5].S(1).T(0).Dtis("---SSSSSS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
320   t[0xA].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 1}).FrameDiffs({1});
321   t[0x3].S(0).T(2).Dtis("--D------").ChainDiffs({3, 2, 1}).FrameDiffs({3});
322   t[0x8].S(1).T(2).Dtis("-----D---").ChainDiffs({4, 3, 2}).FrameDiffs({3});
323   t[0xD].S(2).T(2).Dtis("--------D").ChainDiffs({5, 4, 3}).FrameDiffs({3});
324   t[0x2].S(0).T(1).Dtis("-DS------").ChainDiffs({6, 5, 4}).FrameDiffs({6});
325   t[0x7].S(1).T(1).Dtis("----DS---").ChainDiffs({7, 6, 5}).FrameDiffs({6});
326   t[0xC].S(2).T(1).Dtis("-------DS").ChainDiffs({8, 7, 6}).FrameDiffs({6});
327   t[0x4].S(0).T(2).Dtis("--D------").ChainDiffs({9, 8, 7}).FrameDiffs({3});
328   t[0x9].S(1).T(2).Dtis("-----D---").ChainDiffs({10, 9, 8}).FrameDiffs({3});
329   t[0xE].S(2).T(2).Dtis("--------D").ChainDiffs({11, 10, 9}).FrameDiffs({3});
330   t[0x1].S(0).T(0).Dtis("SSS------").ChainDiffs({12, 11, 10}).FrameDiffs({12});
331   t[0x6].S(1).T(0).Dtis("---SSS---").ChainDiffs({1, 12, 11}).FrameDiffs({12});
332   t[0xB].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 12}).FrameDiffs({12});
333   return structure;
334 }
335 
336 }  // namespace webrtc
337