1 /*
2  *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 #include "modules/video_coding/svc/scalability_structure_full_svc.h"
11 
12 #include <utility>
13 #include <vector>
14 
15 #include "absl/strings/string_view.h"
16 #include "absl/types/optional.h"
17 #include "api/transport/rtp/dependency_descriptor.h"
18 #include "rtc_base/checks.h"
19 #include "rtc_base/logging.h"
20 
21 namespace webrtc {
namespace {
// Values stored in LayerFrameConfig::Id() by this file.
// kKey tags layer frames produced while (re)starting the structure,
// kDelta tags every other layer frame.
enum : int {
  kKey = 0,
  kDelta = 1,
};
}  // namespace
25 
26 constexpr int ScalabilityStructureFullSvc::kMaxNumSpatialLayers;
27 constexpr int ScalabilityStructureFullSvc::kMaxNumTemporalLayers;
28 constexpr absl::string_view ScalabilityStructureFullSvc::kFramePatternNames[];
29 
ScalabilityStructureFullSvc(int num_spatial_layers,int num_temporal_layers)30 ScalabilityStructureFullSvc::ScalabilityStructureFullSvc(
31     int num_spatial_layers,
32     int num_temporal_layers)
33     : num_spatial_layers_(num_spatial_layers),
34       num_temporal_layers_(num_temporal_layers),
35       active_decode_targets_(
36           (uint32_t{1} << (num_spatial_layers * num_temporal_layers)) - 1) {
37   RTC_DCHECK_LE(num_spatial_layers, kMaxNumSpatialLayers);
38   RTC_DCHECK_LE(num_temporal_layers, kMaxNumTemporalLayers);
39 }
40 
41 ScalabilityStructureFullSvc::~ScalabilityStructureFullSvc() = default;
42 
43 ScalabilityStructureFullSvc::StreamLayersConfig
StreamConfig() const44 ScalabilityStructureFullSvc::StreamConfig() const {
45   StreamLayersConfig result;
46   result.num_spatial_layers = num_spatial_layers_;
47   result.num_temporal_layers = num_temporal_layers_;
48   result.scaling_factor_num[num_spatial_layers_ - 1] = 1;
49   result.scaling_factor_den[num_spatial_layers_ - 1] = 1;
50   for (int sid = num_spatial_layers_ - 1; sid > 0; --sid) {
51     result.scaling_factor_num[sid - 1] = 1;
52     result.scaling_factor_den[sid - 1] = 2 * result.scaling_factor_den[sid];
53   }
54   return result;
55 }
56 
TemporalLayerIsActive(int tid) const57 bool ScalabilityStructureFullSvc::TemporalLayerIsActive(int tid) const {
58   if (tid >= num_temporal_layers_) {
59     return false;
60   }
61   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
62     if (DecodeTargetIsActive(sid, tid)) {
63       return true;
64     }
65   }
66   return false;
67 }
68 
Dti(int sid,int tid,const LayerFrameConfig & config)69 DecodeTargetIndication ScalabilityStructureFullSvc::Dti(
70     int sid,
71     int tid,
72     const LayerFrameConfig& config) {
73   if (sid < config.SpatialId() || tid < config.TemporalId()) {
74     return DecodeTargetIndication::kNotPresent;
75   }
76   if (sid == config.SpatialId()) {
77     if (tid == 0) {
78       RTC_DCHECK_EQ(config.TemporalId(), 0);
79       return DecodeTargetIndication::kSwitch;
80     }
81     if (tid == config.TemporalId()) {
82       return DecodeTargetIndication::kDiscardable;
83     }
84     if (tid > config.TemporalId()) {
85       RTC_DCHECK_GT(tid, config.TemporalId());
86       return DecodeTargetIndication::kSwitch;
87     }
88   }
89   RTC_DCHECK_GT(sid, config.SpatialId());
90   RTC_DCHECK_GE(tid, config.TemporalId());
91   if (config.IsKeyframe() || config.Id() == kKey) {
92     return DecodeTargetIndication::kSwitch;
93   }
94   return DecodeTargetIndication::kRequired;
95 }
96 
97 ScalabilityStructureFullSvc::FramePattern
NextPattern() const98 ScalabilityStructureFullSvc::NextPattern() const {
99   switch (last_pattern_) {
100     case kNone:
101     case kDeltaT2B:
102       return kDeltaT0;
103     case kDeltaT2A:
104       if (TemporalLayerIsActive(1)) {
105         return kDeltaT1;
106       }
107       return kDeltaT0;
108     case kDeltaT1:
109       if (TemporalLayerIsActive(2)) {
110         return kDeltaT2B;
111       }
112       return kDeltaT0;
113     case kDeltaT0:
114       if (TemporalLayerIsActive(2)) {
115         return kDeltaT2A;
116       }
117       if (TemporalLayerIsActive(1)) {
118         return kDeltaT1;
119       }
120       return kDeltaT0;
121   }
122 }
123 
124 std::vector<ScalableVideoController::LayerFrameConfig>
NextFrameConfig(bool restart)125 ScalabilityStructureFullSvc::NextFrameConfig(bool restart) {
126   std::vector<LayerFrameConfig> configs;
127   if (active_decode_targets_.none()) {
128     last_pattern_ = kNone;
129     return configs;
130   }
131   configs.reserve(num_spatial_layers_);
132 
133   if (last_pattern_ == kNone || restart) {
134     can_reference_t0_frame_for_spatial_id_.reset();
135     last_pattern_ = kNone;
136   }
137   FramePattern current_pattern = NextPattern();
138 
139   absl::optional<int> spatial_dependency_buffer_id;
140   switch (current_pattern) {
141     case kDeltaT0:
142       // Disallow temporal references cross T0 on higher temporal layers.
143       can_reference_t1_frame_for_spatial_id_.reset();
144       for (int sid = 0; sid < num_spatial_layers_; ++sid) {
145         if (!DecodeTargetIsActive(sid, /*tid=*/0)) {
146           // Next frame from the spatial layer `sid` shouldn't depend on
147           // potentially old previous frame from the spatial layer `sid`.
148           can_reference_t0_frame_for_spatial_id_.reset(sid);
149           continue;
150         }
151         configs.emplace_back();
152         ScalableVideoController::LayerFrameConfig& config = configs.back();
153         config.Id(last_pattern_ == kNone ? kKey : kDelta).S(sid).T(0);
154 
155         if (spatial_dependency_buffer_id) {
156           config.Reference(*spatial_dependency_buffer_id);
157         } else if (last_pattern_ == kNone) {
158           config.Keyframe();
159         }
160 
161         if (can_reference_t0_frame_for_spatial_id_[sid]) {
162           config.ReferenceAndUpdate(BufferIndex(sid, /*tid=*/0));
163         } else {
164           // TODO(bugs.webrtc.org/11999): Propagate chain restart on delta frame
165           // to ChainDiffCalculator
166           config.Update(BufferIndex(sid, /*tid=*/0));
167         }
168 
169         can_reference_t0_frame_for_spatial_id_.set(sid);
170         spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/0);
171       }
172       break;
173     case kDeltaT1:
174       for (int sid = 0; sid < num_spatial_layers_; ++sid) {
175         if (!DecodeTargetIsActive(sid, /*tid=*/1) ||
176             !can_reference_t0_frame_for_spatial_id_[sid]) {
177           continue;
178         }
179         configs.emplace_back();
180         ScalableVideoController::LayerFrameConfig& config = configs.back();
181         config.Id(kDelta).S(sid).T(1);
182         // Temporal reference.
183         config.Reference(BufferIndex(sid, /*tid=*/0));
184         // Spatial reference unless this is the lowest active spatial layer.
185         if (spatial_dependency_buffer_id) {
186           config.Reference(*spatial_dependency_buffer_id);
187         }
188         // No frame reference top layer frame, so no need save it into a buffer.
189         if (num_temporal_layers_ > 2 || sid < num_spatial_layers_ - 1) {
190           config.Update(BufferIndex(sid, /*tid=*/1));
191           can_reference_t1_frame_for_spatial_id_.set(sid);
192         }
193         spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/1);
194       }
195       break;
196     case kDeltaT2A:
197     case kDeltaT2B:
198       for (int sid = 0; sid < num_spatial_layers_; ++sid) {
199         if (!DecodeTargetIsActive(sid, /*tid=*/2) ||
200             !can_reference_t0_frame_for_spatial_id_[sid]) {
201           continue;
202         }
203         configs.emplace_back();
204         ScalableVideoController::LayerFrameConfig& config = configs.back();
205         config.Id(kDelta).S(sid).T(2);
206         // Temporal reference.
207         if (current_pattern == kDeltaT2B &&
208             can_reference_t1_frame_for_spatial_id_[sid]) {
209           config.Reference(BufferIndex(sid, /*tid=*/1));
210         } else {
211           config.Reference(BufferIndex(sid, /*tid=*/0));
212         }
213         // Spatial reference unless this is the lowest active spatial layer.
214         if (spatial_dependency_buffer_id) {
215           config.Reference(*spatial_dependency_buffer_id);
216         }
217         // No frame reference top layer frame, so no need save it into a buffer.
218         if (sid < num_spatial_layers_ - 1) {
219           config.Update(BufferIndex(sid, /*tid=*/2));
220         }
221         spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/2);
222       }
223       break;
224     case kNone:
225       RTC_NOTREACHED();
226       break;
227   }
228 
229   if (configs.empty() && !restart) {
230     RTC_LOG(LS_WARNING) << "Failed to generate configuration for L"
231                         << num_spatial_layers_ << "T" << num_temporal_layers_
232                         << " with active decode targets "
233                         << active_decode_targets_.to_string('-').substr(
234                                active_decode_targets_.size() -
235                                num_spatial_layers_ * num_temporal_layers_)
236                         << " and transition from "
237                         << kFramePatternNames[last_pattern_] << " to "
238                         << kFramePatternNames[current_pattern]
239                         << ". Resetting.";
240     return NextFrameConfig(/*restart=*/true);
241   }
242 
243   last_pattern_ = current_pattern;
244   return configs;
245 }
246 
OnEncodeDone(const LayerFrameConfig & config)247 GenericFrameInfo ScalabilityStructureFullSvc::OnEncodeDone(
248     const LayerFrameConfig& config) {
249   GenericFrameInfo frame_info;
250   frame_info.spatial_id = config.SpatialId();
251   frame_info.temporal_id = config.TemporalId();
252   frame_info.encoder_buffers = config.Buffers();
253   frame_info.decode_target_indications.reserve(num_spatial_layers_ *
254                                                num_temporal_layers_);
255   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
256     for (int tid = 0; tid < num_temporal_layers_; ++tid) {
257       frame_info.decode_target_indications.push_back(Dti(sid, tid, config));
258     }
259   }
260   if (config.TemporalId() == 0) {
261     frame_info.part_of_chain.resize(num_spatial_layers_);
262     for (int sid = 0; sid < num_spatial_layers_; ++sid) {
263       frame_info.part_of_chain[sid] = config.SpatialId() <= sid;
264     }
265   } else {
266     frame_info.part_of_chain.assign(num_spatial_layers_, false);
267   }
268   frame_info.active_decode_targets = active_decode_targets_;
269   return frame_info;
270 }
271 
OnRatesUpdated(const VideoBitrateAllocation & bitrates)272 void ScalabilityStructureFullSvc::OnRatesUpdated(
273     const VideoBitrateAllocation& bitrates) {
274   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
275     // Enable/disable spatial layers independetely.
276     bool active = true;
277     for (int tid = 0; tid < num_temporal_layers_; ++tid) {
278       // To enable temporal layer, require bitrates for lower temporal layers.
279       active = active && bitrates.GetBitrate(sid, tid) > 0;
280       SetDecodeTargetIsActive(sid, tid, active);
281     }
282   }
283 }
284 
285 }  // namespace webrtc
286