1 /*
2  *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/rtp_rtcp/source/rtp_format_vp9.h"
12 
13 #include <string.h>
14 
15 #include "api/video/video_codec_constants.h"
16 #include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
17 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.h"
18 #include "modules/video_coding/codecs/interface/common_constants.h"
19 #include "rtc_base/bit_buffer.h"
20 #include "rtc_base/checks.h"
21 #include "rtc_base/logging.h"
22 
// Returns false from the enclosing function when `x` evaluates to false.
//
// The expansion is wrapped in do { ... } while (0) so that it forms exactly
// one statement that must be terminated by the caller's semicolon. A bare
// `if { }` expansion breaks when the macro is used as the body of an unbraced
// `if` that has an `else` branch.
#define RETURN_FALSE_ON_ERROR(x) \
  do {                           \
    if (!(x)) {                  \
      return false;              \
    }                            \
  } while (0)
27 
28 namespace webrtc {
29 namespace {
// Size, in bytes, of the part of the VP9 payload descriptor that is always
// present.
constexpr size_t kFixedPayloadDescriptorBytes = 1;

// Value written into reserved bit positions of the descriptor.
constexpr uint32_t kReservedBitValue0 = 0;
34 
TemporalIdxField(const RTPVideoHeaderVP9 & hdr,uint8_t def)35 uint8_t TemporalIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) {
36   return (hdr.temporal_idx == kNoTemporalIdx) ? def : hdr.temporal_idx;
37 }
38 
SpatialIdxField(const RTPVideoHeaderVP9 & hdr,uint8_t def)39 uint8_t SpatialIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) {
40   return (hdr.spatial_idx == kNoSpatialIdx) ? def : hdr.spatial_idx;
41 }
42 
Tl0PicIdxField(const RTPVideoHeaderVP9 & hdr,uint8_t def)43 int16_t Tl0PicIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) {
44   return (hdr.tl0_pic_idx == kNoTl0PicIdx) ? def : hdr.tl0_pic_idx;
45 }
46 
47 // Picture ID:
48 //
49 //      +-+-+-+-+-+-+-+-+
50 // I:   |M| PICTURE ID  |   M:0 => picture id is 7 bits.
51 //      +-+-+-+-+-+-+-+-+   M:1 => picture id is 15 bits.
52 // M:   | EXTENDED PID  |
53 //      +-+-+-+-+-+-+-+-+
54 //
PictureIdLength(const RTPVideoHeaderVP9 & hdr)55 size_t PictureIdLength(const RTPVideoHeaderVP9& hdr) {
56   if (hdr.picture_id == kNoPictureId)
57     return 0;
58   return (hdr.max_picture_id == kMaxOneBytePictureId) ? 1 : 2;
59 }
60 
PictureIdPresent(const RTPVideoHeaderVP9 & hdr)61 bool PictureIdPresent(const RTPVideoHeaderVP9& hdr) {
62   return PictureIdLength(hdr) > 0;
63 }
64 
65 // Layer indices:
66 //
67 // Flexible mode (F=1):     Non-flexible mode (F=0):
68 //
69 //      +-+-+-+-+-+-+-+-+   +-+-+-+-+-+-+-+-+
70 // L:   |  T  |U|  S  |D|   |  T  |U|  S  |D|
71 //      +-+-+-+-+-+-+-+-+   +-+-+-+-+-+-+-+-+
72 //                          |   TL0PICIDX   |
73 //                          +-+-+-+-+-+-+-+-+
74 //
LayerInfoLength(const RTPVideoHeaderVP9 & hdr)75 size_t LayerInfoLength(const RTPVideoHeaderVP9& hdr) {
76   if (hdr.temporal_idx == kNoTemporalIdx && hdr.spatial_idx == kNoSpatialIdx) {
77     return 0;
78   }
79   return hdr.flexible_mode ? 1 : 2;
80 }
81 
LayerInfoPresent(const RTPVideoHeaderVP9 & hdr)82 bool LayerInfoPresent(const RTPVideoHeaderVP9& hdr) {
83   return LayerInfoLength(hdr) > 0;
84 }
85 
86 // Reference indices:
87 //
88 //      +-+-+-+-+-+-+-+-+                P=1,F=1: At least one reference index
89 // P,F: | P_DIFF      |N|  up to 3 times          has to be specified.
90 //      +-+-+-+-+-+-+-+-+                    N=1: An additional P_DIFF follows
91 //                                                current P_DIFF.
92 //
RefIndicesLength(const RTPVideoHeaderVP9 & hdr)93 size_t RefIndicesLength(const RTPVideoHeaderVP9& hdr) {
94   if (!hdr.inter_pic_predicted || !hdr.flexible_mode)
95     return 0;
96 
97   RTC_DCHECK_GT(hdr.num_ref_pics, 0U);
98   RTC_DCHECK_LE(hdr.num_ref_pics, kMaxVp9RefPics);
99   return hdr.num_ref_pics;
100 }
101 
102 // Scalability structure (SS).
103 //
104 //      +-+-+-+-+-+-+-+-+
105 // V:   | N_S |Y|G|-|-|-|
106 //      +-+-+-+-+-+-+-+-+              -|
107 // Y:   |     WIDTH     | (OPTIONAL)    .
108 //      +               +               .
109 //      |               | (OPTIONAL)    .
110 //      +-+-+-+-+-+-+-+-+               . N_S + 1 times
111 //      |     HEIGHT    | (OPTIONAL)    .
112 //      +               +               .
113 //      |               | (OPTIONAL)    .
114 //      +-+-+-+-+-+-+-+-+              -|
115 // G:   |      N_G      | (OPTIONAL)
116 //      +-+-+-+-+-+-+-+-+                           -|
117 // N_G: |  T  |U| R |-|-| (OPTIONAL)                 .
118 //      +-+-+-+-+-+-+-+-+              -|            . N_G times
119 //      |    P_DIFF     | (OPTIONAL)    . R times    .
120 //      +-+-+-+-+-+-+-+-+              -|           -|
121 //
SsDataLength(const RTPVideoHeaderVP9 & hdr)122 size_t SsDataLength(const RTPVideoHeaderVP9& hdr) {
123   if (!hdr.ss_data_available)
124     return 0;
125 
126   RTC_DCHECK_GT(hdr.num_spatial_layers, 0U);
127   RTC_DCHECK_LE(hdr.num_spatial_layers, kMaxVp9NumberOfSpatialLayers);
128   RTC_DCHECK_LE(hdr.gof.num_frames_in_gof, kMaxVp9FramesInGof);
129   size_t length = 1;  // V
130   if (hdr.spatial_layer_resolution_present) {
131     length += 4 * hdr.num_spatial_layers;  // Y
132   }
133   if (hdr.gof.num_frames_in_gof > 0) {
134     ++length;  // G
135   }
136   // N_G
137   length += hdr.gof.num_frames_in_gof;  // T, U, R
138   for (size_t i = 0; i < hdr.gof.num_frames_in_gof; ++i) {
139     RTC_DCHECK_LE(hdr.gof.num_ref_pics[i], kMaxVp9RefPics);
140     length += hdr.gof.num_ref_pics[i];  // R times
141   }
142   return length;
143 }
144 
PayloadDescriptorLengthMinusSsData(const RTPVideoHeaderVP9 & hdr)145 size_t PayloadDescriptorLengthMinusSsData(const RTPVideoHeaderVP9& hdr) {
146   return kFixedPayloadDescriptorBytes + PictureIdLength(hdr) +
147          LayerInfoLength(hdr) + RefIndicesLength(hdr);
148 }
149 
150 // Picture ID:
151 //
152 //      +-+-+-+-+-+-+-+-+
153 // I:   |M| PICTURE ID  |   M:0 => picture id is 7 bits.
154 //      +-+-+-+-+-+-+-+-+   M:1 => picture id is 15 bits.
155 // M:   | EXTENDED PID  |
156 //      +-+-+-+-+-+-+-+-+
157 //
WritePictureId(const RTPVideoHeaderVP9 & vp9,rtc::BitBufferWriter * writer)158 bool WritePictureId(const RTPVideoHeaderVP9& vp9,
159                     rtc::BitBufferWriter* writer) {
160   bool m_bit = (PictureIdLength(vp9) == 2);
161   RETURN_FALSE_ON_ERROR(writer->WriteBits(m_bit ? 1 : 0, 1));
162   RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.picture_id, m_bit ? 15 : 7));
163   return true;
164 }
165 
166 // Layer indices:
167 //
168 // Flexible mode (F=1):
169 //
170 //      +-+-+-+-+-+-+-+-+
171 // L:   |  T  |U|  S  |D|
172 //      +-+-+-+-+-+-+-+-+
173 //
WriteLayerInfoCommon(const RTPVideoHeaderVP9 & vp9,rtc::BitBufferWriter * writer)174 bool WriteLayerInfoCommon(const RTPVideoHeaderVP9& vp9,
175                           rtc::BitBufferWriter* writer) {
176   RETURN_FALSE_ON_ERROR(writer->WriteBits(TemporalIdxField(vp9, 0), 3));
177   RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.temporal_up_switch ? 1 : 0, 1));
178   RETURN_FALSE_ON_ERROR(writer->WriteBits(SpatialIdxField(vp9, 0), 3));
179   RETURN_FALSE_ON_ERROR(
180       writer->WriteBits(vp9.inter_layer_predicted ? 1 : 0, 1));
181   return true;
182 }
183 
184 // Non-flexible mode (F=0):
185 //
186 //      +-+-+-+-+-+-+-+-+
187 // L:   |  T  |U|  S  |D|
188 //      +-+-+-+-+-+-+-+-+
189 //      |   TL0PICIDX   |
190 //      +-+-+-+-+-+-+-+-+
191 //
WriteLayerInfoNonFlexibleMode(const RTPVideoHeaderVP9 & vp9,rtc::BitBufferWriter * writer)192 bool WriteLayerInfoNonFlexibleMode(const RTPVideoHeaderVP9& vp9,
193                                    rtc::BitBufferWriter* writer) {
194   RETURN_FALSE_ON_ERROR(writer->WriteUInt8(Tl0PicIdxField(vp9, 0)));
195   return true;
196 }
197 
WriteLayerInfo(const RTPVideoHeaderVP9 & vp9,rtc::BitBufferWriter * writer)198 bool WriteLayerInfo(const RTPVideoHeaderVP9& vp9,
199                     rtc::BitBufferWriter* writer) {
200   if (!WriteLayerInfoCommon(vp9, writer))
201     return false;
202 
203   if (vp9.flexible_mode)
204     return true;
205 
206   return WriteLayerInfoNonFlexibleMode(vp9, writer);
207 }
208 
209 // Reference indices:
210 //
211 //      +-+-+-+-+-+-+-+-+                P=1,F=1: At least one reference index
212 // P,F: | P_DIFF      |N|  up to 3 times          has to be specified.
213 //      +-+-+-+-+-+-+-+-+                    N=1: An additional P_DIFF follows
214 //                                                current P_DIFF.
215 //
WriteRefIndices(const RTPVideoHeaderVP9 & vp9,rtc::BitBufferWriter * writer)216 bool WriteRefIndices(const RTPVideoHeaderVP9& vp9,
217                      rtc::BitBufferWriter* writer) {
218   if (!PictureIdPresent(vp9) || vp9.num_ref_pics == 0 ||
219       vp9.num_ref_pics > kMaxVp9RefPics) {
220     return false;
221   }
222   for (uint8_t i = 0; i < vp9.num_ref_pics; ++i) {
223     bool n_bit = !(i == vp9.num_ref_pics - 1);
224     RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.pid_diff[i], 7));
225     RETURN_FALSE_ON_ERROR(writer->WriteBits(n_bit ? 1 : 0, 1));
226   }
227   return true;
228 }
229 
230 // Scalability structure (SS).
231 //
232 //      +-+-+-+-+-+-+-+-+
233 // V:   | N_S |Y|G|-|-|-|
234 //      +-+-+-+-+-+-+-+-+              -|
235 // Y:   |     WIDTH     | (OPTIONAL)    .
236 //      +               +               .
237 //      |               | (OPTIONAL)    .
238 //      +-+-+-+-+-+-+-+-+               . N_S + 1 times
239 //      |     HEIGHT    | (OPTIONAL)    .
240 //      +               +               .
241 //      |               | (OPTIONAL)    .
242 //      +-+-+-+-+-+-+-+-+              -|
243 // G:   |      N_G      | (OPTIONAL)
244 //      +-+-+-+-+-+-+-+-+                           -|
245 // N_G: |  T  |U| R |-|-| (OPTIONAL)                 .
246 //      +-+-+-+-+-+-+-+-+              -|            . N_G times
247 //      |    P_DIFF     | (OPTIONAL)    . R times    .
248 //      +-+-+-+-+-+-+-+-+              -|           -|
249 //
WriteSsData(const RTPVideoHeaderVP9 & vp9,rtc::BitBufferWriter * writer)250 bool WriteSsData(const RTPVideoHeaderVP9& vp9, rtc::BitBufferWriter* writer) {
251   RTC_DCHECK_GT(vp9.num_spatial_layers, 0U);
252   RTC_DCHECK_LE(vp9.num_spatial_layers, kMaxVp9NumberOfSpatialLayers);
253   RTC_DCHECK_LE(vp9.gof.num_frames_in_gof, kMaxVp9FramesInGof);
254   bool g_bit = vp9.gof.num_frames_in_gof > 0;
255 
256   RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.num_spatial_layers - 1, 3));
257   RETURN_FALSE_ON_ERROR(
258       writer->WriteBits(vp9.spatial_layer_resolution_present ? 1 : 0, 1));
259   RETURN_FALSE_ON_ERROR(writer->WriteBits(g_bit ? 1 : 0, 1));  // G
260   RETURN_FALSE_ON_ERROR(writer->WriteBits(kReservedBitValue0, 3));
261 
262   if (vp9.spatial_layer_resolution_present) {
263     for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
264       RETURN_FALSE_ON_ERROR(writer->WriteUInt16(vp9.width[i]));
265       RETURN_FALSE_ON_ERROR(writer->WriteUInt16(vp9.height[i]));
266     }
267   }
268   if (g_bit) {
269     RETURN_FALSE_ON_ERROR(writer->WriteUInt8(vp9.gof.num_frames_in_gof));
270   }
271   for (size_t i = 0; i < vp9.gof.num_frames_in_gof; ++i) {
272     RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.gof.temporal_idx[i], 3));
273     RETURN_FALSE_ON_ERROR(
274         writer->WriteBits(vp9.gof.temporal_up_switch[i] ? 1 : 0, 1));
275     RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.gof.num_ref_pics[i], 2));
276     RETURN_FALSE_ON_ERROR(writer->WriteBits(kReservedBitValue0, 2));
277     for (uint8_t r = 0; r < vp9.gof.num_ref_pics[i]; ++r) {
278       RETURN_FALSE_ON_ERROR(writer->WriteUInt8(vp9.gof.pid_diff[i][r]));
279     }
280   }
281   return true;
282 }
283 
284 // TODO(https://bugs.webrtc.org/11319):
285 // Workaround for switching off spatial layers on the fly.
286 // Sent layers must start from SL0 on RTP layer, but can start from any
287 // spatial layer because WebRTC-SVC api isn't implemented yet and
288 // current API to invoke SVC is not flexible enough.
// Returns a copy of `original_header` in which the layers below
// `first_active_layer` are dropped: resolutions are moved down to start at
// index 0, the vacated entries are zeroed, and `num_spatial_layers` and
// `spatial_idx` are reduced by the same amount. The copy is returned
// unmodified when `first_active_layer` is already 0.
RTPVideoHeaderVP9 RemoveInactiveSpatialLayers(
    const RTPVideoHeaderVP9& original_header) {
  RTPVideoHeaderVP9 hdr(original_header);
  if (original_header.first_active_layer != 0) {
    const auto shift = hdr.first_active_layer;

    // Move the resolutions of the active layers to the front.
    for (size_t src = shift; src < hdr.num_spatial_layers; ++src) {
      hdr.width[src - shift] = hdr.width[src];
      hdr.height[src - shift] = hdr.height[src];
    }
    // Clear the entries at the end that no longer describe a layer.
    for (size_t stale = hdr.num_spatial_layers - shift;
         stale < hdr.num_spatial_layers; ++stale) {
      hdr.width[stale] = 0;
      hdr.height[stale] = 0;
    }

    hdr.num_spatial_layers -= shift;
    hdr.spatial_idx -= shift;
    hdr.first_active_layer = 0;
  }
  return hdr;
}
308 }  // namespace
309 
RtpPacketizerVp9(rtc::ArrayView<const uint8_t> payload,PayloadSizeLimits limits,const RTPVideoHeaderVP9 & hdr)310 RtpPacketizerVp9::RtpPacketizerVp9(rtc::ArrayView<const uint8_t> payload,
311                                    PayloadSizeLimits limits,
312                                    const RTPVideoHeaderVP9& hdr)
313     : hdr_(RemoveInactiveSpatialLayers(hdr)),
314       header_size_(PayloadDescriptorLengthMinusSsData(hdr_)),
315       first_packet_extra_header_size_(SsDataLength(hdr_)),
316       remaining_payload_(payload) {
317   RTC_DCHECK_EQ(hdr_.first_active_layer, 0);
318 
319   limits.max_payload_len -= header_size_;
320   limits.first_packet_reduction_len += first_packet_extra_header_size_;
321   limits.single_packet_reduction_len += first_packet_extra_header_size_;
322 
323   payload_sizes_ = SplitAboutEqually(payload.size(), limits);
324   current_packet_ = payload_sizes_.begin();
325 }
326 
327 RtpPacketizerVp9::~RtpPacketizerVp9() = default;
328 
NumPackets() const329 size_t RtpPacketizerVp9::NumPackets() const {
330   return payload_sizes_.end() - current_packet_;
331 }
332 
NextPacket(RtpPacketToSend * packet)333 bool RtpPacketizerVp9::NextPacket(RtpPacketToSend* packet) {
334   RTC_DCHECK(packet);
335   if (current_packet_ == payload_sizes_.end()) {
336     return false;
337   }
338 
339   bool layer_begin = current_packet_ == payload_sizes_.begin();
340   int packet_payload_len = *current_packet_;
341   ++current_packet_;
342   bool layer_end = current_packet_ == payload_sizes_.end();
343 
344   int header_size = header_size_;
345   if (layer_begin)
346     header_size += first_packet_extra_header_size_;
347 
348   uint8_t* buffer = packet->AllocatePayload(header_size + packet_payload_len);
349   RTC_CHECK(buffer);
350 
351   if (!WriteHeader(layer_begin, layer_end,
352                    rtc::MakeArrayView(buffer, header_size)))
353     return false;
354 
355   memcpy(buffer + header_size, remaining_payload_.data(), packet_payload_len);
356   remaining_payload_ = remaining_payload_.subview(packet_payload_len);
357 
358   // Ensure end_of_picture is always set on top spatial layer when it is not
359   // dropped.
360   RTC_DCHECK(hdr_.spatial_idx < hdr_.num_spatial_layers - 1 ||
361              hdr_.end_of_picture);
362 
363   packet->SetMarker(layer_end && hdr_.end_of_picture);
364   return true;
365 }
366 
367 // VP9 format:
368 //
369 // Payload descriptor for F = 1 (flexible mode)
370 //       0 1 2 3 4 5 6 7
371 //      +-+-+-+-+-+-+-+-+
372 //      |I|P|L|F|B|E|V|Z| (REQUIRED)
373 //      +-+-+-+-+-+-+-+-+
374 // I:   |M| PICTURE ID  | (RECOMMENDED)
375 //      +-+-+-+-+-+-+-+-+
376 // M:   | EXTENDED PID  | (RECOMMENDED)
377 //      +-+-+-+-+-+-+-+-+
378 // L:   |  T  |U|  S  |D| (CONDITIONALLY RECOMMENDED)
379 //      +-+-+-+-+-+-+-+-+                             -|
380 // P,F: | P_DIFF      |N| (CONDITIONALLY RECOMMENDED)  . up to 3 times
381 //      +-+-+-+-+-+-+-+-+                             -|
382 // V:   | SS            |
383 //      | ..            |
384 //      +-+-+-+-+-+-+-+-+
385 //
386 // Payload descriptor for F = 0 (non-flexible mode)
387 //       0 1 2 3 4 5 6 7
388 //      +-+-+-+-+-+-+-+-+
389 //      |I|P|L|F|B|E|V|Z| (REQUIRED)
390 //      +-+-+-+-+-+-+-+-+
391 // I:   |M| PICTURE ID  | (RECOMMENDED)
392 //      +-+-+-+-+-+-+-+-+
393 // M:   | EXTENDED PID  | (RECOMMENDED)
394 //      +-+-+-+-+-+-+-+-+
395 // L:   |  T  |U|  S  |D| (CONDITIONALLY RECOMMENDED)
396 //      +-+-+-+-+-+-+-+-+
397 //      |   TL0PICIDX   | (CONDITIONALLY REQUIRED)
398 //      +-+-+-+-+-+-+-+-+
399 // V:   | SS            |
400 //      | ..            |
401 //      +-+-+-+-+-+-+-+-+
WriteHeader(bool layer_begin,bool layer_end,rtc::ArrayView<uint8_t> buffer) const402 bool RtpPacketizerVp9::WriteHeader(bool layer_begin,
403                                    bool layer_end,
404                                    rtc::ArrayView<uint8_t> buffer) const {
405   // Required payload descriptor byte.
406   bool i_bit = PictureIdPresent(hdr_);
407   bool p_bit = hdr_.inter_pic_predicted;
408   bool l_bit = LayerInfoPresent(hdr_);
409   bool f_bit = hdr_.flexible_mode;
410   bool b_bit = layer_begin;
411   bool e_bit = layer_end;
412   bool v_bit = hdr_.ss_data_available && b_bit;
413   bool z_bit = hdr_.non_ref_for_inter_layer_pred;
414 
415   rtc::BitBufferWriter writer(buffer.data(), buffer.size());
416   RETURN_FALSE_ON_ERROR(writer.WriteBits(i_bit ? 1 : 0, 1));
417   RETURN_FALSE_ON_ERROR(writer.WriteBits(p_bit ? 1 : 0, 1));
418   RETURN_FALSE_ON_ERROR(writer.WriteBits(l_bit ? 1 : 0, 1));
419   RETURN_FALSE_ON_ERROR(writer.WriteBits(f_bit ? 1 : 0, 1));
420   RETURN_FALSE_ON_ERROR(writer.WriteBits(b_bit ? 1 : 0, 1));
421   RETURN_FALSE_ON_ERROR(writer.WriteBits(e_bit ? 1 : 0, 1));
422   RETURN_FALSE_ON_ERROR(writer.WriteBits(v_bit ? 1 : 0, 1));
423   RETURN_FALSE_ON_ERROR(writer.WriteBits(z_bit ? 1 : 0, 1));
424 
425   // Add fields that are present.
426   if (i_bit && !WritePictureId(hdr_, &writer)) {
427     RTC_LOG(LS_ERROR) << "Failed writing VP9 picture id.";
428     return false;
429   }
430   if (l_bit && !WriteLayerInfo(hdr_, &writer)) {
431     RTC_LOG(LS_ERROR) << "Failed writing VP9 layer info.";
432     return false;
433   }
434   if (p_bit && f_bit && !WriteRefIndices(hdr_, &writer)) {
435     RTC_LOG(LS_ERROR) << "Failed writing VP9 ref indices.";
436     return false;
437   }
438   if (v_bit && !WriteSsData(hdr_, &writer)) {
439     RTC_LOG(LS_ERROR) << "Failed writing VP9 SS data.";
440     return false;
441   }
442 
443   size_t offset_bytes = 0;
444   size_t offset_bits = 0;
445   writer.GetCurrentOffset(&offset_bytes, &offset_bits);
446   RTC_DCHECK_EQ(offset_bits, 0);
447   RTC_DCHECK_EQ(offset_bytes, buffer.size());
448   return true;
449 }
450 
451 }  // namespace webrtc
452