1 /*
2  *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.h"
12 
13 #include <memory>
14 #include <vector>
15 
16 #include "api/array_view.h"
17 #include "test/gmock.h"
18 #include "test/gtest.h"
19 
20 namespace webrtc {
21 namespace {
22 
VerifyHeader(const RTPVideoHeaderVP9 & expected,const RTPVideoHeaderVP9 & actual)23 void VerifyHeader(const RTPVideoHeaderVP9& expected,
24                   const RTPVideoHeaderVP9& actual) {
25   EXPECT_EQ(expected.inter_layer_predicted, actual.inter_layer_predicted);
26   EXPECT_EQ(expected.inter_pic_predicted, actual.inter_pic_predicted);
27   EXPECT_EQ(expected.flexible_mode, actual.flexible_mode);
28   EXPECT_EQ(expected.beginning_of_frame, actual.beginning_of_frame);
29   EXPECT_EQ(expected.end_of_frame, actual.end_of_frame);
30   EXPECT_EQ(expected.ss_data_available, actual.ss_data_available);
31   EXPECT_EQ(expected.non_ref_for_inter_layer_pred,
32             actual.non_ref_for_inter_layer_pred);
33   EXPECT_EQ(expected.picture_id, actual.picture_id);
34   EXPECT_EQ(expected.max_picture_id, actual.max_picture_id);
35   EXPECT_EQ(expected.temporal_idx, actual.temporal_idx);
36   EXPECT_EQ(expected.spatial_idx, actual.spatial_idx);
37   EXPECT_EQ(expected.gof_idx, actual.gof_idx);
38   EXPECT_EQ(expected.tl0_pic_idx, actual.tl0_pic_idx);
39   EXPECT_EQ(expected.temporal_up_switch, actual.temporal_up_switch);
40 
41   EXPECT_EQ(expected.num_ref_pics, actual.num_ref_pics);
42   for (uint8_t i = 0; i < expected.num_ref_pics; ++i) {
43     EXPECT_EQ(expected.pid_diff[i], actual.pid_diff[i]);
44     EXPECT_EQ(expected.ref_picture_id[i], actual.ref_picture_id[i]);
45   }
46   if (expected.ss_data_available) {
47     EXPECT_EQ(expected.spatial_layer_resolution_present,
48               actual.spatial_layer_resolution_present);
49     EXPECT_EQ(expected.num_spatial_layers, actual.num_spatial_layers);
50     if (expected.spatial_layer_resolution_present) {
51       for (size_t i = 0; i < expected.num_spatial_layers; i++) {
52         EXPECT_EQ(expected.width[i], actual.width[i]);
53         EXPECT_EQ(expected.height[i], actual.height[i]);
54       }
55     }
56     EXPECT_EQ(expected.gof.num_frames_in_gof, actual.gof.num_frames_in_gof);
57     for (size_t i = 0; i < expected.gof.num_frames_in_gof; i++) {
58       EXPECT_EQ(expected.gof.temporal_up_switch[i],
59                 actual.gof.temporal_up_switch[i]);
60       EXPECT_EQ(expected.gof.temporal_idx[i], actual.gof.temporal_idx[i]);
61       EXPECT_EQ(expected.gof.num_ref_pics[i], actual.gof.num_ref_pics[i]);
62       for (uint8_t j = 0; j < expected.gof.num_ref_pics[i]; j++) {
63         EXPECT_EQ(expected.gof.pid_diff[i][j], actual.gof.pid_diff[i][j]);
64       }
65     }
66   }
67 }
68 
// A descriptor made of only the first byte, with the B and E bits set, is
// expected to parse to an offset of 1 and mark both frame boundaries.
TEST(VideoRtpDepacketizerVp9Test, ParseBasicHeader) {
  // First byte: I:0 P:0 L:0 F:0 B:1 E:1 V:0 Z:0. Remaining bytes are zero.
  uint8_t payload[4] = {0x0C};

  RTPVideoHeaderVP9 expected;
  expected.InitRTPVideoHeaderVP9();
  expected.beginning_of_frame = true;
  expected.end_of_frame = true;

  RTPVideoHeader video_header;
  const int offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(payload, &video_header);

  EXPECT_EQ(offset, 1);
  const auto& actual =
      absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);
  VerifyHeader(expected, actual);
}
84 
// With the I bit set and the second byte holding kMaxOneBytePictureId, the
// test expects an offset of 2 and that value as both the picture id and the
// maximum picture id.
TEST(VideoRtpDepacketizerVp9Test, ParseOneBytePictureId) {
  // First byte: I:1 P:0 L:0 F:0 B:0 E:0 V:0 Z:0.
  uint8_t payload[10] = {0x80};
  payload[1] = kMaxOneBytePictureId;

  RTPVideoHeaderVP9 expected;
  expected.InitRTPVideoHeaderVP9();
  expected.picture_id = kMaxOneBytePictureId;
  expected.max_picture_id = kMaxOneBytePictureId;

  RTPVideoHeader video_header;
  const int offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(payload, &video_header);

  EXPECT_EQ(offset, 2);
  const auto& actual =
      absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);
  VerifyHeader(expected, actual);
}
101 
// With the I bit set and the top bit of the second byte set, the picture id
// spans two bytes; the test expects an offset of 3 and kMaxTwoBytePictureId
// as both the picture id and the maximum picture id.
TEST(VideoRtpDepacketizerVp9Test, ParseTwoBytePictureId) {
  // First byte: I:1 P:0 L:0 F:0 B:0 E:0 V:0 Z:0.
  uint8_t payload[10] = {0x80};
  // Top bit set, followed by the upper seven bits of the picture id.
  payload[1] = 0x80 | ((kMaxTwoBytePictureId >> 8) & 0x7F);
  // Lower eight bits of the picture id.
  payload[2] = kMaxTwoBytePictureId & 0xFF;

  RTPVideoHeaderVP9 expected;
  expected.InitRTPVideoHeaderVP9();
  expected.picture_id = kMaxTwoBytePictureId;
  expected.max_picture_id = kMaxTwoBytePictureId;

  RTPVideoHeader video_header;
  const int offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(payload, &video_header);

  EXPECT_EQ(offset, 3);
  const auto& actual =
      absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);
  VerifyHeader(expected, actual);
}
119 
// With L set and F cleared, the test expects the layer byte to be followed by
// a TL0PICIDX byte, giving an offset of 3.
TEST(VideoRtpDepacketizerVp9Test, ParseLayerInfoWithNonFlexibleMode) {
  const uint8_t kTid = 2;
  const uint8_t kUpSwitchBit = 1;
  const uint8_t kSid = 1;
  const uint8_t kInterLayerBit = 1;
  const uint8_t kTl0 = 17;

  // First byte: I:0 P:0 L:1 F:0 B:0 E:0 V:0 Z:0.
  uint8_t payload[13] = {0x20};
  // Layer byte: T:2 U:1 S:1 D:1.
  payload[1] =
      (kTid << 5) | (kUpSwitchBit << 4) | (kSid << 1) | kInterLayerBit;
  // TL0PICIDX:17.
  payload[2] = kTl0;

  RTPVideoHeaderVP9 expected;
  expected.InitRTPVideoHeaderVP9();
  expected.temporal_idx = kTid;
  expected.temporal_up_switch = (kUpSwitchBit != 0);
  expected.spatial_idx = kSid;
  expected.inter_layer_predicted = (kInterLayerBit != 0);
  expected.tl0_pic_idx = kTl0;

  RTPVideoHeader video_header;
  const int offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(payload, &video_header);

  EXPECT_EQ(offset, 3);
  const auto& actual =
      absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);
  VerifyHeader(expected, actual);
}
147 
// With both L and F set, the test expects only the layer byte to follow the
// first byte (offset 2), i.e. no TL0PICIDX byte.
TEST(VideoRtpDepacketizerVp9Test, ParseLayerInfoWithFlexibleMode) {
  const uint8_t kTid = 2;
  const uint8_t kUpSwitchBit = 1;
  const uint8_t kSid = 0;
  const uint8_t kInterLayerBit = 0;

  // First byte: I:0 P:0 L:1 F:1 B:1 E:0 V:0 Z:0.
  uint8_t payload[13] = {0x38};
  // Layer byte: T:2 U:1 S:0 D:0.
  payload[1] =
      (kTid << 5) | (kUpSwitchBit << 4) | (kSid << 1) | kInterLayerBit;

  RTPVideoHeaderVP9 expected;
  expected.InitRTPVideoHeaderVP9();
  expected.beginning_of_frame = true;
  expected.flexible_mode = true;
  expected.temporal_idx = kTid;
  expected.temporal_up_switch = (kUpSwitchBit != 0);
  expected.spatial_idx = kSid;
  expected.inter_layer_predicted = (kInterLayerBit != 0);

  RTPVideoHeader video_header;
  const int offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(payload, &video_header);

  EXPECT_EQ(offset, 2);
  const auto& actual =
      absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);
  VerifyHeader(expected, actual);
}
174 
// Flexible-mode packet with a two-byte picture id followed by three P_DIFF
// bytes. The test expects an offset of 6 and reference picture ids equal to
// the picture id minus each P_DIFF, wrapping around (kMaxPictureId + 1) when
// the difference would be negative.
TEST(VideoRtpDepacketizerVp9Test, ParseRefIdx) {
  constexpr size_t kNumRefs = 3;
  const int16_t kPictureId = 17;
  const uint8_t kPidDiff[kNumRefs] = {17, 18, 127};
  // 17 - 17 = 0
  // (kMaxPictureId + 1) + 17 - 18 = 0x7FFF
  // (kMaxPictureId + 1) + 17 - 127 = 32658
  const int16_t kRefPictureId[kNumRefs] = {0, 0x7FFF, 32658};
  constexpr size_t kFirstPdiffByte = 3;

  // First byte: I:1 P:1 L:0 F:1 B:1 E:0 V:0 Z:0.
  uint8_t payload[13] = {0xD8};
  payload[1] = 0x80 | ((kPictureId >> 8) & 0x7F);  // Two byte pictureID.
  payload[2] = kPictureId;
  // One byte per reference: P_DIFF in the upper seven bits, N in the lowest
  // bit. N is 1 on every entry except the last.
  for (size_t i = 0; i < kNumRefs; ++i) {
    const uint8_t n_bit = (i + 1 < kNumRefs) ? 1 : 0;
    payload[kFirstPdiffByte + i] = (kPidDiff[i] << 1) | n_bit;
  }

  RTPVideoHeaderVP9 expected;
  expected.InitRTPVideoHeaderVP9();
  expected.beginning_of_frame = true;
  expected.inter_pic_predicted = true;
  expected.flexible_mode = true;
  expected.picture_id = kPictureId;
  expected.num_ref_pics = kNumRefs;
  for (size_t i = 0; i < kNumRefs; ++i) {
    expected.pid_diff[i] = kPidDiff[i];
    expected.ref_picture_id[i] = kRefPictureId[i];
  }

  RTPVideoHeader video_header;
  const int offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(payload, &video_header);

  EXPECT_EQ(offset, 6);
  const auto& actual =
      absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);
  VerifyHeader(expected, actual);
}
214 
// A flexible-mode packet that carries a P_DIFF byte but no picture id (I:0)
// is expected to be rejected, i.e. ParseRtpPayload() returns 0.
TEST(VideoRtpDepacketizerVp9Test, ParseRefIdxFailsWithNoPictureId) {
  const uint8_t kPidDiff = 3;
  // First byte: I:0 P:1 L:0 F:1 B:1 E:0 V:0 Z:0.
  uint8_t payload[13] = {0x58};
  payload[1] = (kPidDiff << 1);  // P,F:  P_DIFF:3 N:0

  RTPVideoHeader video_header;
  const int offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(payload, &video_header);
  EXPECT_EQ(offset, 0);
}
224 
// A flexible-mode packet that chains four P_DIFF bytes is expected to be
// rejected, i.e. ParseRtpPayload() returns 0.
TEST(VideoRtpDepacketizerVp9Test, ParseRefIdxFailsWithTooManyRefPics) {
  const uint8_t kPidDiff = 3;
  constexpr size_t kFirstPdiffByte = 2;
  constexpr size_t kNumPdiffBytes = 4;

  // First byte: I:1 P:1 L:0 F:1 B:1 E:0 V:0 Z:0.
  uint8_t payload[13] = {0xD8};
  payload[1] = kMaxOneBytePictureId;  // I:    PICTURE ID:127
  // P,F:  P_DIFF:3 with N:1 on the first three bytes and N:0 on the fourth.
  for (size_t i = 0; i < kNumPdiffBytes; ++i) {
    const uint8_t n_bit = (i + 1 < kNumPdiffBytes) ? 1 : 0;
    payload[kFirstPdiffByte + i] = (kPidDiff << 1) | n_bit;
  }

  RTPVideoHeader video_header;
  const int offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(payload, &video_header);
  EXPECT_EQ(offset, 0);
}
238 
// Packet carrying scalability structure data (V:1) without per-layer
// resolutions (Y:0) but with a two-frame GOF description (G:1). The test
// expects an offset of 6 and the GOF fields listed below.
TEST(VideoRtpDepacketizerVp9Test, ParseSsData) {
  const uint8_t kResolutionBit = 0;
  const size_t kSpatialLayers = 2;
  const size_t kGofFrames = 2;
  const uint8_t kSecondFramePidDiff = 33;

  // First byte: I:0 P:0 L:0 F:0 B:1 E:0 V:1 Z:0.
  uint8_t payload[23] = {0x0A};
  // N_S (stored as layer count minus one), Y, G:1.
  payload[1] = ((kSpatialLayers - 1) << 5) | (kResolutionBit << 4) | (1 << 3);
  payload[2] = kGofFrames;                          // N_G
  payload[3] = (0 << 5) | (1 << 4) | (0 << 2) | 0;  // T:0 U:1 R:0 -
  payload[4] = (2 << 5) | (0 << 4) | (1 << 2) | 0;  // T:2 U:0 R:1 -
  payload[5] = kSecondFramePidDiff;

  RTPVideoHeaderVP9 expected;
  expected.InitRTPVideoHeaderVP9();
  expected.beginning_of_frame = true;
  expected.ss_data_available = true;
  expected.num_spatial_layers = kSpatialLayers;
  expected.spatial_layer_resolution_present = (kResolutionBit != 0);
  expected.gof.num_frames_in_gof = kGofFrames;
  // First GOF frame: T:0 U:1, no references.
  expected.gof.temporal_idx[0] = 0;
  expected.gof.temporal_up_switch[0] = true;
  expected.gof.num_ref_pics[0] = 0;
  // Second GOF frame: T:2 U:0, one reference with P_DIFF 33.
  expected.gof.temporal_idx[1] = 2;
  expected.gof.temporal_up_switch[1] = false;
  expected.gof.num_ref_pics[1] = 1;
  expected.gof.pid_diff[1][0] = 33;

  RTPVideoHeader video_header;
  const int offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(payload, &video_header);

  EXPECT_EQ(offset, 6);
  const auto& actual =
      absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);
  VerifyHeader(expected, actual);
}
272 
// A packet with the B bit set and the P bit cleared is expected to be
// reported as the first packet of a key frame.
TEST(VideoRtpDepacketizerVp9Test, ParseFirstPacketInKeyFrame) {
  uint8_t packet[2] = {0};
  packet[0] = 0x08;  // I:0 P:0 L:0 F:0 B:1 E:0 V:0 Z:0

  RTPVideoHeader video_header;
  // ParseRtpPayload() reports failure by returning 0 (see the ParseFails*
  // tests in this file). Stop here on failure instead of checking header
  // fields that the parser may not have filled in.
  ASSERT_GT(VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header), 0);

  EXPECT_EQ(video_header.frame_type, VideoFrameType::kVideoFrameKey);
  EXPECT_TRUE(video_header.is_first_packet_in_frame);
}
283 
// A packet with the P and E bits set and the B bit cleared is expected to be
// reported as a delta-frame packet that is not the first packet in its frame.
TEST(VideoRtpDepacketizerVp9Test, ParseLastPacketInDeltaFrame) {
  uint8_t packet[2] = {0};
  packet[0] = 0x44;  // I:0 P:1 L:0 F:0 B:0 E:1 V:0 Z:0

  RTPVideoHeader video_header;
  // ParseRtpPayload() reports failure by returning 0 (see the ParseFails*
  // tests in this file). Stop here on failure instead of checking header
  // fields that the parser may not have filled in.
  ASSERT_GT(VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header), 0);

  EXPECT_EQ(video_header.frame_type, VideoFrameType::kVideoFrameDelta);
  EXPECT_FALSE(video_header.is_first_packet_in_frame);
}
294 
// Packet carrying scalability structure data with per-layer resolutions
// (Y:1) for two spatial layers. The test expects the generic video header to
// report the resolution of the first layer.
TEST(VideoRtpDepacketizerVp9Test, ParseResolution) {
  const uint16_t kWidth[2] = {640, 1280};
  const uint16_t kHeight[2] = {360, 720};
  uint8_t packet[20] = {0};
  packet[0] = 0x0A;                     // I:0 P:0 L:0 F:0 B:1 E:0 V:1 Z:0
  packet[1] = (1 << 5) | (1 << 4) | 0;  // N_S:1 Y:1 G:0
  // Each layer contributes a 16-bit width then a 16-bit height, high byte
  // first.
  packet[2] = kWidth[0] >> 8;
  packet[3] = kWidth[0] & 0xFF;
  packet[4] = kHeight[0] >> 8;
  packet[5] = kHeight[0] & 0xFF;
  packet[6] = kWidth[1] >> 8;
  packet[7] = kWidth[1] & 0xFF;
  packet[8] = kHeight[1] >> 8;
  packet[9] = kHeight[1] & 0xFF;

  RTPVideoHeader video_header;
  // ParseRtpPayload() reports failure by returning 0 (see the ParseFails*
  // tests in this file). Stop here on failure instead of comparing a
  // resolution the parser may not have filled in.
  ASSERT_GT(VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header), 0);

  EXPECT_EQ(video_header.width, kWidth[0]);
  EXPECT_EQ(video_header.height, kHeight[0]);
}
316 
// A default-constructed (empty) payload view is expected to be rejected, i.e.
// ParseRtpPayload() returns 0.
TEST(VideoRtpDepacketizerVp9Test, ParseFailsForNoPayloadLength) {
  RTPVideoHeader video_header;
  rtc::ArrayView<const uint8_t> no_payload;

  const int offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(no_payload, &video_header);
  EXPECT_EQ(offset, 0);
}
323 
// A buffer holding a single zero byte is expected to be rejected, i.e.
// ParseRtpPayload() returns 0.
TEST(VideoRtpDepacketizerVp9Test, ParseFailsForTooShortBufferToFitPayload) {
  uint8_t single_byte[1] = {0};
  RTPVideoHeader video_header;

  const int offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(single_byte, &video_header);
  EXPECT_EQ(offset, 0);
}
330 
// Parses two packets into the same RTPVideoHeader, first with Z:0 and then
// with Z:1. The test expects `non_ref_for_inter_layer_pred` to follow the Z
// bit.
TEST(VideoRtpDepacketizerVp9Test, ParseNonRefForInterLayerPred) {
  RTPVideoHeader video_header;
  RTPVideoHeaderVP9 expected;
  expected.InitRTPVideoHeaderVP9();
  uint8_t packet[2] = {0};

  packet[0] = 0x08;  // I:0 P:0 L:0 F:0 B:1 E:0 V:0 Z:0
  // ParseRtpPayload() reports failure by returning 0 (see the ParseFails*
  // tests in this file). Stop before absl::get() reads a header the parser
  // may not have stored.
  ASSERT_GT(VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header), 0);

  expected.beginning_of_frame = true;
  expected.non_ref_for_inter_layer_pred = false;
  VerifyHeader(expected,
               absl::get<RTPVideoHeaderVP9>(video_header.video_type_header));

  packet[0] = 0x05;  // I:0 P:0 L:0 F:0 B:0 E:1 V:0 Z:1
  // Same guard for the second parse, so a failure is not hidden by values
  // left over from the first one.
  ASSERT_GT(VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header), 0);

  expected.beginning_of_frame = false;
  expected.end_of_frame = true;
  expected.non_ref_for_inter_layer_pred = true;
  VerifyHeader(expected,
               absl::get<RTPVideoHeaderVP9>(video_header.video_type_header));
}
354 
// Parse() is expected to return a video payload that points into the input
// buffer's storage, just past the one-byte descriptor, rather than a copy.
TEST(VideoRtpDepacketizerVp9Test, ReferencesInputCopyOnWriteBuffer) {
  constexpr size_t kHeaderSize = 1;
  // First byte: I:0 P:0 L:0 F:0 B:1 E:1 V:0 Z:0. Remaining bytes are zero.
  uint8_t raw_packet[4] = {0x0C};
  rtc::CopyOnWriteBuffer rtp_payload(raw_packet);

  VideoRtpDepacketizerVp9 depacketizer;
  const absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed =
      depacketizer.Parse(rtp_payload);
  ASSERT_TRUE(parsed);

  const size_t expected_size = rtp_payload.size() - kHeaderSize;
  EXPECT_EQ(parsed->video_payload.size(), expected_size);
  // Compare pointers to check there was no copy on write buffer unsharing.
  EXPECT_EQ(parsed->video_payload.cdata(), rtp_payload.cdata() + kHeaderSize);
}
370 }  // namespace
371 }  // namespace webrtc
372