1 /*
2 * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.h"
12
13 #include <memory>
14 #include <vector>
15
16 #include "api/array_view.h"
17 #include "test/gmock.h"
18 #include "test/gtest.h"
19
20 namespace webrtc {
21 namespace {
22
VerifyHeader(const RTPVideoHeaderVP9 & expected,const RTPVideoHeaderVP9 & actual)23 void VerifyHeader(const RTPVideoHeaderVP9& expected,
24 const RTPVideoHeaderVP9& actual) {
25 EXPECT_EQ(expected.inter_layer_predicted, actual.inter_layer_predicted);
26 EXPECT_EQ(expected.inter_pic_predicted, actual.inter_pic_predicted);
27 EXPECT_EQ(expected.flexible_mode, actual.flexible_mode);
28 EXPECT_EQ(expected.beginning_of_frame, actual.beginning_of_frame);
29 EXPECT_EQ(expected.end_of_frame, actual.end_of_frame);
30 EXPECT_EQ(expected.ss_data_available, actual.ss_data_available);
31 EXPECT_EQ(expected.non_ref_for_inter_layer_pred,
32 actual.non_ref_for_inter_layer_pred);
33 EXPECT_EQ(expected.picture_id, actual.picture_id);
34 EXPECT_EQ(expected.max_picture_id, actual.max_picture_id);
35 EXPECT_EQ(expected.temporal_idx, actual.temporal_idx);
36 EXPECT_EQ(expected.spatial_idx, actual.spatial_idx);
37 EXPECT_EQ(expected.gof_idx, actual.gof_idx);
38 EXPECT_EQ(expected.tl0_pic_idx, actual.tl0_pic_idx);
39 EXPECT_EQ(expected.temporal_up_switch, actual.temporal_up_switch);
40
41 EXPECT_EQ(expected.num_ref_pics, actual.num_ref_pics);
42 for (uint8_t i = 0; i < expected.num_ref_pics; ++i) {
43 EXPECT_EQ(expected.pid_diff[i], actual.pid_diff[i]);
44 EXPECT_EQ(expected.ref_picture_id[i], actual.ref_picture_id[i]);
45 }
46 if (expected.ss_data_available) {
47 EXPECT_EQ(expected.spatial_layer_resolution_present,
48 actual.spatial_layer_resolution_present);
49 EXPECT_EQ(expected.num_spatial_layers, actual.num_spatial_layers);
50 if (expected.spatial_layer_resolution_present) {
51 for (size_t i = 0; i < expected.num_spatial_layers; i++) {
52 EXPECT_EQ(expected.width[i], actual.width[i]);
53 EXPECT_EQ(expected.height[i], actual.height[i]);
54 }
55 }
56 EXPECT_EQ(expected.gof.num_frames_in_gof, actual.gof.num_frames_in_gof);
57 for (size_t i = 0; i < expected.gof.num_frames_in_gof; i++) {
58 EXPECT_EQ(expected.gof.temporal_up_switch[i],
59 actual.gof.temporal_up_switch[i]);
60 EXPECT_EQ(expected.gof.temporal_idx[i], actual.gof.temporal_idx[i]);
61 EXPECT_EQ(expected.gof.num_ref_pics[i], actual.gof.num_ref_pics[i]);
62 for (uint8_t j = 0; j < expected.gof.num_ref_pics[i]; j++) {
63 EXPECT_EQ(expected.gof.pid_diff[i][j], actual.gof.pid_diff[i][j]);
64 }
65 }
66 }
67 }
68
// A descriptor with only the B and E bits set parses to offset 1 and yields
// an otherwise default header with both frame-boundary flags set.
TEST(VideoRtpDepacketizerVp9Test, ParseBasicHeader) {
  // I:0 P:0 L:0 F:0 B:1 E:1 V:0 Z:0; remaining bytes stay zero.
  uint8_t packet[4] = {0x0C};

  RTPVideoHeaderVP9 expected;
  expected.InitRTPVideoHeaderVP9();
  expected.beginning_of_frame = true;
  expected.end_of_frame = true;

  RTPVideoHeader video_header;
  const int parsed_offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header);

  EXPECT_EQ(parsed_offset, 1);
  VerifyHeader(expected,
               absl::get<RTPVideoHeaderVP9>(video_header.video_type_header));
}
84
// With the I bit set and a one-byte picture id, the parser returns offset 2
// and reports both picture_id and max_picture_id as kMaxOneBytePictureId.
TEST(VideoRtpDepacketizerVp9Test, ParseOneBytePictureId) {
  uint8_t packet[10] = {};
  packet[0] = 0x80;  // I:1 P:0 L:0 F:0 B:0 E:0 V:0 Z:0
  packet[1] = kMaxOneBytePictureId;

  RTPVideoHeaderVP9 expected;
  expected.InitRTPVideoHeaderVP9();
  expected.picture_id = kMaxOneBytePictureId;
  expected.max_picture_id = kMaxOneBytePictureId;

  RTPVideoHeader video_header;
  const int parsed_offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header);

  EXPECT_EQ(parsed_offset, 2);
  VerifyHeader(expected,
               absl::get<RTPVideoHeaderVP9>(video_header.video_type_header));
}
101
// With the I bit set and the top bit of the first picture-id byte set, the
// picture id spans two bytes: the parser returns offset 3 and reports
// kMaxTwoBytePictureId for both picture_id and max_picture_id.
TEST(VideoRtpDepacketizerVp9Test, ParseTwoBytePictureId) {
  uint8_t packet[10] = {};
  packet[0] = 0x80;  // I:1 P:0 L:0 F:0 B:0 E:0 V:0 Z:0
  // Top bit marks the two-byte form; the low 7 bits carry the id's high part.
  packet[1] = 0x80 | ((kMaxTwoBytePictureId >> 8) & 0x7F);
  packet[2] = kMaxTwoBytePictureId & 0xFF;

  RTPVideoHeaderVP9 expected;
  expected.InitRTPVideoHeaderVP9();
  expected.picture_id = kMaxTwoBytePictureId;
  expected.max_picture_id = kMaxTwoBytePictureId;

  RTPVideoHeader video_header;
  const int parsed_offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header);

  EXPECT_EQ(parsed_offset, 3);
  VerifyHeader(expected,
               absl::get<RTPVideoHeaderVP9>(video_header.video_type_header));
}
119
// With L:1 and F:0 the layer byte is followed by a TL0PICIDX byte, so the
// parser returns offset 3 and fills in the layer indices and tl0_pic_idx.
TEST(VideoRtpDepacketizerVp9Test, ParseLayerInfoWithNonFlexibleMode) {
  const uint8_t kTemporalIdx = 2;
  const uint8_t kUbit = 1;
  const uint8_t kSpatialIdx = 1;
  const uint8_t kDbit = 1;
  const uint8_t kTl0PicIdx = 17;

  uint8_t packet[13] = {};
  packet[0] = 0x20;  // I:0 P:0 L:1 F:0 B:0 E:0 V:0 Z:0
  // T:2 U:1 S:1 D:1
  packet[1] = (kTemporalIdx << 5) | (kUbit << 4) | (kSpatialIdx << 1) | kDbit;
  // TL0PICIDX:17
  packet[2] = kTl0PicIdx;

  RTPVideoHeaderVP9 expected;
  expected.InitRTPVideoHeaderVP9();
  expected.temporal_idx = kTemporalIdx;
  expected.temporal_up_switch = (kUbit != 0);
  expected.spatial_idx = kSpatialIdx;
  expected.inter_layer_predicted = (kDbit != 0);
  expected.tl0_pic_idx = kTl0PicIdx;

  RTPVideoHeader video_header;
  const int parsed_offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header);

  EXPECT_EQ(parsed_offset, 3);
  VerifyHeader(expected,
               absl::get<RTPVideoHeaderVP9>(video_header.video_type_header));
}
147
// With L:1 and F:1 only the layer byte follows the first descriptor byte, so
// the parser returns offset 2.
TEST(VideoRtpDepacketizerVp9Test, ParseLayerInfoWithFlexibleMode) {
  const uint8_t kTemporalIdx = 2;
  const uint8_t kUbit = 1;
  const uint8_t kSpatialIdx = 0;
  const uint8_t kDbit = 0;

  uint8_t packet[13] = {};
  packet[0] = 0x38;  // I:0 P:0 L:1 F:1 B:1 E:0 V:0 Z:0
  // L: T:2 U:1 S:0 D:0
  packet[1] = (kTemporalIdx << 5) | (kUbit << 4) | (kSpatialIdx << 1) | kDbit;

  RTPVideoHeaderVP9 expected;
  expected.InitRTPVideoHeaderVP9();
  expected.beginning_of_frame = true;
  expected.flexible_mode = true;
  expected.temporal_idx = kTemporalIdx;
  expected.temporal_up_switch = (kUbit != 0);
  expected.spatial_idx = kSpatialIdx;
  expected.inter_layer_predicted = (kDbit != 0);

  RTPVideoHeader video_header;
  const int parsed_offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header);

  EXPECT_EQ(parsed_offset, 2);
  VerifyHeader(expected,
               absl::get<RTPVideoHeaderVP9>(video_header.video_type_header));
}
174
// In flexible mode with P:1, a two-byte picture id is followed by three
// P_DIFF bytes. The parser returns offset 6 and reports every diff together
// with the reference picture id derived from it.
TEST(VideoRtpDepacketizerVp9Test, ParseRefIdx) {
  const int16_t kPictureId = 17;
  const uint8_t kPdiff1 = 17;
  const uint8_t kPdiff2 = 18;
  const uint8_t kPdiff3 = 127;

  uint8_t packet[13] = {};
  packet[0] = 0xD8;  // I:1 P:1 L:0 F:1 B:1 E:0 V:0 Z:0
  // I: PICTURE ID:17, encoded in the two-byte form.
  packet[1] = 0x80 | ((kPictureId >> 8) & 0x7F);
  packet[2] = kPictureId;
  // The N bit (lowest bit) says whether another P_DIFF byte follows.
  packet[3] = (kPdiff1 << 1) | 1;  // P_DIFF N:1
  packet[4] = (kPdiff2 << 1) | 1;  // P_DIFF N:1
  packet[5] = (kPdiff3 << 1) | 0;  // P_DIFF N:0

  RTPVideoHeaderVP9 expected;
  expected.InitRTPVideoHeaderVP9();
  expected.beginning_of_frame = true;
  expected.inter_pic_predicted = true;
  expected.flexible_mode = true;
  expected.picture_id = kPictureId;
  expected.num_ref_pics = 3;
  // P,F: P_DIFF:17 N:1 => refPicId = 17 - 17 = 0
  expected.pid_diff[0] = kPdiff1;
  expected.ref_picture_id[0] = 0;
  // P,F: P_DIFF:18 N:1 => refPicId = (kMaxPictureId + 1) + 17 - 18 = 0x7FFF
  expected.pid_diff[1] = kPdiff2;
  expected.ref_picture_id[1] = 0x7FFF;
  // P,F: P_DIFF:127 N:0 => refPicId = (kMaxPictureId + 1) + 17 - 127 = 32658
  expected.pid_diff[2] = kPdiff3;
  expected.ref_picture_id[2] = 32658;

  RTPVideoHeader video_header;
  const int parsed_offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header);

  EXPECT_EQ(parsed_offset, 6);
  VerifyHeader(expected,
               absl::get<RTPVideoHeaderVP9>(video_header.video_type_header));
}
214
// A flexible-mode packet carrying a P_DIFF (P:1, F:1) but no picture id (I:0)
// must be rejected: the parser returns 0.
TEST(VideoRtpDepacketizerVp9Test, ParseRefIdxFailsWithNoPictureId) {
  const uint8_t kPdiff = 3;

  uint8_t packet[13] = {};
  packet[0] = 0x58;           // I:0 P:1 L:0 F:1 B:1 E:0 V:0 Z:0
  packet[1] = (kPdiff << 1);  // P,F: P_DIFF:3 N:0

  RTPVideoHeader video_header;
  const int parse_result =
      VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header);
  EXPECT_EQ(parse_result, 0);
}
224
// Four chained P_DIFF bytes (three with N:1, then one with N:0) must be
// rejected: the parser returns 0.
TEST(VideoRtpDepacketizerVp9Test, ParseRefIdxFailsWithTooManyRefPics) {
  const uint8_t kPdiff = 3;
  const uint8_t kPdiffWithMore = (kPdiff << 1) | 1;  // P,F: P_DIFF:3 N:1
  const uint8_t kPdiffLast = (kPdiff << 1) | 0;      // P,F: P_DIFF:3 N:0

  uint8_t packet[13] = {};
  packet[0] = 0xD8;                  // I:1 P:1 L:0 F:1 B:1 E:0 V:0 Z:0
  packet[1] = kMaxOneBytePictureId;  // I: PICTURE ID:127
  packet[2] = kPdiffWithMore;
  packet[3] = kPdiffWithMore;
  packet[4] = kPdiffWithMore;
  packet[5] = kPdiffLast;

  RTPVideoHeader video_header;
  const int parse_result =
      VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header);
  EXPECT_EQ(parse_result, 0);
}
238
// With V:1 the descriptor carries scalability-structure data. Here it
// describes two spatial layers without resolutions (Y:0) and a group of two
// frames (G:1); the parser returns offset 6.
TEST(VideoRtpDepacketizerVp9Test, ParseSsData) {
  const uint8_t kYbit = 0;
  const size_t kNs = 2;
  const size_t kNg = 2;

  uint8_t packet[23] = {};
  packet[0] = 0x0A;  // I:0 P:0 L:0 F:0 B:1 E:0 V:1 Z:0
  packet[1] = ((kNs - 1) << 5) | (kYbit << 4) | (1 << 3);  // N_S Y G:1 -
  packet[2] = kNg;                                         // N_G
  packet[3] = (0 << 5) | (1 << 4) | (0 << 2) | 0;          // T:0 U:1 R:0 -
  packet[4] = (2 << 5) | (0 << 4) | (1 << 2) | 0;          // T:2 U:0 R:1 -
  packet[5] = 33;  // P_DIFF of the second frame's single reference.

  RTPVideoHeaderVP9 expected;
  expected.InitRTPVideoHeaderVP9();
  expected.beginning_of_frame = true;
  expected.ss_data_available = true;
  expected.num_spatial_layers = kNs;
  expected.spatial_layer_resolution_present = (kYbit != 0);
  expected.gof.num_frames_in_gof = kNg;
  // First frame in the group: T:0 U:1, no references.
  expected.gof.temporal_idx[0] = 0;
  expected.gof.temporal_up_switch[0] = true;
  expected.gof.num_ref_pics[0] = 0;
  // Second frame in the group: T:2 U:0, one reference with P_DIFF 33.
  expected.gof.temporal_idx[1] = 2;
  expected.gof.temporal_up_switch[1] = false;
  expected.gof.num_ref_pics[1] = 1;
  expected.gof.pid_diff[1][0] = 33;

  RTPVideoHeader video_header;
  const int parsed_offset =
      VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header);

  EXPECT_EQ(parsed_offset, 6);
  VerifyHeader(expected,
               absl::get<RTPVideoHeaderVP9>(video_header.video_type_header));
}
272
// A packet with B:1 and P:0 is reported as the first packet of a key frame.
TEST(VideoRtpDepacketizerVp9Test, ParseFirstPacketInKeyFrame) {
  uint8_t packet[2] = {0};
  packet[0] = 0x08;  // I:0 P:0 L:0 F:0 B:1 E:0 V:0 Z:0

  RTPVideoHeader video_header;
  // ParseRtpPayload() signals failure by returning 0. Abort on failure so the
  // expectations below only ever inspect a successfully parsed header.
  ASSERT_NE(VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header), 0);

  EXPECT_EQ(video_header.frame_type, VideoFrameType::kVideoFrameKey);
  EXPECT_TRUE(video_header.is_first_packet_in_frame);
}
283
// A packet with P:1, B:0 and E:1 is reported as belonging to a delta frame
// and as not being the first packet of its frame.
TEST(VideoRtpDepacketizerVp9Test, ParseLastPacketInDeltaFrame) {
  uint8_t packet[2] = {0};
  packet[0] = 0x44;  // I:0 P:1 L:0 F:0 B:0 E:1 V:0 Z:0

  RTPVideoHeader video_header;
  // ParseRtpPayload() signals failure by returning 0. Abort on failure so the
  // expectations below only ever inspect a successfully parsed header.
  ASSERT_NE(VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header), 0);

  EXPECT_EQ(video_header.frame_type, VideoFrameType::kVideoFrameDelta);
  EXPECT_FALSE(video_header.is_first_packet_in_frame);
}
294
// With V:1 and Y:1 the scalability structure carries a width/height pair per
// spatial layer, each value stored as two bytes, high byte first. The test
// checks that the generic video header reports the first layer's resolution.
TEST(VideoRtpDepacketizerVp9Test, ParseResolution) {
  const uint16_t kWidth[2] = {640, 1280};
  const uint16_t kHeight[2] = {360, 720};
  uint8_t packet[20] = {0};
  packet[0] = 0x0A;                     // I:0 P:0 L:0 F:0 B:1 E:0 V:1 Z:0
  packet[1] = (1 << 5) | (1 << 4) | 0;  // N_S:1 Y:1 G:0
  packet[2] = kWidth[0] >> 8;
  packet[3] = kWidth[0] & 0xFF;
  packet[4] = kHeight[0] >> 8;
  packet[5] = kHeight[0] & 0xFF;
  packet[6] = kWidth[1] >> 8;
  packet[7] = kWidth[1] & 0xFF;
  packet[8] = kHeight[1] >> 8;
  packet[9] = kHeight[1] & 0xFF;

  RTPVideoHeader video_header;
  // ParseRtpPayload() signals failure by returning 0. Abort on failure so the
  // resolution checks below only ever inspect a successfully parsed header.
  ASSERT_NE(VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header), 0);

  EXPECT_EQ(video_header.width, kWidth[0]);
  EXPECT_EQ(video_header.height, kHeight[0]);
}
316
// Parsing an empty payload view must fail: the parser returns 0.
TEST(VideoRtpDepacketizerVp9Test, ParseFailsForNoPayloadLength) {
  RTPVideoHeader video_header;
  rtc::ArrayView<const uint8_t> empty;

  const int parse_result =
      VideoRtpDepacketizerVp9::ParseRtpPayload(empty, &video_header);
  EXPECT_EQ(parse_result, 0);
}
323
// A buffer holding nothing but a single all-zero descriptor byte must be
// rejected: the parser returns 0.
// NOTE(review): judging by the test name, the rejection is because no payload
// bytes follow the descriptor; confirm against the parser implementation.
TEST(VideoRtpDepacketizerVp9Test, ParseFailsForTooShortBufferToFitPayload) {
  RTPVideoHeader video_header;
  uint8_t packet[] = {0};

  const int parse_result =
      VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header);
  EXPECT_EQ(parse_result, 0);
}
330
// The Z bit of the first descriptor byte maps to non_ref_for_inter_layer_pred.
// The same packet buffer and video header are parsed twice, first with Z:0
// and then with Z:1.
TEST(VideoRtpDepacketizerVp9Test, ParseNonRefForInterLayerPred) {
  RTPVideoHeader video_header;
  RTPVideoHeaderVP9 expected;
  expected.InitRTPVideoHeaderVP9();
  uint8_t packet[2] = {0};

  packet[0] = 0x08;  // I:0 P:0 L:0 F:0 B:1 E:0 V:0 Z:0
  // ParseRtpPayload() signals failure by returning 0. Abort on failure so the
  // header is only read after a successful parse.
  ASSERT_NE(VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header), 0);

  expected.beginning_of_frame = true;
  expected.non_ref_for_inter_layer_pred = false;
  VerifyHeader(expected,
               absl::get<RTPVideoHeaderVP9>(video_header.video_type_header));

  packet[0] = 0x05;  // I:0 P:0 L:0 F:0 B:0 E:1 V:0 Z:1
  // Guard the second parse too. Otherwise a failure here could leave the
  // header from the first parse in place and be compared against `expected`.
  ASSERT_NE(VideoRtpDepacketizerVp9::ParseRtpPayload(packet, &video_header), 0);

  expected.beginning_of_frame = false;
  expected.end_of_frame = true;
  expected.non_ref_for_inter_layer_pred = true;
  VerifyHeader(expected,
               absl::get<RTPVideoHeaderVP9>(video_header.video_type_header));
}
354
// Parse() should hand back a video payload that starts right after the
// one-byte descriptor and points into the input buffer's own storage.
TEST(VideoRtpDepacketizerVp9Test, ReferencesInputCopyOnWriteBuffer) {
  constexpr size_t kHeaderSize = 1;
  // I:0 P:0 L:0 F:0 B:1 E:1 V:0 Z:0; remaining bytes stay zero.
  uint8_t packet[4] = {0x0C};
  rtc::CopyOnWriteBuffer rtp_payload(packet);

  VideoRtpDepacketizerVp9 depacketizer;
  absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed =
      depacketizer.Parse(rtp_payload);
  ASSERT_TRUE(parsed);

  const size_t expected_size = rtp_payload.size() - kHeaderSize;
  EXPECT_EQ(parsed->video_payload.size(), expected_size);
  // Equal data pointers mean the payload still shares the input's storage,
  // i.e. no copy-on-write unsharing took place.
  const auto* expected_data = rtp_payload.cdata() + kHeaderSize;
  EXPECT_EQ(parsed->video_payload.cdata(), expected_data);
}
370 } // namespace
371 } // namespace webrtc
372