1 /*
2  *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
#include <algorithm>
#include <memory>
#include <vector>
13 
14 #include "modules/rtp_rtcp/source/rtp_format_vp9.h"
15 #include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
16 #include "test/gmock.h"
17 #include "test/gtest.h"
18 #include "typedefs.h"  // NOLINT(build/include)
19 
20 namespace webrtc {
21 namespace {
VerifyHeader(const RTPVideoHeaderVP9 & expected,const RTPVideoHeaderVP9 & actual)22 void VerifyHeader(const RTPVideoHeaderVP9& expected,
23                   const RTPVideoHeaderVP9& actual) {
24   EXPECT_EQ(expected.inter_layer_predicted, actual.inter_layer_predicted);
25   EXPECT_EQ(expected.inter_pic_predicted, actual.inter_pic_predicted);
26   EXPECT_EQ(expected.flexible_mode, actual.flexible_mode);
27   EXPECT_EQ(expected.beginning_of_frame, actual.beginning_of_frame);
28   EXPECT_EQ(expected.end_of_frame, actual.end_of_frame);
29   EXPECT_EQ(expected.ss_data_available, actual.ss_data_available);
30   EXPECT_EQ(expected.picture_id, actual.picture_id);
31   EXPECT_EQ(expected.max_picture_id, actual.max_picture_id);
32   EXPECT_EQ(expected.temporal_idx, actual.temporal_idx);
33   EXPECT_EQ(expected.spatial_idx, actual.spatial_idx);
34   EXPECT_EQ(expected.gof_idx, actual.gof_idx);
35   EXPECT_EQ(expected.tl0_pic_idx, actual.tl0_pic_idx);
36   EXPECT_EQ(expected.temporal_up_switch, actual.temporal_up_switch);
37 
38   EXPECT_EQ(expected.num_ref_pics, actual.num_ref_pics);
39   for (uint8_t i = 0; i < expected.num_ref_pics; ++i) {
40     EXPECT_EQ(expected.pid_diff[i], actual.pid_diff[i]);
41     EXPECT_EQ(expected.ref_picture_id[i], actual.ref_picture_id[i]);
42   }
43   if (expected.ss_data_available) {
44     EXPECT_EQ(expected.spatial_layer_resolution_present,
45               actual.spatial_layer_resolution_present);
46     EXPECT_EQ(expected.num_spatial_layers, actual.num_spatial_layers);
47     if (expected.spatial_layer_resolution_present) {
48       for (size_t i = 0; i < expected.num_spatial_layers; i++) {
49         EXPECT_EQ(expected.width[i], actual.width[i]);
50         EXPECT_EQ(expected.height[i], actual.height[i]);
51       }
52     }
53     EXPECT_EQ(expected.gof.num_frames_in_gof, actual.gof.num_frames_in_gof);
54     for (size_t i = 0; i < expected.gof.num_frames_in_gof; i++) {
55       EXPECT_EQ(expected.gof.temporal_up_switch[i],
56                 actual.gof.temporal_up_switch[i]);
57       EXPECT_EQ(expected.gof.temporal_idx[i], actual.gof.temporal_idx[i]);
58       EXPECT_EQ(expected.gof.num_ref_pics[i], actual.gof.num_ref_pics[i]);
59       for (uint8_t j = 0; j < expected.gof.num_ref_pics[i]; j++) {
60         EXPECT_EQ(expected.gof.pid_diff[i][j], actual.gof.pid_diff[i][j]);
61       }
62     }
63   }
64 }
65 
VerifyPayload(const RtpDepacketizer::ParsedPayload & parsed,const uint8_t * payload,size_t payload_length)66 void VerifyPayload(const RtpDepacketizer::ParsedPayload& parsed,
67                    const uint8_t* payload,
68                    size_t payload_length) {
69   EXPECT_EQ(payload, parsed.payload);
70   EXPECT_EQ(payload_length, parsed.payload_length);
71   EXPECT_THAT(std::vector<uint8_t>(parsed.payload,
72                                    parsed.payload + parsed.payload_length),
73               ::testing::ElementsAreArray(payload, payload_length));
74 }
75 
ParseAndCheckPacket(const uint8_t * packet,const RTPVideoHeaderVP9 & expected,size_t expected_hdr_length,size_t expected_length)76 void ParseAndCheckPacket(const uint8_t* packet,
77                          const RTPVideoHeaderVP9& expected,
78                          size_t expected_hdr_length,
79                          size_t expected_length) {
80   std::unique_ptr<RtpDepacketizer> depacketizer(new RtpDepacketizerVp9());
81   RtpDepacketizer::ParsedPayload parsed;
82   ASSERT_TRUE(depacketizer->Parse(&parsed, packet, expected_length));
83   EXPECT_EQ(kRtpVideoVp9, parsed.type.Video.codec);
84   VerifyHeader(expected, parsed.type.Video.codecHeader.VP9);
85   const size_t kExpectedPayloadLength = expected_length - expected_hdr_length;
86   VerifyPayload(parsed, packet + expected_hdr_length, kExpectedPayloadLength);
87 }
88 }  // namespace
89 
90 // Payload descriptor for flexible mode
91 //        0 1 2 3 4 5 6 7
92 //        +-+-+-+-+-+-+-+-+
93 //        |I|P|L|F|B|E|V|-| (REQUIRED)
94 //        +-+-+-+-+-+-+-+-+
95 //   I:   |M| PICTURE ID  | (RECOMMENDED)
96 //        +-+-+-+-+-+-+-+-+
97 //   M:   | EXTENDED PID  | (RECOMMENDED)
98 //        +-+-+-+-+-+-+-+-+
99 //   L:   |  T  |U|  S  |D| (CONDITIONALLY RECOMMENDED)
100 //        +-+-+-+-+-+-+-+-+                             -|
101 //   P,F: | P_DIFF      |N| (CONDITIONALLY RECOMMENDED)  . up to 3 times
102 //        +-+-+-+-+-+-+-+-+                             -|
103 //   V:   | SS            |
104 //        | ..            |
105 //        +-+-+-+-+-+-+-+-+
106 //
107 // Payload descriptor for non-flexible mode
108 //        0 1 2 3 4 5 6 7
109 //        +-+-+-+-+-+-+-+-+
110 //        |I|P|L|F|B|E|V|-| (REQUIRED)
111 //        +-+-+-+-+-+-+-+-+
112 //   I:   |M| PICTURE ID  | (RECOMMENDED)
113 //        +-+-+-+-+-+-+-+-+
114 //   M:   | EXTENDED PID  | (RECOMMENDED)
115 //        +-+-+-+-+-+-+-+-+
116 //   L:   |  T  |U|  S  |D| (CONDITIONALLY RECOMMENDED)
117 //        +-+-+-+-+-+-+-+-+
118 //        |   TL0PICIDX   | (CONDITIONALLY REQUIRED)
119 //        +-+-+-+-+-+-+-+-+
120 //   V:   | SS            |
121 //        | ..            |
122 //        +-+-+-+-+-+-+-+-+
123 
124 class RtpPacketizerVp9Test : public ::testing::Test {
125  protected:
126   static constexpr RtpPacketToSend::ExtensionManager* kNoExtensions = nullptr;
127   static constexpr size_t kMaxPacketSize = 1200;
128 
RtpPacketizerVp9Test()129   RtpPacketizerVp9Test() : packet_(kNoExtensions, kMaxPacketSize) {}
SetUp()130   virtual void SetUp() {
131     expected_.InitRTPVideoHeaderVP9();
132   }
133 
134   RtpPacketToSend packet_;
135   std::unique_ptr<uint8_t[]> payload_;
136   size_t payload_size_;
137   size_t payload_pos_;
138   RTPVideoHeaderVP9 expected_;
139   std::unique_ptr<RtpPacketizerVp9> packetizer_;
140   size_t num_packets_;
141 
Init(size_t payload_size,size_t packet_size)142   void Init(size_t payload_size, size_t packet_size) {
143     payload_.reset(new uint8_t[payload_size]);
144     memset(payload_.get(), 7, payload_size);
145     payload_size_ = payload_size;
146     payload_pos_ = 0;
147     packetizer_.reset(new RtpPacketizerVp9(expected_, packet_size,
148                                            /*last_packet_reduction_len=*/0));
149     num_packets_ =
150         packetizer_->SetPayloadData(payload_.get(), payload_size_, nullptr);
151   }
152 
CheckPayload(const uint8_t * packet,size_t start_pos,size_t end_pos,bool last)153   void CheckPayload(const uint8_t* packet,
154                     size_t start_pos,
155                     size_t end_pos,
156                     bool last) {
157     for (size_t i = start_pos; i < end_pos; ++i) {
158       EXPECT_EQ(packet[i], payload_[payload_pos_++]);
159     }
160     EXPECT_EQ(last, payload_pos_ == payload_size_);
161   }
162 
CreateParseAndCheckPackets(const size_t * expected_hdr_sizes,const size_t * expected_sizes,size_t expected_num_packets)163   void CreateParseAndCheckPackets(const size_t* expected_hdr_sizes,
164                                   const size_t* expected_sizes,
165                                   size_t expected_num_packets) {
166     ASSERT_TRUE(packetizer_.get() != NULL);
167     if (expected_num_packets == 0) {
168       EXPECT_FALSE(packetizer_->NextPacket(&packet_));
169       return;
170     }
171     EXPECT_EQ(expected_num_packets, num_packets_);
172     for (size_t i = 0; i < expected_num_packets; ++i) {
173       EXPECT_TRUE(packetizer_->NextPacket(&packet_));
174       auto rtp_payload = packet_.payload();
175       EXPECT_EQ(expected_sizes[i], rtp_payload.size());
176       RTPVideoHeaderVP9 hdr = expected_;
177       hdr.beginning_of_frame = (i == 0);
178       hdr.end_of_frame = (i + 1) == expected_num_packets;
179       ParseAndCheckPacket(rtp_payload.data(), hdr, expected_hdr_sizes[i],
180                           rtp_payload.size());
181       CheckPayload(rtp_payload.data(), expected_hdr_sizes[i],
182                    rtp_payload.size(), (i + 1) == expected_num_packets);
183       expected_.ss_data_available = false;
184     }
185   }
186 };
187 
// A 25-byte frame with a 26-byte packet size yields exactly one packet.
TEST_F(RtpPacketizerVp9Test, TestEqualSizedMode_OnePacket) {
  constexpr size_t kFrameBytes = 25;
  constexpr size_t kMaxPacketBytes = 26;
  Init(kFrameBytes, kMaxPacketBytes);

  // Single packet:
  // I:0, P:0, L:0, F:0, B:1, E:1, V:0  (1hdr + 25 payload)
  const size_t kHdrSizes[] = {1};
  const size_t kPacketSizes[] = {26};
  CreateParseAndCheckPackets(kHdrSizes, kPacketSizes,
                             GTEST_ARRAY_SIZE_(kPacketSizes));
}
200 
// A 27-byte frame does not fit into one 27-byte packet (1 byte is needed for
// the descriptor), so it is split over two packets of nearly equal size.
TEST_F(RtpPacketizerVp9Test, TestEqualSizedMode_TwoPackets) {
  constexpr size_t kFrameBytes = 27;
  constexpr size_t kMaxPacketBytes = 27;
  Init(kFrameBytes, kMaxPacketBytes);

  // Two packets:
  // I:0, P:0, L:0, F:0, B:1, E:0, V:0  (1hdr + 13 payload)
  // I:0, P:0, L:0, F:0, B:0, E:1, V:0  (1hdr + 14 payload)
  const size_t kHdrSizes[] = {1, 1};
  const size_t kPacketSizes[] = {14, 15};
  CreateParseAndCheckPackets(kHdrSizes, kPacketSizes,
                             GTEST_ARRAY_SIZE_(kPacketSizes));
}
214 
// With a 1-byte packet size there is no room for both the 1-byte descriptor
// and any payload, so the packetizer must not produce a packet.
TEST_F(RtpPacketizerVp9Test, TestTooShortBufferToFitPayload) {
  const size_t kFrameSize = 1;
  const size_t kPacketSize = 1;
  Init(kFrameSize, kPacketSize);  // Would need 1hdr + 1 payload.

  // No packets are expected, so no size tables are needed.
  const size_t kExpectedNum = 0;
  CreateParseAndCheckPackets(nullptr, nullptr, kExpectedNum);
}
223 
// A picture ID that fits in one byte gives a 2-byte payload descriptor.
TEST_F(RtpPacketizerVp9Test, TestOneBytePictureId) {
  constexpr size_t kFrameBytes = 30;
  constexpr size_t kMaxPacketBytes = 12;

  expected_.max_picture_id = kMaxOneBytePictureId;
  expected_.picture_id = kMaxOneBytePictureId;
  Init(kFrameBytes, kMaxPacketBytes);

  // Three packets:
  // I:1, P:0, L:0, F:0, B:1, E:0, V:0 (2hdr + 10 payload)
  // I:1, P:0, L:0, F:0, B:0, E:0, V:0 (2hdr + 10 payload)
  // I:1, P:0, L:0, F:0, B:0, E:1, V:0 (2hdr + 10 payload)
  const size_t kHdrSizes[] = {2, 2, 2};
  const size_t kPacketSizes[] = {12, 12, 12};
  CreateParseAndCheckPackets(kHdrSizes, kPacketSizes,
                             GTEST_ARRAY_SIZE_(kPacketSizes));
}
241 
// A picture ID that needs two bytes gives a 3-byte payload descriptor.
TEST_F(RtpPacketizerVp9Test, TestTwoBytePictureId) {
  constexpr size_t kFrameBytes = 31;
  constexpr size_t kMaxPacketBytes = 13;

  expected_.picture_id = kMaxTwoBytePictureId;
  Init(kFrameBytes, kMaxPacketBytes);

  // Four packets:
  // I:1, P:0, L:0, F:0, B:1, E:0, V:0 (3hdr + 7 payload)
  // I:1, P:0, L:0, F:0, B:0, E:0, V:0 (3hdr + 8 payload)
  // I:1, P:0, L:0, F:0, B:0, E:0, V:0 (3hdr + 8 payload)
  // I:1, P:0, L:0, F:0, B:0, E:1, V:0 (3hdr + 8 payload)
  const size_t kHdrSizes[] = {3, 3, 3, 3};
  const size_t kPacketSizes[] = {10, 11, 11, 11};
  CreateParseAndCheckPackets(kHdrSizes, kPacketSizes,
                             GTEST_ARRAY_SIZE_(kPacketSizes));
}
259 
// In non-flexible mode the layer information takes two bytes (the layer
// indices byte and TL0PICIDX), giving a 3-byte payload descriptor.
TEST_F(RtpPacketizerVp9Test, TestLayerInfoWithNonFlexibleMode) {
  constexpr size_t kFrameBytes = 30;
  constexpr size_t kMaxPacketBytes = 25;

  expected_.num_spatial_layers = 3;
  expected_.spatial_idx = 2;               // S
  expected_.temporal_idx = 3;              // T
  expected_.temporal_up_switch = true;     // U
  expected_.inter_layer_predicted = true;  // D
  expected_.tl0_pic_idx = 117;
  Init(kFrameBytes, kMaxPacketBytes);

  // Two packets:
  //    | I:0, P:0, L:1, F:0, B:1, E:0, V:0 | (3hdr + 15 payload)
  // L: | T:3, U:1, S:2, D:1 | TL0PICIDX:117 |
  //    | I:0, P:0, L:1, F:0, B:0, E:1, V:0 | (3hdr + 15 payload)
  // L: | T:3, U:1, S:2, D:1 | TL0PICIDX:117 |
  const size_t kHdrSizes[] = {3, 3};
  const size_t kPacketSizes[] = {18, 18};
  CreateParseAndCheckPackets(kHdrSizes, kPacketSizes,
                             GTEST_ARRAY_SIZE_(kPacketSizes));
}
282 
// In flexible mode the layer information takes one byte (no TL0PICIDX),
// giving a 2-byte payload descriptor.
TEST_F(RtpPacketizerVp9Test, TestLayerInfoWithFlexibleMode) {
  constexpr size_t kFrameBytes = 21;
  constexpr size_t kMaxPacketBytes = 23;

  expected_.flexible_mode = true;           // F
  expected_.num_spatial_layers = 3;
  expected_.spatial_idx = 2;                // S
  expected_.temporal_idx = 3;               // T
  expected_.temporal_up_switch = true;      // U
  expected_.inter_layer_predicted = false;  // D
  Init(kFrameBytes, kMaxPacketBytes);

  // One packet:
  // I:0, P:0, L:1, F:1, B:1, E:1, V:0 (2hdr + 21 payload)
  // L:   T:3, U:1, S:2, D:0
  const size_t kHdrSizes[] = {2};
  const size_t kPacketSizes[] = {23};
  CreateParseAndCheckPackets(kHdrSizes, kPacketSizes,
                             GTEST_ARRAY_SIZE_(kPacketSizes));
}
303 
// Flexible-mode inter-predicted frame with three reference pictures: the
// descriptor is 1 byte flags + 1 byte picture ID + 3 P_DIFF bytes.
TEST_F(RtpPacketizerVp9Test, TestRefIdx) {
  constexpr size_t kFrameBytes = 16;
  constexpr size_t kMaxPacketBytes = 21;

  expected_.flexible_mode = true;        // F
  expected_.inter_pic_predicted = true;  // P
  expected_.max_picture_id = kMaxOneBytePictureId;
  expected_.picture_id = 2;

  // Each reference picture ID is picture_id - pid_diff, wrapping around at
  // kMaxPictureId + 1.
  expected_.num_ref_pics = 3;
  expected_.pid_diff[0] = 1;
  expected_.ref_picture_id[0] = 1;    // 2 - 1 = 1
  expected_.pid_diff[1] = 3;
  expected_.ref_picture_id[1] = 127;  // (kMaxPictureId + 1) + 2 - 3 = 127
  expected_.pid_diff[2] = 127;
  expected_.ref_picture_id[2] = 3;    // (kMaxPictureId + 1) + 2 - 127 = 3
  Init(kFrameBytes, kMaxPacketBytes);

  // One packet:
  // I:1, P:1, L:0, F:1, B:1, E:1, V:0 (5hdr + 16 payload)
  // I:   2
  // P,F: P_DIFF:1,   N:1
  //      P_DIFF:3,   N:1
  //      P_DIFF:127, N:0
  const size_t kHdrSizes[] = {5};
  const size_t kPacketSizes[] = {21};
  CreateParseAndCheckPackets(kHdrSizes, kPacketSizes,
                             GTEST_ARRAY_SIZE_(kPacketSizes));
}
333 
// Reference picture differences are set but no picture ID is: the packetizer
// is expected to refuse to produce any packet.
TEST_F(RtpPacketizerVp9Test, TestRefIdxFailsWithoutPictureId) {
  const size_t kFrameSize = 16;
  const size_t kPacketSize = 21;

  expected_.inter_pic_predicted = true;  // P
  expected_.flexible_mode = true;        // F
  expected_.num_ref_pics = 1;
  expected_.pid_diff[0] = 3;
  Init(kFrameSize, kPacketSize);

  // No packets are expected, so no size tables are needed.
  const size_t kExpectedNum = 0;
  CreateParseAndCheckPackets(nullptr, nullptr, kExpectedNum);
}
347 
// Scalability structure with a one-frame GOF and no per-layer resolutions.
TEST_F(RtpPacketizerVp9Test, TestSsDataWithoutSpatialResolutionPresent) {
  constexpr size_t kFrameBytes = 21;
  constexpr size_t kMaxPacketBytes = 26;

  expected_.ss_data_available = true;                  // V
  expected_.num_spatial_layers = 1;                    // N_S + 1
  expected_.spatial_layer_resolution_present = false;  // Y
  expected_.gof.num_frames_in_gof = 1;                 // N_G
  expected_.gof.temporal_idx[0] = 0;                   // T
  expected_.gof.temporal_up_switch[0] = true;          // U
  expected_.gof.num_ref_pics[0] = 1;                   // R
  expected_.gof.pid_diff[0][0] = 4;
  Init(kFrameBytes, kMaxPacketBytes);

  // One packet:
  // I:0, P:0, L:0, F:0, B:1, E:1, V:1 (5hdr + 21 payload)
  // N_S:0, Y:0, G:1
  // N_G:1
  // T:0, U:1, R:1 | P_DIFF[0][0]:4
  const size_t kHdrSizes[] = {5};
  const size_t kPacketSizes[] = {26};
  CreateParseAndCheckPackets(kHdrSizes, kPacketSizes,
                             GTEST_ARRAY_SIZE_(kPacketSizes));
}
372 
// Scalability structure with an empty GOF: only the one-byte SS header is
// written, giving a 2-byte payload descriptor.
TEST_F(RtpPacketizerVp9Test, TestSsDataWithoutGbitPresent) {
  constexpr size_t kFrameBytes = 21;
  constexpr size_t kMaxPacketBytes = 23;

  expected_.ss_data_available = true;                  // V
  expected_.num_spatial_layers = 1;                    // N_S + 1
  expected_.spatial_layer_resolution_present = false;  // Y
  expected_.gof.num_frames_in_gof = 0;                 // G:0
  Init(kFrameBytes, kMaxPacketBytes);

  // One packet:
  // I:0, P:0, L:0, F:0, B:1, E:1, V:1 (2hdr + 21 payload)
  // N_S:0, Y:0, G:0
  const size_t kHdrSizes[] = {2};
  const size_t kPacketSizes[] = {23};
  CreateParseAndCheckPackets(kHdrSizes, kPacketSizes,
                             GTEST_ARRAY_SIZE_(kPacketSizes));
}
391 
// Full scalability structure: two spatial layers with resolutions and a
// three-frame GOF, all fitting into a single packet.
TEST_F(RtpPacketizerVp9Test, TestSsData) {
  constexpr size_t kFrameBytes = 21;
  constexpr size_t kMaxPacketBytes = 40;

  expected_.ss_data_available = true;
  expected_.num_spatial_layers = 2;
  expected_.spatial_layer_resolution_present = true;
  // Spatial layer 0.
  expected_.width[0] = 640;
  expected_.height[0] = 360;
  // Spatial layer 1.
  expected_.width[1] = 1280;
  expected_.height[1] = 720;

  expected_.gof.num_frames_in_gof = 3;
  // GOF frame 0: no references.
  expected_.gof.temporal_idx[0] = 0;
  expected_.gof.temporal_up_switch[0] = true;
  expected_.gof.num_ref_pics[0] = 0;
  // GOF frame 1: three references.
  expected_.gof.temporal_idx[1] = 1;
  expected_.gof.temporal_up_switch[1] = true;
  expected_.gof.num_ref_pics[1] = 3;
  expected_.gof.pid_diff[1][0] = 5;
  expected_.gof.pid_diff[1][1] = 6;
  expected_.gof.pid_diff[1][2] = 7;
  // GOF frame 2: two references.
  expected_.gof.temporal_idx[2] = 2;
  expected_.gof.temporal_up_switch[2] = false;
  expected_.gof.num_ref_pics[2] = 2;
  expected_.gof.pid_diff[2][0] = 8;
  expected_.gof.pid_diff[2][1] = 9;
  Init(kFrameBytes, kMaxPacketBytes);

  // One packet:
  // I:0, P:0, L:0, F:0, B:1, E:1, V:1 (19hdr + 21 payload)
  // N_S:1, Y:1, G:1
  // WIDTH:640   // 2 bytes
  // HEIGHT:360  // 2 bytes
  // WIDTH:1280  // 2 bytes
  // HEIGHT:720  // 2 bytes
  // N_G:3
  // T:0, U:1, R:0
  // T:1, U:1, R:3 | P_DIFF[1][0]:5 | P_DIFF[1][1]:6 | P_DIFF[1][2]:7
  // T:2, U:0, R:2 | P_DIFF[2][0]:8 | P_DIFF[2][1]:9
  const size_t kHdrSizes[] = {19};
  const size_t kPacketSizes[] = {40};
  CreateParseAndCheckPackets(kHdrSizes, kPacketSizes,
                             GTEST_ARRAY_SIZE_(kPacketSizes));
}
436 
// The scalability structure makes the first packet's descriptor 19 bytes
// long, leaving room for only one payload byte there; the rest of the frame
// is carried by two further packets with 1-byte descriptors.
TEST_F(RtpPacketizerVp9Test, TestSsDataDoesNotFitInAveragePacket) {
  constexpr size_t kFrameBytes = 24;
  constexpr size_t kMaxPacketBytes = 20;

  expected_.ss_data_available = true;
  expected_.num_spatial_layers = 2;
  expected_.spatial_layer_resolution_present = true;
  // Spatial layer 0.
  expected_.width[0] = 640;
  expected_.height[0] = 360;
  // Spatial layer 1.
  expected_.width[1] = 1280;
  expected_.height[1] = 720;

  expected_.gof.num_frames_in_gof = 3;
  // GOF frame 0: no references.
  expected_.gof.temporal_idx[0] = 0;
  expected_.gof.temporal_up_switch[0] = true;
  expected_.gof.num_ref_pics[0] = 0;
  // GOF frame 1: three references.
  expected_.gof.temporal_idx[1] = 1;
  expected_.gof.temporal_up_switch[1] = true;
  expected_.gof.num_ref_pics[1] = 3;
  expected_.gof.pid_diff[1][0] = 5;
  expected_.gof.pid_diff[1][1] = 6;
  expected_.gof.pid_diff[1][2] = 7;
  // GOF frame 2: two references.
  expected_.gof.temporal_idx[2] = 2;
  expected_.gof.temporal_up_switch[2] = false;
  expected_.gof.num_ref_pics[2] = 2;
  expected_.gof.pid_diff[2][0] = 8;
  expected_.gof.pid_diff[2][1] = 9;
  Init(kFrameBytes, kMaxPacketBytes);

  // Three packets. The first one carries the SS data:
  // I:0, P:0, L:0, F:0, B:1, E:0, V:1 (19hdr + 1 payload)
  // N_S:1, Y:1, G:1
  // WIDTH:640   // 2 bytes
  // HEIGHT:360  // 2 bytes
  // WIDTH:1280  // 2 bytes
  // HEIGHT:720  // 2 bytes
  // N_G:3
  // T:0, U:1, R:0
  // T:1, U:1, R:3 | P_DIFF[1][0]:5 | P_DIFF[1][1]:6 | P_DIFF[1][2]:7
  // T:2, U:0, R:2 | P_DIFF[2][0]:8 | P_DIFF[2][1]:9
  // The last two packets have 1-byte VP9 descriptors and carry the remaining
  // 14 and 9 payload bytes.
  const size_t kHdrSizes[] = {19, 1, 1};
  const size_t kPacketSizes[] = {20, 15, 10};
  CreateParseAndCheckPackets(kHdrSizes, kPacketSizes,
                             GTEST_ARRAY_SIZE_(kPacketSizes));
}
482 
// Packetizes the same two-packet frame once per spatial layer and checks that
// the RTP marker bit is set only on the last packet of the highest layer.
TEST_F(RtpPacketizerVp9Test, TestOnlyHighestSpatialLayerSetMarker) {
  constexpr size_t kFrameBytes = 10;
  constexpr size_t kMaxPacketBytes = 8;
  constexpr size_t kNoLastPacketReduction = 0;
  constexpr uint8_t kNumSpatialLayers = 3;
  const uint8_t kFrameData[kFrameBytes] = {7};
  const RTPFragmentationHeader* const kUnfragmented = nullptr;

  RTPVideoHeaderVP9 header;
  header.InitRTPVideoHeaderVP9();
  header.flexible_mode = true;
  header.num_spatial_layers = kNumSpatialLayers;

  RtpPacketToSend rtp_packet(kNoExtensions);

  for (uint8_t layer = 0; layer < kNumSpatialLayers; ++layer) {
    header.spatial_idx = layer;
    RtpPacketizerVp9 packetizer(header, kMaxPacketBytes,
                                kNoLastPacketReduction);
    packetizer.SetPayloadData(kFrameData, sizeof(kFrameData), kUnfragmented);

    // First packet of the layer frame: never marked.
    ASSERT_TRUE(packetizer.NextPacket(&rtp_packet));
    EXPECT_FALSE(rtp_packet.Marker());

    // Last packet of the layer frame: marked only for the top layer.
    ASSERT_TRUE(packetizer.NextPacket(&rtp_packet));
    const bool is_top_layer = (layer + 1 == kNumSpatialLayers);
    EXPECT_EQ(is_top_layer, rtp_packet.Marker());
  }
}
524 
// A 10-byte frame cannot fit into a single 8-byte packet, so two packets are
// the minimum; the packetizer must report and produce exactly two.
TEST_F(RtpPacketizerVp9Test, TestGeneratesMinimumNumberOfPackets) {
  constexpr size_t kFrameBytes = 10;
  constexpr size_t kMaxPacketBytes = 8;
  constexpr size_t kNoLastPacketReduction = 0;
  constexpr size_t kExpectedPackets = 2;
  const uint8_t kFrameData[kFrameBytes] = {7};
  const RTPFragmentationHeader* const kUnfragmented = nullptr;

  RTPVideoHeaderVP9 header;
  header.InitRTPVideoHeaderVP9();

  RtpPacketToSend rtp_packet(kNoExtensions);

  RtpPacketizerVp9 packetizer(header, kMaxPacketBytes, kNoLastPacketReduction);
  const auto reported_packets =
      packetizer.SetPayloadData(kFrameData, sizeof(kFrameData), kUnfragmented);
  EXPECT_EQ(kExpectedPackets, reported_packets);

  // Only the final packet carries the marker bit.
  ASSERT_TRUE(packetizer.NextPacket(&rtp_packet));
  EXPECT_FALSE(rtp_packet.Marker());
  ASSERT_TRUE(packetizer.NextPacket(&rtp_packet));
  EXPECT_TRUE(rtp_packet.Marker());
}
549 
// Reserving 5 bytes in the last packet forces a third packet.
//
// Hand calculation kept from the original test, which assumes a 2-byte VP9
// payload descriptor: two packets offer 2 * (8 - 2) - 5 = 7 payload bytes,
// which is less than the 10-byte frame, while three packets offer
// 3 * (8 - 2) - 5 = 13 bytes.
TEST_F(RtpPacketizerVp9Test, TestRespectsLastPacketReductionLen) {
  constexpr size_t kFrameBytes = 10;
  constexpr size_t kMaxPacketBytes = 8;
  constexpr size_t kLastPacketReduction = 5;
  constexpr size_t kExpectedPackets = 3;
  const uint8_t kFrameData[kFrameBytes] = {7};
  const RTPFragmentationHeader* const kUnfragmented = nullptr;

  RTPVideoHeaderVP9 header;
  header.InitRTPVideoHeaderVP9();
  header.flexible_mode = true;

  RtpPacketToSend rtp_packet(kNoExtensions);

  RtpPacketizerVp9 packetizer(header, kMaxPacketBytes, kLastPacketReduction);
  const auto reported_packets =
      packetizer.SetPayloadData(kFrameData, sizeof(kFrameData), kUnfragmented);
  EXPECT_EQ(reported_packets, kExpectedPackets);

  // Only the final packet carries the marker bit.
  ASSERT_TRUE(packetizer.NextPacket(&rtp_packet));
  EXPECT_FALSE(rtp_packet.Marker());
  ASSERT_TRUE(packetizer.NextPacket(&rtp_packet));
  EXPECT_FALSE(rtp_packet.Marker());
  ASSERT_TRUE(packetizer.NextPacket(&rtp_packet));
  EXPECT_TRUE(rtp_packet.Marker());
}
581 
582 class RtpDepacketizerVp9Test : public ::testing::Test {
583  protected:
RtpDepacketizerVp9Test()584   RtpDepacketizerVp9Test()
585       : depacketizer_(new RtpDepacketizerVp9()) {}
586 
SetUp()587   virtual void SetUp() {
588     expected_.InitRTPVideoHeaderVP9();
589   }
590 
591   RTPVideoHeaderVP9 expected_;
592   std::unique_ptr<RtpDepacketizer> depacketizer_;
593 };
594 
// A one-byte descriptor with only the B and E bits set.
TEST_F(RtpDepacketizerVp9Test, ParseBasicHeader) {
  constexpr uint8_t kDescriptorBytes = 1;
  uint8_t buffer[4] = {0};
  buffer[0] = 0x0C;  // I:0 P:0 L:0 F:0 B:1 E:1 V:0 R:0

  expected_.beginning_of_frame = true;
  expected_.end_of_frame = true;
  ParseAndCheckPacket(buffer, expected_, kDescriptorBytes, sizeof(buffer));
}
603 
// Picture ID encoded in a single byte (M bit clear).
TEST_F(RtpDepacketizerVp9Test, ParseOneBytePictureId) {
  constexpr uint8_t kDescriptorBytes = 2;
  uint8_t buffer[10] = {0};
  buffer[0] = 0x80;  // I:1 P:0 L:0 F:0 B:0 E:0 V:0 R:0
  buffer[1] = kMaxOneBytePictureId;

  expected_.max_picture_id = kMaxOneBytePictureId;
  expected_.picture_id = kMaxOneBytePictureId;
  ParseAndCheckPacket(buffer, expected_, kDescriptorBytes, sizeof(buffer));
}
614 
// Picture ID encoded in two bytes (M bit set in the first one).
TEST_F(RtpDepacketizerVp9Test, ParseTwoBytePictureId) {
  constexpr uint8_t kDescriptorBytes = 3;
  uint8_t buffer[10] = {0};
  buffer[0] = 0x80;  // I:1 P:0 L:0 F:0 B:0 E:0 V:0 R:0
  buffer[1] = 0x80 | ((kMaxTwoBytePictureId >> 8) & 0x7F);  // M:1 + high bits
  buffer[2] = kMaxTwoBytePictureId & 0xFF;                  // low byte

  expected_.max_picture_id = kMaxTwoBytePictureId;
  expected_.picture_id = kMaxTwoBytePictureId;
  ParseAndCheckPacket(buffer, expected_, kDescriptorBytes, sizeof(buffer));
}
626 
// Layer indices followed by TL0PICIDX, as used in non-flexible mode.
TEST_F(RtpDepacketizerVp9Test, ParseLayerInfoWithNonFlexibleMode) {
  constexpr uint8_t kDescriptorBytes = 3;
  constexpr uint8_t kT = 2;   // temporal layer index
  constexpr uint8_t kU = 1;   // temporal up-switch bit
  constexpr uint8_t kS = 1;   // spatial layer index
  constexpr uint8_t kD = 1;   // inter-layer dependency bit
  constexpr uint8_t kTl0 = 17;

  uint8_t buffer[13] = {0};
  buffer[0] = 0x20;  // I:0 P:0 L:1 F:0 B:0 E:0 V:0 R:0
  buffer[1] = (kT << 5) | (kU << 4) | (kS << 1) | kD;
  buffer[2] = kTl0;

  // T:2 U:1 S:1 D:1
  // TL0PICIDX:17
  expected_.temporal_idx = kT;
  expected_.temporal_up_switch = (kU != 0);
  expected_.spatial_idx = kS;
  expected_.inter_layer_predicted = (kD != 0);
  expected_.tl0_pic_idx = kTl0;
  ParseAndCheckPacket(buffer, expected_, kDescriptorBytes, sizeof(buffer));
}
648 
// Layer indices without TL0PICIDX, as used in flexible mode.
TEST_F(RtpDepacketizerVp9Test, ParseLayerInfoWithFlexibleMode) {
  constexpr uint8_t kDescriptorBytes = 2;
  constexpr uint8_t kT = 2;  // temporal layer index
  constexpr uint8_t kU = 1;  // temporal up-switch bit
  constexpr uint8_t kS = 0;  // spatial layer index
  constexpr uint8_t kD = 0;  // inter-layer dependency bit

  uint8_t buffer[13] = {0};
  buffer[0] = 0x38;  // I:0 P:0 L:1 F:1 B:1 E:0 V:0 R:0
  buffer[1] = (kT << 5) | (kU << 4) | (kS << 1) | kD;

  // I:0 P:0 L:1 F:1 B:1 E:0 V:0
  // L:   T:2 U:1 S:0 D:0
  expected_.flexible_mode = true;
  expected_.beginning_of_frame = true;
  expected_.temporal_idx = kT;
  expected_.temporal_up_switch = (kU != 0);
  expected_.spatial_idx = kS;
  expected_.inter_layer_predicted = (kD != 0);
  ParseAndCheckPacket(buffer, expected_, kDescriptorBytes, sizeof(buffer));
}
669 
// Flexible-mode packet with a two-byte picture ID and three P_DIFF entries;
// checks that the reference picture IDs are derived with wrap-around.
TEST_F(RtpDepacketizerVp9Test, ParseRefIdx) {
  constexpr uint8_t kDescriptorBytes = 6;
  constexpr int16_t kPicId = 17;
  constexpr size_t kNumRefs = 3;
  const uint8_t kDiffs[kNumRefs] = {17, 18, 127};

  uint8_t buffer[13] = {0};
  buffer[0] = 0xD8;  // I:1 P:1 L:0 F:1 B:1 E:0 V:0 R:0
  buffer[1] = 0x80 | ((kPicId >> 8) & 0x7F);  // Two byte pictureID.
  buffer[2] = kPicId;
  for (size_t ref = 0; ref < kNumRefs; ++ref) {
    // N is set on every entry except the last one.
    const int n_bit = (ref + 1 < kNumRefs) ? 1 : 0;
    buffer[3 + ref] = (kDiffs[ref] << 1) | n_bit;
    expected_.pid_diff[ref] = kDiffs[ref];
  }

  // I:1 P:1 L:0 F:1 B:1 E:0 V:0
  // I:    PICTURE ID:17 (two bytes)
  // P,F:  P_DIFF:17  N:1 => refPicId = 17 - 17 = 0
  // P,F:  P_DIFF:18  N:1 => refPicId = (kMaxPictureId + 1) + 17 - 18 = 0x7FFF
  // P,F:  P_DIFF:127 N:0 => refPicId = (kMaxPictureId + 1) + 17 - 127 = 32658
  expected_.flexible_mode = true;
  expected_.beginning_of_frame = true;
  expected_.inter_pic_predicted = true;
  expected_.picture_id = kPicId;
  expected_.num_ref_pics = kNumRefs;
  expected_.ref_picture_id[0] = 0;
  expected_.ref_picture_id[1] = 0x7FFF;
  expected_.ref_picture_id[2] = 32658;
  ParseAndCheckPacket(buffer, expected_, kDescriptorBytes, sizeof(buffer));
}
703 
// A P_DIFF entry without a picture ID (I:0) must be rejected by the parser.
TEST_F(RtpDepacketizerVp9Test, ParseRefIdxFailsWithNoPictureId) {
  constexpr uint8_t kDiff = 3;
  uint8_t buffer[13] = {0};
  buffer[0] = 0x58;          // I:0 P:1 L:0 F:1 B:1 E:0 V:0 R:0
  buffer[1] = (kDiff << 1);  // P,F:  P_DIFF:3 N:0

  RtpDepacketizer::ParsedPayload result;
  const bool parsed_ok = depacketizer_->Parse(&result, buffer, sizeof(buffer));
  EXPECT_FALSE(parsed_ok);
}
713 
// Four chained P_DIFF entries are one more than the three the payload
// descriptor allows, so the parser must reject the packet.
TEST_F(RtpDepacketizerVp9Test, ParseRefIdxFailsWithTooManyRefPics) {
  constexpr uint8_t kDiff = 3;
  constexpr size_t kNumEntries = 4;

  uint8_t buffer[13] = {0};
  buffer[0] = 0xD8;                  // I:1 P:1 L:0 F:1 B:1 E:0 V:0 R:0
  buffer[1] = kMaxOneBytePictureId;  // I:    PICTURE ID:127
  for (size_t entry = 0; entry < kNumEntries; ++entry) {
    // P,F:  P_DIFF:3, with N:1 on all entries but the last (N:0).
    const int n_bit = (entry + 1 < kNumEntries) ? 1 : 0;
    buffer[2 + entry] = (kDiff << 1) | n_bit;
  }

  RtpDepacketizer::ParsedPayload result;
  const bool parsed_ok = depacketizer_->Parse(&result, buffer, sizeof(buffer));
  EXPECT_FALSE(parsed_ok);
}
727 
// Scalability structure without resolutions and with a two-frame GOF.
TEST_F(RtpDepacketizerVp9Test, ParseSsData) {
  constexpr uint8_t kDescriptorBytes = 6;
  constexpr uint8_t kY = 0;          // no per-layer resolution
  constexpr size_t kNumLayers = 2;   // encoded as N_S = kNumLayers - 1
  constexpr size_t kGofFrames = 2;   // N_G

  uint8_t buffer[23] = {0};
  buffer[0] = 0x0A;  // I:0 P:0 L:0 F:0 B:1 E:0 V:1 R:0
  buffer[1] = ((kNumLayers - 1) << 5) | (kY << 4) | (1 << 3);  // N_S Y G:1 -
  buffer[2] = kGofFrames;                                      // N_G
  buffer[3] = (0 << 5) | (1 << 4) | (0 << 2) | 0;              // T:0 U:1 R:0 -
  buffer[4] = (2 << 5) | (0 << 4) | (1 << 2) | 0;              // T:2 U:0 R:1 -
  buffer[5] = 33;                                              // P_DIFF

  expected_.beginning_of_frame = true;
  expected_.ss_data_available = true;
  expected_.num_spatial_layers = kNumLayers;
  expected_.spatial_layer_resolution_present = (kY != 0);
  expected_.gof.num_frames_in_gof = kGofFrames;
  // GOF frame 0.
  expected_.gof.temporal_idx[0] = 0;
  expected_.gof.temporal_up_switch[0] = true;
  expected_.gof.num_ref_pics[0] = 0;
  // GOF frame 1.
  expected_.gof.temporal_idx[1] = 2;
  expected_.gof.temporal_up_switch[1] = false;
  expected_.gof.num_ref_pics[1] = 1;
  expected_.gof.pid_diff[1][0] = 33;
  ParseAndCheckPacket(buffer, expected_, kDescriptorBytes, sizeof(buffer));
}
755 
// P:0 with B:1 is reported as the first packet of a key frame.
TEST_F(RtpDepacketizerVp9Test, ParseFirstPacketInKeyFrame) {
  uint8_t buffer[2] = {0};
  buffer[0] = 0x08;  // I:0 P:0 L:0 F:0 B:1 E:0 V:0 R:0

  RtpDepacketizer::ParsedPayload result;
  ASSERT_TRUE(depacketizer_->Parse(&result, buffer, sizeof(buffer)));
  EXPECT_EQ(kVideoFrameKey, result.frame_type);
  EXPECT_TRUE(result.type.Video.is_first_packet_in_frame);
}
765 
// P:1 with B:0 is reported as a delta frame packet that is not the first
// packet of its frame.
TEST_F(RtpDepacketizerVp9Test, ParseLastPacketInDeltaFrame) {
  uint8_t buffer[2] = {0};
  buffer[0] = 0x44;  // I:0 P:1 L:0 F:0 B:0 E:1 V:0 R:0

  RtpDepacketizer::ParsedPayload result;
  ASSERT_TRUE(depacketizer_->Parse(&result, buffer, sizeof(buffer)));
  EXPECT_EQ(kVideoFrameDelta, result.frame_type);
  EXPECT_FALSE(result.type.Video.is_first_packet_in_frame);
}
775 
// SS data with two spatial layer resolutions; the generic video header is
// expected to report the resolution of the first layer.
TEST_F(RtpDepacketizerVp9Test, ParseResolution) {
  constexpr size_t kNumLayers = 2;
  const uint16_t kWidths[kNumLayers] = {640, 1280};
  const uint16_t kHeights[kNumLayers] = {360, 720};

  uint8_t buffer[20] = {0};
  buffer[0] = 0x0A;  // I:0 P:0 L:0 F:0 B:1 E:0 V:1 R:0
  buffer[1] = (1 << 5) | (1 << 4) | 0;  // N_S:1 Y:1 G:0
  // Each layer takes four bytes: big-endian width followed by height.
  for (size_t layer = 0; layer < kNumLayers; ++layer) {
    uint8_t* const entry = &buffer[2 + 4 * layer];
    entry[0] = kWidths[layer] >> 8;
    entry[1] = kWidths[layer] & 0xFF;
    entry[2] = kHeights[layer] >> 8;
    entry[3] = kHeights[layer] & 0xFF;
  }

  RtpDepacketizer::ParsedPayload result;
  ASSERT_TRUE(depacketizer_->Parse(&result, buffer, sizeof(buffer)));
  EXPECT_EQ(kWidths[0], result.type.Video.width);
  EXPECT_EQ(kHeights[0], result.type.Video.height);
}
796 
// Parsing with a payload length of zero must fail.
TEST_F(RtpDepacketizerVp9Test, ParseFailsForNoPayloadLength) {
  constexpr size_t kEmptyLength = 0;
  uint8_t buffer[1] = {0};
  RtpDepacketizer::ParsedPayload result;
  const bool parsed_ok = depacketizer_->Parse(&result, buffer, kEmptyLength);
  EXPECT_FALSE(parsed_ok);
}
802 
// A packet holding only the one-byte descriptor, with no payload after it,
// must be rejected.
TEST_F(RtpDepacketizerVp9Test, ParseFailsForTooShortBufferToFitPayload) {
  constexpr uint8_t kDescriptorBytes = 1;
  uint8_t buffer[kDescriptorBytes] = {0};
  RtpDepacketizer::ParsedPayload result;
  const bool parsed_ok = depacketizer_->Parse(&result, buffer, sizeof(buffer));
  EXPECT_FALSE(parsed_ok);
}
809 
810 }  // namespace webrtc
811