1 /*
2  *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "common_video/h264/sps_parser.h"
12 
13 #include "common_video/h264/h264_common.h"
14 #include "rtc_base/arraysize.h"
15 #include "rtc_base/bit_buffer.h"
16 #include "rtc_base/buffer.h"
17 #include "test/gtest.h"
18 
19 namespace webrtc {
20 
21 // Example SPS can be generated with ffmpeg. Here's an example set of commands,
22 // runnable on OS X:
23 // 1) Generate a video, from the camera:
24 // ffmpeg -f avfoundation -i "0" -video_size 640x360 camera.mov
25 //
26 // 2) Scale the video to the desired size:
27 // ffmpeg -i camera.mov -vf scale=640x360 scaled.mov
28 //
29 // 3) Get just the H.264 bitstream in AnnexB:
30 // ffmpeg -i scaled.mov -vcodec copy -vbsf h264_mp4toannexb -an out.h264
31 //
32 // 4) Open out.h264 and find the SPS, generally everything between the first
33 // two start codes (0 0 0 1 or 0 0 1). The first byte should be 0x67,
34 // which should be stripped out before being passed to the parser.
35 
// Capacity, in bytes, of the scratch array that GenerateFakeSps() writes the
// unescaped SPS payload into.
constexpr size_t kSpsBufferMaxSize = 256;
37 
38 // Generates a fake SPS with basically everything empty but the width/height.
39 // Pass in a buffer of at least kSpsBufferMaxSize.
40 // The fake SPS that this generates also always has at least one emulation byte
41 // at offset 2, since the first two bytes are always 0, and has a 0x3 as the
42 // level_idc, to make sure the parser doesn't eat all 0x3 bytes.
GenerateFakeSps(uint16_t width,uint16_t height,int id,uint32_t log2_max_frame_num_minus4,uint32_t log2_max_pic_order_cnt_lsb_minus4,rtc::Buffer * out_buffer)43 void GenerateFakeSps(uint16_t width,
44                      uint16_t height,
45                      int id,
46                      uint32_t log2_max_frame_num_minus4,
47                      uint32_t log2_max_pic_order_cnt_lsb_minus4,
48                      rtc::Buffer* out_buffer) {
49   uint8_t rbsp[kSpsBufferMaxSize] = {0};
50   rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize);
51   // Profile byte.
52   writer.WriteUInt8(0);
53   // Constraint sets and reserved zero bits.
54   writer.WriteUInt8(0);
55   // level_idc.
56   writer.WriteUInt8(0x3u);
57   // seq_paramter_set_id.
58   writer.WriteExponentialGolomb(id);
59   // Profile is not special, so we skip all the chroma format settings.
60 
61   // Now some bit magic.
62   // log2_max_frame_num_minus4: ue(v).
63   writer.WriteExponentialGolomb(log2_max_frame_num_minus4);
64   // pic_order_cnt_type: ue(v). 0 is the type we want.
65   writer.WriteExponentialGolomb(0);
66   // log2_max_pic_order_cnt_lsb_minus4: ue(v). 0 is fine.
67   writer.WriteExponentialGolomb(log2_max_pic_order_cnt_lsb_minus4);
68   // max_num_ref_frames: ue(v). 0 is fine.
69   writer.WriteExponentialGolomb(0);
70   // gaps_in_frame_num_value_allowed_flag: u(1).
71   writer.WriteBits(0, 1);
72   // Next are width/height. First, calculate the mbs/map_units versions.
73   uint16_t width_in_mbs_minus1 = (width + 15) / 16 - 1;
74 
75   // For the height, we're going to define frame_mbs_only_flag, so we need to
76   // divide by 2. See the parser for the full calculation.
77   uint16_t height_in_map_units_minus1 = ((height + 15) / 16 - 1) / 2;
78   // Write each as ue(v).
79   writer.WriteExponentialGolomb(width_in_mbs_minus1);
80   writer.WriteExponentialGolomb(height_in_map_units_minus1);
81   // frame_mbs_only_flag: u(1). Needs to be false.
82   writer.WriteBits(0, 1);
83   // mb_adaptive_frame_field_flag: u(1).
84   writer.WriteBits(0, 1);
85   // direct_8x8_inferene_flag: u(1).
86   writer.WriteBits(0, 1);
87   // frame_cropping_flag: u(1). 1, so we can supply crop.
88   writer.WriteBits(1, 1);
89   // Now we write the left/right/top/bottom crop. For simplicity, we'll put all
90   // the crop at the left/top.
91   // We picked a 4:2:0 format, so the crops are 1/2 the pixel crop values.
92   // Left/right.
93   writer.WriteExponentialGolomb(((16 - (width % 16)) % 16) / 2);
94   writer.WriteExponentialGolomb(0);
95   // Top/bottom.
96   writer.WriteExponentialGolomb(((16 - (height % 16)) % 16) / 2);
97   writer.WriteExponentialGolomb(0);
98 
99   // vui_parameters_present_flag: u(1)
100   writer.WriteBits(0, 1);
101 
102   // Get the number of bytes written (including the last partial byte).
103   size_t byte_count, bit_offset;
104   writer.GetCurrentOffset(&byte_count, &bit_offset);
105   if (bit_offset > 0) {
106     byte_count++;
107   }
108 
109   out_buffer->Clear();
110   H264::WriteRbsp(rbsp, byte_count, out_buffer);
111 }
112 
113 // TODO(nisse): Delete test fixture.
114 class H264SpsParserTest : public ::testing::Test {
115  public:
H264SpsParserTest()116   H264SpsParserTest() {}
~H264SpsParserTest()117   ~H264SpsParserTest() override {}
118 
119   absl::optional<SpsParser::SpsState> sps_;
120 };
121 
// Parses a real-world SPS and checks the decoded resolution.
TEST_F(H264SpsParserTest, TestSampleSPSHdLandscape) {
  // SPS for a 1280x720 camera capture from ffmpeg on osx. Contains
  // emulation bytes but no cropping.
  const uint8_t buffer[] = {0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05,
                            0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00,
                            0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60};
  sps_ = SpsParser::ParseSps(buffer, arraysize(buffer));
  // Fatal assertion: the checks below dereference |sps_|, which must not
  // happen if parsing failed and the optional is empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(1280u, sps_->width);
  EXPECT_EQ(720u, sps_->height);
}
133 
// Parses a real-world SPS that uses cropping and checks the decoded
// resolution.
TEST_F(H264SpsParserTest, TestSampleSPSVgaLandscape) {
  // SPS for a 640x360 camera capture from ffmpeg on osx. Contains emulation
  // bytes and cropping (360 isn't divisible by 16).
  const uint8_t buffer[] = {0x7A, 0x00, 0x1E, 0xBC, 0xD9, 0x40, 0xA0, 0x2F,
                            0xF8, 0x98, 0x40, 0x00, 0x00, 0x03, 0x01, 0x80,
                            0x00, 0x00, 0x56, 0x83, 0xC5, 0x8B, 0x65, 0x80};
  sps_ = SpsParser::ParseSps(buffer, arraysize(buffer));
  // Fatal assertion: the checks below dereference |sps_|, which must not
  // happen if parsing failed and the optional is empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(640u, sps_->width);
  EXPECT_EQ(360u, sps_->height);
}
145 
// Parses a real-world SPS that crops in both directions and checks the
// decoded resolution.
TEST_F(H264SpsParserTest, TestSampleSPSWeirdResolution) {
  // SPS for a 200x400 camera capture from ffmpeg on osx. Horizontal and
  // vertical crop (neither dimension is divisible by 16).
  const uint8_t buffer[] = {0x7A, 0x00, 0x0D, 0xBC, 0xD9, 0x43, 0x43, 0x3E,
                            0x5E, 0x10, 0x00, 0x00, 0x03, 0x00, 0x60, 0x00,
                            0x00, 0x15, 0xA0, 0xF1, 0x42, 0x99, 0x60};
  sps_ = SpsParser::ParseSps(buffer, arraysize(buffer));
  // Fatal assertion: the checks below dereference |sps_|, which must not
  // happen if parsing failed and the optional is empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(200u, sps_->width);
  EXPECT_EQ(400u, sps_->height);
}
157 
// Round-trips a generated 320x180 SPS with id 1 through the parser and checks
// that the size and id come back unchanged.
TEST_F(H264SpsParserTest, TestSyntheticSPSQvgaLandscape) {
  rtc::Buffer buffer;
  GenerateFakeSps(320u, 180u, 1, 0, 0, &buffer);
  sps_ = SpsParser::ParseSps(buffer.data(), buffer.size());
  // Fatal assertion: the checks below dereference |sps_|, which must not
  // happen if parsing failed and the optional is empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(320u, sps_->width);
  EXPECT_EQ(180u, sps_->height);
  EXPECT_EQ(1u, sps_->id);
}
167 
// Round-trips a generated 156x122 SPS with id 2 (neither dimension is a
// multiple of 16) through the parser and checks the size and id.
TEST_F(H264SpsParserTest, TestSyntheticSPSWeirdResolution) {
  rtc::Buffer buffer;
  GenerateFakeSps(156u, 122u, 2, 0, 0, &buffer);
  sps_ = SpsParser::ParseSps(buffer.data(), buffer.size());
  // Fatal assertion: the checks below dereference |sps_|, which must not
  // happen if parsing failed and the optional is empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(156u, sps_->width);
  EXPECT_EQ(122u, sps_->height);
  EXPECT_EQ(2u, sps_->id);
}
177 
// Parses a real-world SPS that carries scaling lists and checks the decoded
// resolution.
TEST_F(H264SpsParserTest, TestSampleSPSWithScalingLists) {
  // SPS from a 1920x1080 video. Contains scaling lists (and vertical cropping).
  const uint8_t buffer[] = {0x64, 0x00, 0x2a, 0xad, 0x84, 0x01, 0x0c, 0x20,
                            0x08, 0x61, 0x00, 0x43, 0x08, 0x02, 0x18, 0x40,
                            0x10, 0xc2, 0x00, 0x84, 0x3b, 0x50, 0x3c, 0x01,
                            0x13, 0xf2, 0xcd, 0xc0, 0x40, 0x40, 0x50, 0x00,
                            0x00, 0x00, 0x10, 0x00, 0x00, 0x01, 0xe8, 0x40};
  sps_ = SpsParser::ParseSps(buffer, arraysize(buffer));
  // Fatal assertion: the checks below dereference |sps_|, which must not
  // happen if parsing failed and the optional is empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(1920u, sps_->width);
  EXPECT_EQ(1080u, sps_->height);
}
190 
// Checks how log2_max_frame_num_minus4 is handled: 0 and 28 must parse (giving
// log2_max_frame_num 4 and 32), while 29 must be rejected.
TEST_F(H264SpsParserTest, TestLog2MaxFrameNumMinus4) {
  rtc::Buffer buffer;

  // Smallest value.
  GenerateFakeSps(320u, 180u, 1, 0, 0, &buffer);
  sps_ = SpsParser::ParseSps(buffer.data(), buffer.size());
  // Fatal assertion: |sps_| is dereferenced below and must not be empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(320u, sps_->width);
  EXPECT_EQ(180u, sps_->height);
  EXPECT_EQ(1u, sps_->id);
  EXPECT_EQ(4u, sps_->log2_max_frame_num);

  // Largest value this test expects to be accepted.
  GenerateFakeSps(320u, 180u, 1, 28, 0, &buffer);
  sps_ = SpsParser::ParseSps(buffer.data(), buffer.size());
  // Fatal assertion: |sps_| is dereferenced below and must not be empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(320u, sps_->width);
  EXPECT_EQ(180u, sps_->height);
  EXPECT_EQ(1u, sps_->id);
  EXPECT_EQ(32u, sps_->log2_max_frame_num);

  // One past that: parsing must fail.
  GenerateFakeSps(320u, 180u, 1, 29, 0, &buffer);
  EXPECT_FALSE(SpsParser::ParseSps(buffer.data(), buffer.size()).has_value());
}
212 
// Checks how log2_max_pic_order_cnt_lsb_minus4 is handled: 0 and 28 must parse
// (giving log2_max_pic_order_cnt_lsb 4 and 32), while 29 must be rejected.
TEST_F(H264SpsParserTest, TestLog2MaxPicOrderCntMinus4) {
  rtc::Buffer buffer;

  // Smallest value.
  GenerateFakeSps(320u, 180u, 1, 0, 0, &buffer);
  sps_ = SpsParser::ParseSps(buffer.data(), buffer.size());
  // Fatal assertion: |sps_| is dereferenced below and must not be empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(320u, sps_->width);
  EXPECT_EQ(180u, sps_->height);
  EXPECT_EQ(1u, sps_->id);
  EXPECT_EQ(4u, sps_->log2_max_pic_order_cnt_lsb);

  // Largest value this test expects to be accepted.
  GenerateFakeSps(320u, 180u, 1, 0, 28, &buffer);
  sps_ = SpsParser::ParseSps(buffer.data(), buffer.size());
  // Fatal assertion: |sps_| is dereferenced below and must not be empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(320u, sps_->width);
  EXPECT_EQ(180u, sps_->height);
  EXPECT_EQ(1u, sps_->id);
  EXPECT_EQ(32u, sps_->log2_max_pic_order_cnt_lsb);

  // One past that: parsing must fail.
  GenerateFakeSps(320u, 180u, 1, 0, 29, &buffer);
  EXPECT_FALSE(SpsParser::ParseSps(buffer.data(), buffer.size()).has_value());
}
234 
235 }  // namespace webrtc
236