1 /*
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "common_video/h264/sps_parser.h"
12
13 #include "common_video/h264/h264_common.h"
14 #include "rtc_base/arraysize.h"
15 #include "rtc_base/bit_buffer.h"
16 #include "rtc_base/buffer.h"
17 #include "test/gtest.h"
18
19 namespace webrtc {
20
21 // Example SPS can be generated with ffmpeg. Here's an example set of commands,
22 // runnable on OS X:
23 // 1) Generate a video, from the camera:
24 // ffmpeg -f avfoundation -i "0" -video_size 640x360 camera.mov
25 //
26 // 2) Scale the video to the desired size:
27 // ffmpeg -i camera.mov -vf scale=640x360 scaled.mov
28 //
29 // 3) Get just the H.264 bitstream in AnnexB:
30 // ffmpeg -i scaled.mov -vcodec copy -vbsf h264_mp4toannexb -an out.h264
31 //
32 // 4) Open out.h264 and find the SPS, generally everything between the first
33 // two start codes (0 0 0 1 or 0 0 1). The first byte should be 0x67,
34 // which should be stripped out before being passed to the parser.
35
// Size, in bytes, of the scratch array GenerateFakeSps() writes its raw RBSP
// into before it is copied to the caller's buffer.
static constexpr size_t kSpsBufferMaxSize = 256;
37
38 // Generates a fake SPS with basically everything empty but the width/height.
39 // Pass in a buffer of at least kSpsBufferMaxSize.
40 // The fake SPS that this generates also always has at least one emulation byte
41 // at offset 2, since the first two bytes are always 0, and has a 0x3 as the
42 // level_idc, to make sure the parser doesn't eat all 0x3 bytes.
GenerateFakeSps(uint16_t width,uint16_t height,int id,uint32_t log2_max_frame_num_minus4,uint32_t log2_max_pic_order_cnt_lsb_minus4,rtc::Buffer * out_buffer)43 void GenerateFakeSps(uint16_t width,
44 uint16_t height,
45 int id,
46 uint32_t log2_max_frame_num_minus4,
47 uint32_t log2_max_pic_order_cnt_lsb_minus4,
48 rtc::Buffer* out_buffer) {
49 uint8_t rbsp[kSpsBufferMaxSize] = {0};
50 rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize);
51 // Profile byte.
52 writer.WriteUInt8(0);
53 // Constraint sets and reserved zero bits.
54 writer.WriteUInt8(0);
55 // level_idc.
56 writer.WriteUInt8(0x3u);
57 // seq_paramter_set_id.
58 writer.WriteExponentialGolomb(id);
59 // Profile is not special, so we skip all the chroma format settings.
60
61 // Now some bit magic.
62 // log2_max_frame_num_minus4: ue(v).
63 writer.WriteExponentialGolomb(log2_max_frame_num_minus4);
64 // pic_order_cnt_type: ue(v). 0 is the type we want.
65 writer.WriteExponentialGolomb(0);
66 // log2_max_pic_order_cnt_lsb_minus4: ue(v). 0 is fine.
67 writer.WriteExponentialGolomb(log2_max_pic_order_cnt_lsb_minus4);
68 // max_num_ref_frames: ue(v). 0 is fine.
69 writer.WriteExponentialGolomb(0);
70 // gaps_in_frame_num_value_allowed_flag: u(1).
71 writer.WriteBits(0, 1);
72 // Next are width/height. First, calculate the mbs/map_units versions.
73 uint16_t width_in_mbs_minus1 = (width + 15) / 16 - 1;
74
75 // For the height, we're going to define frame_mbs_only_flag, so we need to
76 // divide by 2. See the parser for the full calculation.
77 uint16_t height_in_map_units_minus1 = ((height + 15) / 16 - 1) / 2;
78 // Write each as ue(v).
79 writer.WriteExponentialGolomb(width_in_mbs_minus1);
80 writer.WriteExponentialGolomb(height_in_map_units_minus1);
81 // frame_mbs_only_flag: u(1). Needs to be false.
82 writer.WriteBits(0, 1);
83 // mb_adaptive_frame_field_flag: u(1).
84 writer.WriteBits(0, 1);
85 // direct_8x8_inferene_flag: u(1).
86 writer.WriteBits(0, 1);
87 // frame_cropping_flag: u(1). 1, so we can supply crop.
88 writer.WriteBits(1, 1);
89 // Now we write the left/right/top/bottom crop. For simplicity, we'll put all
90 // the crop at the left/top.
91 // We picked a 4:2:0 format, so the crops are 1/2 the pixel crop values.
92 // Left/right.
93 writer.WriteExponentialGolomb(((16 - (width % 16)) % 16) / 2);
94 writer.WriteExponentialGolomb(0);
95 // Top/bottom.
96 writer.WriteExponentialGolomb(((16 - (height % 16)) % 16) / 2);
97 writer.WriteExponentialGolomb(0);
98
99 // vui_parameters_present_flag: u(1)
100 writer.WriteBits(0, 1);
101
102 // Get the number of bytes written (including the last partial byte).
103 size_t byte_count, bit_offset;
104 writer.GetCurrentOffset(&byte_count, &bit_offset);
105 if (bit_offset > 0) {
106 byte_count++;
107 }
108
109 out_buffer->Clear();
110 H264::WriteRbsp(rbsp, byte_count, out_buffer);
111 }
112
113 // TODO(nisse): Delete test fixture.
114 class H264SpsParserTest : public ::testing::Test {
115 public:
H264SpsParserTest()116 H264SpsParserTest() {}
~H264SpsParserTest()117 ~H264SpsParserTest() override {}
118
119 absl::optional<SpsParser::SpsState> sps_;
120 };
121
// Parses a captured 1280x720 SPS and checks the decoded dimensions.
TEST_F(H264SpsParserTest, TestSampleSPSHdLandscape) {
  // SPS for a 1280x720 camera capture from ffmpeg on osx. Contains
  // emulation bytes but no cropping.
  const uint8_t buffer[] = {0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05,
                            0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00,
                            0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60};
  sps_ = SpsParser::ParseSps(buffer, arraysize(buffer));
  // Fatal on purpose: the checks below dereference sps_, which must not be
  // empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(1280u, sps_->width);
  EXPECT_EQ(720u, sps_->height);
}
133
// Parses a captured 640x360 SPS and checks the decoded dimensions.
TEST_F(H264SpsParserTest, TestSampleSPSVgaLandscape) {
  // SPS for a 640x360 camera capture from ffmpeg on osx. Contains emulation
  // bytes and cropping (360 isn't divisible by 16).
  const uint8_t buffer[] = {0x7A, 0x00, 0x1E, 0xBC, 0xD9, 0x40, 0xA0, 0x2F,
                            0xF8, 0x98, 0x40, 0x00, 0x00, 0x03, 0x01, 0x80,
                            0x00, 0x00, 0x56, 0x83, 0xC5, 0x8B, 0x65, 0x80};
  sps_ = SpsParser::ParseSps(buffer, arraysize(buffer));
  // Fatal on purpose: the checks below dereference sps_, which must not be
  // empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(640u, sps_->width);
  EXPECT_EQ(360u, sps_->height);
}
145
// Parses a captured 200x400 SPS and checks the decoded dimensions.
TEST_F(H264SpsParserTest, TestSampleSPSWeirdResolution) {
  // SPS for a 200x400 camera capture from ffmpeg on osx. Horizontal and
  // vertical crop (neither dimension is divisible by 16).
  const uint8_t buffer[] = {0x7A, 0x00, 0x0D, 0xBC, 0xD9, 0x43, 0x43, 0x3E,
                            0x5E, 0x10, 0x00, 0x00, 0x03, 0x00, 0x60, 0x00,
                            0x00, 0x15, 0xA0, 0xF1, 0x42, 0x99, 0x60};
  sps_ = SpsParser::ParseSps(buffer, arraysize(buffer));
  // Fatal on purpose: the checks below dereference sps_, which must not be
  // empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(200u, sps_->width);
  EXPECT_EQ(400u, sps_->height);
}
157
// Round-trips a generated 320x180 SPS with id 1 through the parser.
TEST_F(H264SpsParserTest, TestSyntheticSPSQvgaLandscape) {
  rtc::Buffer buffer;
  GenerateFakeSps(320u, 180u, 1, 0, 0, &buffer);
  sps_ = SpsParser::ParseSps(buffer.data(), buffer.size());
  // Fatal on purpose: the checks below dereference sps_, which must not be
  // empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(320u, sps_->width);
  EXPECT_EQ(180u, sps_->height);
  EXPECT_EQ(1u, sps_->id);
}
167
// Round-trips a generated 156x122 SPS with id 2 through the parser; neither
// dimension is a multiple of 16, so both crop offsets are non-zero.
TEST_F(H264SpsParserTest, TestSyntheticSPSWeirdResolution) {
  rtc::Buffer buffer;
  GenerateFakeSps(156u, 122u, 2, 0, 0, &buffer);
  sps_ = SpsParser::ParseSps(buffer.data(), buffer.size());
  // Fatal on purpose: the checks below dereference sps_, which must not be
  // empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(156u, sps_->width);
  EXPECT_EQ(122u, sps_->height);
  EXPECT_EQ(2u, sps_->id);
}
177
// Parses a 1920x1080 SPS that carries scaling lists and checks the decoded
// dimensions.
TEST_F(H264SpsParserTest, TestSampleSPSWithScalingLists) {
  // SPS from a 1920x1080 video. Contains scaling lists (and vertical cropping).
  const uint8_t buffer[] = {0x64, 0x00, 0x2a, 0xad, 0x84, 0x01, 0x0c, 0x20,
                            0x08, 0x61, 0x00, 0x43, 0x08, 0x02, 0x18, 0x40,
                            0x10, 0xc2, 0x00, 0x84, 0x3b, 0x50, 0x3c, 0x01,
                            0x13, 0xf2, 0xcd, 0xc0, 0x40, 0x40, 0x50, 0x00,
                            0x00, 0x00, 0x10, 0x00, 0x00, 0x01, 0xe8, 0x40};
  sps_ = SpsParser::ParseSps(buffer, arraysize(buffer));
  // Fatal on purpose: the checks below dereference sps_, which must not be
  // empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(1920u, sps_->width);
  EXPECT_EQ(1080u, sps_->height);
}
190
// Checks how log2_max_frame_num_minus4 is reported: 0 and 28 are expected to
// parse (as 4 and 32 respectively), while 29 is expected to be rejected.
TEST_F(H264SpsParserTest, TestLog2MaxFrameNumMinus4) {
  rtc::Buffer buffer;

  // Smallest value.
  GenerateFakeSps(320u, 180u, 1, 0, 0, &buffer);
  sps_ = SpsParser::ParseSps(buffer.data(), buffer.size());
  // Fatal on purpose: the checks below dereference sps_, which must not be
  // empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(320u, sps_->width);
  EXPECT_EQ(180u, sps_->height);
  EXPECT_EQ(1u, sps_->id);
  EXPECT_EQ(4u, sps_->log2_max_frame_num);

  // Largest value the test expects to be accepted.
  GenerateFakeSps(320u, 180u, 1, 28, 0, &buffer);
  sps_ = SpsParser::ParseSps(buffer.data(), buffer.size());
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(320u, sps_->width);
  EXPECT_EQ(180u, sps_->height);
  EXPECT_EQ(1u, sps_->id);
  EXPECT_EQ(32u, sps_->log2_max_frame_num);

  // One past that: parsing must fail.
  GenerateFakeSps(320u, 180u, 1, 29, 0, &buffer);
  EXPECT_FALSE(SpsParser::ParseSps(buffer.data(), buffer.size()));
}
212
// Checks how log2_max_pic_order_cnt_lsb_minus4 is reported: 0 and 28 are
// expected to parse (as 4 and 32 respectively), while 29 is expected to be
// rejected.
TEST_F(H264SpsParserTest, TestLog2MaxPicOrderCntMinus4) {
  rtc::Buffer buffer;

  // Smallest value.
  GenerateFakeSps(320u, 180u, 1, 0, 0, &buffer);
  sps_ = SpsParser::ParseSps(buffer.data(), buffer.size());
  // Fatal on purpose: the checks below dereference sps_, which must not be
  // empty.
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(320u, sps_->width);
  EXPECT_EQ(180u, sps_->height);
  EXPECT_EQ(1u, sps_->id);
  EXPECT_EQ(4u, sps_->log2_max_pic_order_cnt_lsb);

  // Largest value the test expects to be accepted.
  GenerateFakeSps(320u, 180u, 1, 0, 28, &buffer);
  sps_ = SpsParser::ParseSps(buffer.data(), buffer.size());
  ASSERT_TRUE(sps_.has_value());
  EXPECT_EQ(320u, sps_->width);
  EXPECT_EQ(180u, sps_->height);
  EXPECT_EQ(1u, sps_->id);
  EXPECT_EQ(32u, sps_->log2_max_pic_order_cnt_lsb);

  // One past that: parsing must fail.
  GenerateFakeSps(320u, 180u, 1, 0, 29, &buffer);
  EXPECT_FALSE(SpsParser::ParseSps(buffer.data(), buffer.size()));
}
234
235 } // namespace webrtc
236