1 /*
2  *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "common_video/h264/sps_parser.h"
12 
13 #include <memory>
14 #include <vector>
15 
16 #include "common_video/h264/h264_common.h"
17 #include "rtc_base/bitbuffer.h"
18 #include "rtc_base/logging.h"
19 
20 typedef rtc::Optional<webrtc::SpsParser::SpsState> OptionalSps;
21 
// Returns an empty OptionalSps from the enclosing function when |x| evaluates
// to false. The body is wrapped in do { ... } while (0) so that the expansion
// plus the caller's trailing semicolon forms exactly one statement; this keeps
// the macro safe inside unbraced if/else chains.
#define RETURN_EMPTY_ON_FAIL(x) \
  do {                          \
    if (!(x)) {                 \
      return OptionalSps();     \
    }                           \
  } while (0)
26 
27 namespace webrtc {
28 
29 // General note: this is based off the 02/2014 version of the H.264 standard.
30 // You can find it on this page:
31 // http://www.itu.int/rec/T-REC-H.264
32 
33 // Unpack RBSP and parse SPS state from the supplied buffer.
ParseSps(const uint8_t * data,size_t length)34 rtc::Optional<SpsParser::SpsState> SpsParser::ParseSps(const uint8_t* data,
35                                                        size_t length) {
36   std::vector<uint8_t> unpacked_buffer = H264::ParseRbsp(data, length);
37   rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size());
38   return ParseSpsUpToVui(&bit_buffer);
39 }
40 
ParseSpsUpToVui(rtc::BitBuffer * buffer)41 rtc::Optional<SpsParser::SpsState> SpsParser::ParseSpsUpToVui(
42     rtc::BitBuffer* buffer) {
43   // Now, we need to use a bit buffer to parse through the actual AVC SPS
44   // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the
45   // H.264 standard for a complete description.
46   // Since we only care about resolution, we ignore the majority of fields, but
47   // we still have to actively parse through a lot of the data, since many of
48   // the fields have variable size.
49   // We're particularly interested in:
50   // chroma_format_idc -> affects crop units
51   // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16).
52   // frame_crop_*_offset -> crop information
53 
54   SpsState sps;
55 
56   // The golomb values we have to read, not just consume.
57   uint32_t golomb_ignored;
58 
59   // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is
60   // 0. It defaults to 1, when not specified.
61   uint32_t chroma_format_idc = 1;
62 
63   // profile_idc: u(8). We need it to determine if we need to read/skip chroma
64   // formats.
65   uint8_t profile_idc;
66   RETURN_EMPTY_ON_FAIL(buffer->ReadUInt8(&profile_idc));
67   // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits
68   // 1 bit each for the flags + 2 bits = 8 bits = 1 byte.
69   RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
70   // level_idc: u(8)
71   RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
72   // seq_parameter_set_id: ue(v)
73   RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.id));
74   sps.separate_colour_plane_flag = 0;
75   // See if profile_idc has chroma format information.
76   if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 ||
77       profile_idc == 244 || profile_idc == 44 || profile_idc == 83 ||
78       profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ||
79       profile_idc == 138 || profile_idc == 139 || profile_idc == 134) {
80     // chroma_format_idc: ue(v)
81     RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&chroma_format_idc));
82     if (chroma_format_idc == 3) {
83       // separate_colour_plane_flag: u(1)
84       RETURN_EMPTY_ON_FAIL(
85           buffer->ReadBits(&sps.separate_colour_plane_flag, 1));
86     }
87     // bit_depth_luma_minus8: ue(v)
88     RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
89     // bit_depth_chroma_minus8: ue(v)
90     RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
91     // qpprime_y_zero_transform_bypass_flag: u(1)
92     RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
93     // seq_scaling_matrix_present_flag: u(1)
94     uint32_t seq_scaling_matrix_present_flag;
95     RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&seq_scaling_matrix_present_flag, 1));
96     if (seq_scaling_matrix_present_flag) {
97       // Process the scaling lists just enough to be able to properly
98       // skip over them, so we can still read the resolution on streams
99       // where this is included.
100       int scaling_list_count = (chroma_format_idc == 3 ? 12 : 8);
101       for (int i = 0; i < scaling_list_count; ++i) {
102         // seq_scaling_list_present_flag[i]  : u(1)
103         uint32_t seq_scaling_list_present_flags;
104         RETURN_EMPTY_ON_FAIL(
105             buffer->ReadBits(&seq_scaling_list_present_flags, 1));
106         if (seq_scaling_list_present_flags != 0) {
107           int last_scale = 8;
108           int next_scale = 8;
109           int size_of_scaling_list = i < 6 ? 16 : 64;
110           for (int j = 0; j < size_of_scaling_list; j++) {
111             if (next_scale != 0) {
112               int32_t delta_scale;
113               // delta_scale: se(v)
114               RETURN_EMPTY_ON_FAIL(
115                   buffer->ReadSignedExponentialGolomb(&delta_scale));
116               next_scale = (last_scale + delta_scale + 256) % 256;
117             }
118             if (next_scale != 0)
119               last_scale = next_scale;
120           }
121         }
122       }
123     }
124   }
125   // log2_max_frame_num_minus4: ue(v)
126   RETURN_EMPTY_ON_FAIL(
127       buffer->ReadExponentialGolomb(&sps.log2_max_frame_num_minus4));
128   // pic_order_cnt_type: ue(v)
129   RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.pic_order_cnt_type));
130   if (sps.pic_order_cnt_type == 0) {
131     // log2_max_pic_order_cnt_lsb_minus4: ue(v)
132     RETURN_EMPTY_ON_FAIL(
133         buffer->ReadExponentialGolomb(&sps.log2_max_pic_order_cnt_lsb_minus4));
134   } else if (sps.pic_order_cnt_type == 1) {
135     // delta_pic_order_always_zero_flag: u(1)
136     RETURN_EMPTY_ON_FAIL(
137         buffer->ReadBits(&sps.delta_pic_order_always_zero_flag, 1));
138     // offset_for_non_ref_pic: se(v)
139     RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
140     // offset_for_top_to_bottom_field: se(v)
141     RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
142     // num_ref_frames_in_pic_order_cnt_cycle: ue(v)
143     uint32_t num_ref_frames_in_pic_order_cnt_cycle;
144     RETURN_EMPTY_ON_FAIL(
145         buffer->ReadExponentialGolomb(&num_ref_frames_in_pic_order_cnt_cycle));
146     for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
147       // offset_for_ref_frame[i]: se(v)
148       RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
149     }
150   }
151   // max_num_ref_frames: ue(v)
152   RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.max_num_ref_frames));
153   // gaps_in_frame_num_value_allowed_flag: u(1)
154   RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
155   //
156   // IMPORTANT ONES! Now we're getting to resolution. First we read the pic
157   // width/height in macroblocks (16x16), which gives us the base resolution,
158   // and then we continue on until we hit the frame crop offsets, which are used
159   // to signify resolutions that aren't multiples of 16.
160   //
161   // pic_width_in_mbs_minus1: ue(v)
162   uint32_t pic_width_in_mbs_minus1;
163   RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&pic_width_in_mbs_minus1));
164   // pic_height_in_map_units_minus1: ue(v)
165   uint32_t pic_height_in_map_units_minus1;
166   RETURN_EMPTY_ON_FAIL(
167       buffer->ReadExponentialGolomb(&pic_height_in_map_units_minus1));
168   // frame_mbs_only_flag: u(1)
169   RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.frame_mbs_only_flag, 1));
170   if (!sps.frame_mbs_only_flag) {
171     // mb_adaptive_frame_field_flag: u(1)
172     RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
173   }
174   // direct_8x8_inference_flag: u(1)
175   RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
176   //
177   // MORE IMPORTANT ONES! Now we're at the frame crop information.
178   //
179   // frame_cropping_flag: u(1)
180   uint32_t frame_cropping_flag;
181   uint32_t frame_crop_left_offset = 0;
182   uint32_t frame_crop_right_offset = 0;
183   uint32_t frame_crop_top_offset = 0;
184   uint32_t frame_crop_bottom_offset = 0;
185   RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&frame_cropping_flag, 1));
186   if (frame_cropping_flag) {
187     // frame_crop_{left, right, top, bottom}_offset: ue(v)
188     RETURN_EMPTY_ON_FAIL(
189         buffer->ReadExponentialGolomb(&frame_crop_left_offset));
190     RETURN_EMPTY_ON_FAIL(
191         buffer->ReadExponentialGolomb(&frame_crop_right_offset));
192     RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&frame_crop_top_offset));
193     RETURN_EMPTY_ON_FAIL(
194         buffer->ReadExponentialGolomb(&frame_crop_bottom_offset));
195   }
196   // vui_parameters_present_flag: u(1)
197   RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.vui_params_present, 1));
198 
199   // Far enough! We don't use the rest of the SPS.
200 
201   // Start with the resolution determined by the pic_width/pic_height fields.
202   sps.width = 16 * (pic_width_in_mbs_minus1 + 1);
203   sps.height =
204       16 * (2 - sps.frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1);
205 
206   // Figure out the crop units in pixels. That's based on the chroma format's
207   // sampling, which is indicated by chroma_format_idc.
208   if (sps.separate_colour_plane_flag || chroma_format_idc == 0) {
209     frame_crop_bottom_offset *= (2 - sps.frame_mbs_only_flag);
210     frame_crop_top_offset *= (2 - sps.frame_mbs_only_flag);
211   } else if (!sps.separate_colour_plane_flag && chroma_format_idc > 0) {
212     // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2).
213     if (chroma_format_idc == 1 || chroma_format_idc == 2) {
214       frame_crop_left_offset *= 2;
215       frame_crop_right_offset *= 2;
216     }
217     // Height multipliers for format 1 (4:2:0).
218     if (chroma_format_idc == 1) {
219       frame_crop_top_offset *= 2;
220       frame_crop_bottom_offset *= 2;
221     }
222   }
223   // Subtract the crop for each dimension.
224   sps.width -= (frame_crop_left_offset + frame_crop_right_offset);
225   sps.height -= (frame_crop_top_offset + frame_crop_bottom_offset);
226 
227   return OptionalSps(sps);
228 }
229 
230 }  // namespace webrtc
231