1 /*
2  *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "common_video/h264/sps_parser.h"
12 
13 #include <cstdint>
14 #include <vector>
15 
16 #include "common_video/h264/h264_common.h"
17 #include "rtc_base/bit_buffer.h"
18 
19 namespace {
20 typedef absl::optional<webrtc::SpsParser::SpsState> OptionalSps;
21 
22 #define RETURN_EMPTY_ON_FAIL(x) \
23   if (!(x)) {                   \
24     return OptionalSps();       \
25   }
26 
27 constexpr int kScalingDeltaMin = -128;
28 constexpr int kScaldingDeltaMax = 127;
29 }  // namespace
30 
31 namespace webrtc {
32 
33 SpsParser::SpsState::SpsState() = default;
34 SpsParser::SpsState::SpsState(const SpsState&) = default;
35 SpsParser::SpsState::~SpsState() = default;
36 
37 // General note: this is based off the 02/2014 version of the H.264 standard.
38 // You can find it on this page:
39 // http://www.itu.int/rec/T-REC-H.264
40 
41 // Unpack RBSP and parse SPS state from the supplied buffer.
ParseSps(const uint8_t * data,size_t length)42 absl::optional<SpsParser::SpsState> SpsParser::ParseSps(const uint8_t* data,
43                                                         size_t length) {
44   std::vector<uint8_t> unpacked_buffer = H264::ParseRbsp(data, length);
45   rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size());
46   return ParseSpsUpToVui(&bit_buffer);
47 }
48 
ParseSpsUpToVui(rtc::BitBuffer * buffer)49 absl::optional<SpsParser::SpsState> SpsParser::ParseSpsUpToVui(
50     rtc::BitBuffer* buffer) {
51   // Now, we need to use a bit buffer to parse through the actual AVC SPS
52   // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the
53   // H.264 standard for a complete description.
54   // Since we only care about resolution, we ignore the majority of fields, but
55   // we still have to actively parse through a lot of the data, since many of
56   // the fields have variable size.
57   // We're particularly interested in:
58   // chroma_format_idc -> affects crop units
59   // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16).
60   // frame_crop_*_offset -> crop information
61 
62   SpsState sps;
63 
64   // The golomb values we have to read, not just consume.
65   uint32_t golomb_ignored;
66 
67   // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is
68   // 0. It defaults to 1, when not specified.
69   uint32_t chroma_format_idc = 1;
70 
71   // profile_idc: u(8). We need it to determine if we need to read/skip chroma
72   // formats.
73   uint8_t profile_idc;
74   RETURN_EMPTY_ON_FAIL(buffer->ReadUInt8(&profile_idc));
75   // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits
76   // 1 bit each for the flags + 2 bits = 8 bits = 1 byte.
77   RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
78   // level_idc: u(8)
79   RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
80   // seq_parameter_set_id: ue(v)
81   RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.id));
82   sps.separate_colour_plane_flag = 0;
83   // See if profile_idc has chroma format information.
84   if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 ||
85       profile_idc == 244 || profile_idc == 44 || profile_idc == 83 ||
86       profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ||
87       profile_idc == 138 || profile_idc == 139 || profile_idc == 134) {
88     // chroma_format_idc: ue(v)
89     RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&chroma_format_idc));
90     if (chroma_format_idc == 3) {
91       // separate_colour_plane_flag: u(1)
92       RETURN_EMPTY_ON_FAIL(
93           buffer->ReadBits(&sps.separate_colour_plane_flag, 1));
94     }
95     // bit_depth_luma_minus8: ue(v)
96     RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
97     // bit_depth_chroma_minus8: ue(v)
98     RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
99     // qpprime_y_zero_transform_bypass_flag: u(1)
100     RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
101     // seq_scaling_matrix_present_flag: u(1)
102     uint32_t seq_scaling_matrix_present_flag;
103     RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&seq_scaling_matrix_present_flag, 1));
104     if (seq_scaling_matrix_present_flag) {
105       // Process the scaling lists just enough to be able to properly
106       // skip over them, so we can still read the resolution on streams
107       // where this is included.
108       int scaling_list_count = (chroma_format_idc == 3 ? 12 : 8);
109       for (int i = 0; i < scaling_list_count; ++i) {
110         // seq_scaling_list_present_flag[i]  : u(1)
111         uint32_t seq_scaling_list_present_flags;
112         RETURN_EMPTY_ON_FAIL(
113             buffer->ReadBits(&seq_scaling_list_present_flags, 1));
114         if (seq_scaling_list_present_flags != 0) {
115           int last_scale = 8;
116           int next_scale = 8;
117           int size_of_scaling_list = i < 6 ? 16 : 64;
118           for (int j = 0; j < size_of_scaling_list; j++) {
119             if (next_scale != 0) {
120               int32_t delta_scale;
121               // delta_scale: se(v)
122               RETURN_EMPTY_ON_FAIL(
123                   buffer->ReadSignedExponentialGolomb(&delta_scale));
124               RETURN_EMPTY_ON_FAIL(delta_scale >= kScalingDeltaMin &&
125                                    delta_scale <= kScaldingDeltaMax);
126               next_scale = (last_scale + delta_scale + 256) % 256;
127             }
128             if (next_scale != 0)
129               last_scale = next_scale;
130           }
131         }
132       }
133     }
134   }
135   // log2_max_frame_num and log2_max_pic_order_cnt_lsb are used with
136   // BitBuffer::ReadBits, which can read at most 32 bits at a time. We also have
137   // to avoid overflow when adding 4 to the on-wire golomb value, e.g., for evil
138   // input data, ReadExponentialGolomb might return 0xfffc.
139   const uint32_t kMaxLog2Minus4 = 32 - 4;
140 
141   // log2_max_frame_num_minus4: ue(v)
142   uint32_t log2_max_frame_num_minus4;
143   if (!buffer->ReadExponentialGolomb(&log2_max_frame_num_minus4) ||
144       log2_max_frame_num_minus4 > kMaxLog2Minus4) {
145     return OptionalSps();
146   }
147   sps.log2_max_frame_num = log2_max_frame_num_minus4 + 4;
148 
149   // pic_order_cnt_type: ue(v)
150   RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.pic_order_cnt_type));
151   if (sps.pic_order_cnt_type == 0) {
152     // log2_max_pic_order_cnt_lsb_minus4: ue(v)
153     uint32_t log2_max_pic_order_cnt_lsb_minus4;
154     if (!buffer->ReadExponentialGolomb(&log2_max_pic_order_cnt_lsb_minus4) ||
155         log2_max_pic_order_cnt_lsb_minus4 > kMaxLog2Minus4) {
156       return OptionalSps();
157     }
158     sps.log2_max_pic_order_cnt_lsb = log2_max_pic_order_cnt_lsb_minus4 + 4;
159   } else if (sps.pic_order_cnt_type == 1) {
160     // delta_pic_order_always_zero_flag: u(1)
161     RETURN_EMPTY_ON_FAIL(
162         buffer->ReadBits(&sps.delta_pic_order_always_zero_flag, 1));
163     // offset_for_non_ref_pic: se(v)
164     RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
165     // offset_for_top_to_bottom_field: se(v)
166     RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
167     // num_ref_frames_in_pic_order_cnt_cycle: ue(v)
168     uint32_t num_ref_frames_in_pic_order_cnt_cycle;
169     RETURN_EMPTY_ON_FAIL(
170         buffer->ReadExponentialGolomb(&num_ref_frames_in_pic_order_cnt_cycle));
171     for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
172       // offset_for_ref_frame[i]: se(v)
173       RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
174     }
175   }
176   // max_num_ref_frames: ue(v)
177   RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.max_num_ref_frames));
178   // gaps_in_frame_num_value_allowed_flag: u(1)
179   RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
180   //
181   // IMPORTANT ONES! Now we're getting to resolution. First we read the pic
182   // width/height in macroblocks (16x16), which gives us the base resolution,
183   // and then we continue on until we hit the frame crop offsets, which are used
184   // to signify resolutions that aren't multiples of 16.
185   //
186   // pic_width_in_mbs_minus1: ue(v)
187   uint32_t pic_width_in_mbs_minus1;
188   RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&pic_width_in_mbs_minus1));
189   // pic_height_in_map_units_minus1: ue(v)
190   uint32_t pic_height_in_map_units_minus1;
191   RETURN_EMPTY_ON_FAIL(
192       buffer->ReadExponentialGolomb(&pic_height_in_map_units_minus1));
193   // frame_mbs_only_flag: u(1)
194   RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.frame_mbs_only_flag, 1));
195   if (!sps.frame_mbs_only_flag) {
196     // mb_adaptive_frame_field_flag: u(1)
197     RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
198   }
199   // direct_8x8_inference_flag: u(1)
200   RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
201   //
202   // MORE IMPORTANT ONES! Now we're at the frame crop information.
203   //
204   // frame_cropping_flag: u(1)
205   uint32_t frame_cropping_flag;
206   uint32_t frame_crop_left_offset = 0;
207   uint32_t frame_crop_right_offset = 0;
208   uint32_t frame_crop_top_offset = 0;
209   uint32_t frame_crop_bottom_offset = 0;
210   RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&frame_cropping_flag, 1));
211   if (frame_cropping_flag) {
212     // frame_crop_{left, right, top, bottom}_offset: ue(v)
213     RETURN_EMPTY_ON_FAIL(
214         buffer->ReadExponentialGolomb(&frame_crop_left_offset));
215     RETURN_EMPTY_ON_FAIL(
216         buffer->ReadExponentialGolomb(&frame_crop_right_offset));
217     RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&frame_crop_top_offset));
218     RETURN_EMPTY_ON_FAIL(
219         buffer->ReadExponentialGolomb(&frame_crop_bottom_offset));
220   }
221   // vui_parameters_present_flag: u(1)
222   RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.vui_params_present, 1));
223 
224   // Far enough! We don't use the rest of the SPS.
225 
226   // Start with the resolution determined by the pic_width/pic_height fields.
227   sps.width = 16 * (pic_width_in_mbs_minus1 + 1);
228   sps.height =
229       16 * (2 - sps.frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1);
230 
231   // Figure out the crop units in pixels. That's based on the chroma format's
232   // sampling, which is indicated by chroma_format_idc.
233   if (sps.separate_colour_plane_flag || chroma_format_idc == 0) {
234     frame_crop_bottom_offset *= (2 - sps.frame_mbs_only_flag);
235     frame_crop_top_offset *= (2 - sps.frame_mbs_only_flag);
236   } else if (!sps.separate_colour_plane_flag && chroma_format_idc > 0) {
237     // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2).
238     if (chroma_format_idc == 1 || chroma_format_idc == 2) {
239       frame_crop_left_offset *= 2;
240       frame_crop_right_offset *= 2;
241     }
242     // Height multipliers for format 1 (4:2:0).
243     if (chroma_format_idc == 1) {
244       frame_crop_top_offset *= 2;
245       frame_crop_bottom_offset *= 2;
246     }
247   }
248   // Subtract the crop for each dimension.
249   sps.width -= (frame_crop_left_offset + frame_crop_right_offset);
250   sps.height -= (frame_crop_top_offset + frame_crop_bottom_offset);
251 
252   return OptionalSps(sps);
253 }
254 
255 }  // namespace webrtc
256