1 /*
2  *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 #include "common_video/h264/h264_bitstream_parser.h"
11 
12 #include <stdlib.h>
13 
14 #include <cstdint>
15 #include <vector>
16 
17 #include "common_video/h264/h264_common.h"
18 #include "rtc_base/bit_buffer.h"
19 #include "rtc_base/logging.h"
20 
namespace {

// Largest magnitude accepted for a parsed slice_qp_delta.
constexpr int kMaxAbsQpDeltaValue = 51;
// Inclusive bounds accepted for the QP computed in GetLastSliceQp().
constexpr int kMinQpValue = 0;
constexpr int kMaxQpValue = 51;

}  // namespace
28 
29 namespace webrtc {
30 
// Evaluates |x|; if it is false, logs the failed expression text and returns
// |res| from the enclosing function. The do/while(0) wrapper makes the macro
// behave as a single statement, so it is safe in unbraced if/else bodies and
// requires a trailing semicolon at the call site.
#define RETURN_ON_FAIL(x, res)              \
  do {                                      \
    if (!(x)) {                             \
      RTC_LOG_F(LS_ERROR) << "FAILED: " #x; \
      return res;                           \
    }                                       \
  } while (0)

// Same as RETURN_ON_FAIL, returning kInvalidStream on failure.
#define RETURN_INV_ON_FAIL(x) RETURN_ON_FAIL(x, kInvalidStream)
38 
H264BitstreamParser()39 H264BitstreamParser::H264BitstreamParser() {}
~H264BitstreamParser()40 H264BitstreamParser::~H264BitstreamParser() {}
41 
ParseNonParameterSetNalu(const uint8_t * source,size_t source_length,uint8_t nalu_type)42 H264BitstreamParser::Result H264BitstreamParser::ParseNonParameterSetNalu(
43     const uint8_t* source,
44     size_t source_length,
45     uint8_t nalu_type) {
46   if (!sps_ || !pps_)
47     return kInvalidStream;
48 
49   last_slice_qp_delta_ = absl::nullopt;
50   const std::vector<uint8_t> slice_rbsp =
51       H264::ParseRbsp(source, source_length);
52   if (slice_rbsp.size() < H264::kNaluTypeSize)
53     return kInvalidStream;
54 
55   rtc::BitBuffer slice_reader(slice_rbsp.data() + H264::kNaluTypeSize,
56                               slice_rbsp.size() - H264::kNaluTypeSize);
57   // Check to see if this is an IDR slice, which has an extra field to parse
58   // out.
59   bool is_idr = (source[0] & 0x0F) == H264::NaluType::kIdr;
60   uint8_t nal_ref_idc = (source[0] & 0x60) >> 5;
61   uint32_t golomb_tmp;
62   uint32_t bits_tmp;
63 
64   // first_mb_in_slice: ue(v)
65   RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
66   // slice_type: ue(v)
67   uint32_t slice_type;
68   RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&slice_type));
69   // slice_type's 5..9 range is used to indicate that all slices of a picture
70   // have the same value of slice_type % 5, we don't care about that, so we map
71   // to the corresponding 0..4 range.
72   slice_type %= 5;
73   // pic_parameter_set_id: ue(v)
74   RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
75   if (sps_->separate_colour_plane_flag == 1) {
76     // colour_plane_id
77     RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 2));
78   }
79   // frame_num: u(v)
80   // Represented by log2_max_frame_num bits.
81   RETURN_INV_ON_FAIL(
82       slice_reader.ReadBits(&bits_tmp, sps_->log2_max_frame_num));
83   uint32_t field_pic_flag = 0;
84   if (sps_->frame_mbs_only_flag == 0) {
85     // field_pic_flag: u(1)
86     RETURN_INV_ON_FAIL(slice_reader.ReadBits(&field_pic_flag, 1));
87     if (field_pic_flag != 0) {
88       // bottom_field_flag: u(1)
89       RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
90     }
91   }
92   if (is_idr) {
93     // idr_pic_id: ue(v)
94     RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
95   }
96   // pic_order_cnt_lsb: u(v)
97   // Represented by sps_.log2_max_pic_order_cnt_lsb bits.
98   if (sps_->pic_order_cnt_type == 0) {
99     RETURN_INV_ON_FAIL(
100         slice_reader.ReadBits(&bits_tmp, sps_->log2_max_pic_order_cnt_lsb));
101     if (pps_->bottom_field_pic_order_in_frame_present_flag &&
102         field_pic_flag == 0) {
103       // delta_pic_order_cnt_bottom: se(v)
104       RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
105     }
106   }
107   if (sps_->pic_order_cnt_type == 1 &&
108       !sps_->delta_pic_order_always_zero_flag) {
109     // delta_pic_order_cnt[0]: se(v)
110     RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
111     if (pps_->bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) {
112       // delta_pic_order_cnt[1]: se(v)
113       RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
114     }
115   }
116   if (pps_->redundant_pic_cnt_present_flag) {
117     // redundant_pic_cnt: ue(v)
118     RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
119   }
120   if (slice_type == H264::SliceType::kB) {
121     // direct_spatial_mv_pred_flag: u(1)
122     RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
123   }
124   switch (slice_type) {
125     case H264::SliceType::kP:
126     case H264::SliceType::kB:
127     case H264::SliceType::kSp:
128       uint32_t num_ref_idx_active_override_flag;
129       // num_ref_idx_active_override_flag: u(1)
130       RETURN_INV_ON_FAIL(
131           slice_reader.ReadBits(&num_ref_idx_active_override_flag, 1));
132       if (num_ref_idx_active_override_flag != 0) {
133         // num_ref_idx_l0_active_minus1: ue(v)
134         RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
135         if (slice_type == H264::SliceType::kB) {
136           // num_ref_idx_l1_active_minus1: ue(v)
137           RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
138         }
139       }
140       break;
141     default:
142       break;
143   }
144   // assume nal_unit_type != 20 && nal_unit_type != 21:
145   if (nalu_type == 20 || nalu_type == 21) {
146     RTC_LOG(LS_ERROR) << "Unsupported nal unit type.";
147     return kUnsupportedStream;
148   }
149   // if (nal_unit_type == 20 || nal_unit_type == 21)
150   //   ref_pic_list_mvc_modification()
151   // else
152   {
153     // ref_pic_list_modification():
154     // |slice_type| checks here don't use named constants as they aren't named
155     // in the spec for this segment. Keeping them consistent makes it easier to
156     // verify that they are both the same.
157     if (slice_type % 5 != 2 && slice_type % 5 != 4) {
158       // ref_pic_list_modification_flag_l0: u(1)
159       uint32_t ref_pic_list_modification_flag_l0;
160       RETURN_INV_ON_FAIL(
161           slice_reader.ReadBits(&ref_pic_list_modification_flag_l0, 1));
162       if (ref_pic_list_modification_flag_l0) {
163         uint32_t modification_of_pic_nums_idc;
164         do {
165           // modification_of_pic_nums_idc: ue(v)
166           RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(
167               &modification_of_pic_nums_idc));
168           if (modification_of_pic_nums_idc == 0 ||
169               modification_of_pic_nums_idc == 1) {
170             // abs_diff_pic_num_minus1: ue(v)
171             RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
172           } else if (modification_of_pic_nums_idc == 2) {
173             // long_term_pic_num: ue(v)
174             RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
175           }
176         } while (modification_of_pic_nums_idc != 3);
177       }
178     }
179     if (slice_type % 5 == 1) {
180       // ref_pic_list_modification_flag_l1: u(1)
181       uint32_t ref_pic_list_modification_flag_l1;
182       RETURN_INV_ON_FAIL(
183           slice_reader.ReadBits(&ref_pic_list_modification_flag_l1, 1));
184       if (ref_pic_list_modification_flag_l1) {
185         uint32_t modification_of_pic_nums_idc;
186         do {
187           // modification_of_pic_nums_idc: ue(v)
188           RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(
189               &modification_of_pic_nums_idc));
190           if (modification_of_pic_nums_idc == 0 ||
191               modification_of_pic_nums_idc == 1) {
192             // abs_diff_pic_num_minus1: ue(v)
193             RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
194           } else if (modification_of_pic_nums_idc == 2) {
195             // long_term_pic_num: ue(v)
196             RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
197           }
198         } while (modification_of_pic_nums_idc != 3);
199       }
200     }
201   }
202   // TODO(pbos): Do we need support for pred_weight_table()?
203   if ((pps_->weighted_pred_flag && (slice_type == H264::SliceType::kP ||
204                                     slice_type == H264::SliceType::kSp)) ||
205       (pps_->weighted_bipred_idc == 1 && slice_type == H264::SliceType::kB)) {
206     RTC_LOG(LS_ERROR) << "Streams with pred_weight_table unsupported.";
207     return kUnsupportedStream;
208   }
209   // if ((weighted_pred_flag && (slice_type == P || slice_type == SP)) ||
210   //    (weighted_bipred_idc == 1 && slice_type == B)) {
211   //  pred_weight_table()
212   // }
213   if (nal_ref_idc != 0) {
214     // dec_ref_pic_marking():
215     if (is_idr) {
216       // no_output_of_prior_pics_flag: u(1)
217       // long_term_reference_flag: u(1)
218       RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 2));
219     } else {
220       // adaptive_ref_pic_marking_mode_flag: u(1)
221       uint32_t adaptive_ref_pic_marking_mode_flag;
222       RETURN_INV_ON_FAIL(
223           slice_reader.ReadBits(&adaptive_ref_pic_marking_mode_flag, 1));
224       if (adaptive_ref_pic_marking_mode_flag) {
225         uint32_t memory_management_control_operation;
226         do {
227           // memory_management_control_operation: ue(v)
228           RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(
229               &memory_management_control_operation));
230           if (memory_management_control_operation == 1 ||
231               memory_management_control_operation == 3) {
232             // difference_of_pic_nums_minus1: ue(v)
233             RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
234           }
235           if (memory_management_control_operation == 2) {
236             // long_term_pic_num: ue(v)
237             RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
238           }
239           if (memory_management_control_operation == 3 ||
240               memory_management_control_operation == 6) {
241             // long_term_frame_idx: ue(v)
242             RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
243           }
244           if (memory_management_control_operation == 4) {
245             // max_long_term_frame_idx_plus1: ue(v)
246             RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
247           }
248         } while (memory_management_control_operation != 0);
249       }
250     }
251   }
252   if (pps_->entropy_coding_mode_flag && slice_type != H264::SliceType::kI &&
253       slice_type != H264::SliceType::kSi) {
254     // cabac_init_idc: ue(v)
255     RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
256   }
257 
258   int32_t last_slice_qp_delta;
259   RETURN_INV_ON_FAIL(
260       slice_reader.ReadSignedExponentialGolomb(&last_slice_qp_delta));
261   if (abs(last_slice_qp_delta) > kMaxAbsQpDeltaValue) {
262     // Something has gone wrong, and the parsed value is invalid.
263     RTC_LOG(LS_WARNING) << "Parsed QP value out of range.";
264     return kInvalidStream;
265   }
266 
267   last_slice_qp_delta_ = last_slice_qp_delta;
268   return kOk;
269 }
270 
ParseSlice(const uint8_t * slice,size_t length)271 void H264BitstreamParser::ParseSlice(const uint8_t* slice, size_t length) {
272   H264::NaluType nalu_type = H264::ParseNaluType(slice[0]);
273   switch (nalu_type) {
274     case H264::NaluType::kSps: {
275       sps_ = SpsParser::ParseSps(slice + H264::kNaluTypeSize,
276                                  length - H264::kNaluTypeSize);
277       if (!sps_)
278         RTC_LOG(LS_WARNING) << "Unable to parse SPS from H264 bitstream.";
279       break;
280     }
281     case H264::NaluType::kPps: {
282       pps_ = PpsParser::ParsePps(slice + H264::kNaluTypeSize,
283                                  length - H264::kNaluTypeSize);
284       if (!pps_)
285         RTC_LOG(LS_WARNING) << "Unable to parse PPS from H264 bitstream.";
286       break;
287     }
288     case H264::NaluType::kAud:
289     case H264::NaluType::kSei:
290       break;  // Ignore these nalus, as we don't care about their contents.
291     default:
292       Result res = ParseNonParameterSetNalu(slice, length, nalu_type);
293       if (res != kOk)
294         RTC_LOG(LS_INFO) << "Failed to parse bitstream. Error: " << res;
295       break;
296   }
297 }
298 
ParseBitstream(rtc::ArrayView<const uint8_t> bitstream)299 void H264BitstreamParser::ParseBitstream(
300     rtc::ArrayView<const uint8_t> bitstream) {
301   std::vector<H264::NaluIndex> nalu_indices =
302       H264::FindNaluIndices(bitstream.data(), bitstream.size());
303   for (const H264::NaluIndex& index : nalu_indices)
304     ParseSlice(bitstream.data() + index.payload_start_offset,
305                index.payload_size);
306 }
307 
GetLastSliceQp() const308 absl::optional<int> H264BitstreamParser::GetLastSliceQp() const {
309   if (!last_slice_qp_delta_ || !pps_)
310     return absl::nullopt;
311   const int qp = 26 + pps_->pic_init_qp_minus26 + *last_slice_qp_delta_;
312   if (qp < kMinQpValue || qp > kMaxQpValue) {
313     RTC_LOG(LS_ERROR) << "Parsed invalid QP from bitstream.";
314     return absl::nullopt;
315   }
316   return qp;
317 }
318 
319 }  // namespace webrtc
320