// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file contains an implementation of an H264 Annex-B video stream parser.

#ifndef MEDIA_VIDEO_H264_PARSER_H_
#define MEDIA_VIDEO_H264_PARSER_H_

#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>

#include <map>
#include <memory>
#include <vector>

#include "base/macros.h"
#include "base/optional.h"
#include "media/base/media_export.h"
#include "media/base/ranges.h"
#include "media/base/video_codecs.h"
#include "media/base/video_color_space.h"
#include "media/video/h264_bit_reader.h"

namespace gfx {
class Rect;
class Size;
}  // namespace gfx

namespace media {

struct SubsampleEntry;

// For explanations of each struct and its members, see H.264 specification
// at http://www.itu.int/rec/T-REC-H.264.

// Describes a single NAL unit located in a stream. This is a non-owning view:
// |data| points into memory owned by someone else.
struct MEDIA_EXPORT H264NALU {
  H264NALU();

  // Named values for |nal_unit_type|; see the H.264 specification for their
  // meaning.
  enum Type {
    kUnspecified = 0,
    kNonIDRSlice = 1,
    kSliceDataA = 2,
    kSliceDataB = 3,
    kSliceDataC = 4,
    kIDRSlice = 5,
    kSEIMessage = 6,
    kSPS = 7,
    kPPS = 8,
    kAUD = 9,
    kEOSeq = 10,
    kEOStream = 11,
    kFiller = 12,
    kSPSExt = 13,
    kReserved14 = 14,
    kReserved15 = 15,
    kReserved16 = 16,
    kReserved17 = 17,
    kReserved18 = 18,
    kCodedSliceAux = 19,
    kCodedSliceExtension = 20,
  };

  // Points just after the start code (the start code itself is not included).
  // We don't own the underlying memory, so a shallow copy should be made when
  // copying this struct.
  const uint8_t* data;
  off_t size;  // From after start code to start code of next NALU (or EOS).

  int nal_ref_idc;
  int nal_unit_type;  // Stored as int; see Type for the named values.
};

// Number of entries in a 4x4 and in an 8x8 scaling list, respectively.
enum {
  kH264ScalingList4x4Length = 16,
  kH264ScalingList8x8Length = 64,
};

// Sequence parameter set. Member names follow the syntax element names of the
// H.264 specification.
struct MEDIA_EXPORT H264SPS {
  H264SPS();

  // Named values for |profile_idc|.
  // Note that kProfileIDCConstrainedBaseline is an alias of
  // kProfileIDCBaseline: both have the value 66.
  enum H264ProfileIDC {
    kProfileIDCBaseline = 66,
    kProfileIDCConstrainedBaseline = kProfileIDCBaseline,
    kProfileIDCMain = 77,
    kProfileIDScalableBaseline = 83,
    kProfileIDScalableHigh = 86,
    kProfileIDCHigh = 100,
    kProfileIDHigh10 = 110,
    kProfileIDSMultiviewHigh = 118,
    kProfileIDHigh422 = 122,
    kProfileIDStereoHigh = 128,
    kProfileIDHigh444Predictive = 244,
  };

  // Named level values. The enumerators are listed in level order, which is
  // why kLevelIDC1B (numeric value 9) appears between levels 1.0 and 1.1.
  enum H264LevelIDC : uint8_t {
    kLevelIDC1p0 = 10,
    kLevelIDC1B = 9,
    kLevelIDC1p1 = 11,
    kLevelIDC1p2 = 12,
    kLevelIDC1p3 = 13,
    kLevelIDC2p0 = 20,
    kLevelIDC2p1 = 21,
    kLevelIDC2p2 = 22,
    kLevelIDC3p0 = 30,
    kLevelIDC3p1 = 31,
    kLevelIDC3p2 = 32,
    kLevelIDC4p0 = 40,
    kLevelIDC4p1 = 41,
    kLevelIDC4p2 = 42,
    kLevelIDC5p0 = 50,
    kLevelIDC5p1 = 51,
    kLevelIDC5p2 = 52,
    kLevelIDC6p0 = 60,
    kLevelIDC6p1 = 61,
    kLevelIDC6p2 = 62,
  };

  enum AspectRatioIdc {
    kExtendedSar = 255,
  };

  enum {
    // Constants for HRD parameters (spec ch. E.2.2).
    kBitRateScaleConstantTerm = 6,  // Equation E-37.
    kCPBSizeScaleConstantTerm = 4,  // Equation E-38.
    kDefaultInitialCPBRemovalDelayLength = 24,
    kDefaultDPBOutputDelayLength = 24,
    kDefaultTimeOffsetLength = 24,
  };

  int profile_idc;
  bool constraint_set0_flag;
  bool constraint_set1_flag;
  bool constraint_set2_flag;
  bool constraint_set3_flag;
  bool constraint_set4_flag;
  bool constraint_set5_flag;
  int level_idc;
  int seq_parameter_set_id;

  int chroma_format_idc;
  bool separate_colour_plane_flag;
  int bit_depth_luma_minus8;
  int bit_depth_chroma_minus8;
  bool qpprime_y_zero_transform_bypass_flag;

  bool seq_scaling_matrix_present_flag;
  int scaling_list4x4[6][kH264ScalingList4x4Length];
  int scaling_list8x8[6][kH264ScalingList8x8Length];

  int log2_max_frame_num_minus4;
  int pic_order_cnt_type;
  int log2_max_pic_order_cnt_lsb_minus4;
  bool delta_pic_order_always_zero_flag;
  int offset_for_non_ref_pic;
  int offset_for_top_to_bottom_field;
  int num_ref_frames_in_pic_order_cnt_cycle;
  int expected_delta_per_pic_order_cnt_cycle;  // calculated
  int offset_for_ref_frame[255];
  int max_num_ref_frames;
  bool gaps_in_frame_num_value_allowed_flag;
  int pic_width_in_mbs_minus1;
  int pic_height_in_map_units_minus1;
  bool frame_mbs_only_flag;
  bool mb_adaptive_frame_field_flag;
  bool direct_8x8_inference_flag;
  bool frame_cropping_flag;
  int frame_crop_left_offset;
  int frame_crop_right_offset;
  int frame_crop_top_offset;
  int frame_crop_bottom_offset;

  bool vui_parameters_present_flag;
  int sar_width;   // Set to 0 when not specified.
  int sar_height;  // Set to 0 when not specified.
  bool bitstream_restriction_flag;
  int max_num_reorder_frames;
  int max_dec_frame_buffering;
  bool timing_info_present_flag;
  int num_units_in_tick;
  int time_scale;
  bool fixed_frame_rate_flag;

  bool video_signal_type_present_flag;
  int video_format;
  bool video_full_range_flag;
  bool colour_description_present_flag;
  int colour_primaries;
  int transfer_characteristics;
  int matrix_coefficients;

  // TODO(posciak): actually parse these instead of ParseAndIgnoreHRDParameters.
  bool nal_hrd_parameters_present_flag;
  int cpb_cnt_minus1;
  int bit_rate_scale;
  int cpb_size_scale;
  int bit_rate_value_minus1[32];
  int cpb_size_value_minus1[32];
  bool cbr_flag[32];
  int initial_cpb_removal_delay_length_minus_1;
  int cpb_removal_delay_length_minus1;
  int dpb_output_delay_length_minus1;
  int time_offset_length;

  bool low_delay_hrd_flag;

  int chroma_array_type;

  // Get the SPS |level_idc| and |constraint_set3_flag| values that correspond
  // to the requested |profile| and |level| (see Spec A.3.1). The results are
  // written through the |level_idc| and |constraint_set3_flag| out-pointers.
  static void GetLevelConfigFromProfileLevel(VideoCodecProfile profile,
                                             uint8_t level,
                                             int* level_idc,
                                             bool* constraint_set3_flag);

  // Helpers to compute frequently-used values. GetCodedSize() and
  // GetVisibleRect() return base::nullopt if they encounter integer overflow.
  // None of these helpers verify that the results are in-spec for the given
  // profile or level.
  base::Optional<gfx::Size> GetCodedSize() const;
  base::Optional<gfx::Rect> GetVisibleRect() const;
  VideoColorSpace GetColorSpace() const;

  // Helper to compute the indicated level from the parsed SPS data. The
  // returned value is one of the H264LevelIDC enumerators, each of which is
  // named after the level it represents.
  uint8_t GetIndicatedLevel() const;
  // Helper to check if indicated level is lower than or equal to
  // |target_level|.
  bool CheckIndicatedLevelWithinTarget(uint8_t target_level) const;
};

// Picture parameter set. Member names follow the syntax element names of the
// H.264 specification.
struct MEDIA_EXPORT H264PPS {
  H264PPS();

  int pic_parameter_set_id;
  int seq_parameter_set_id;
  bool entropy_coding_mode_flag;
  bool bottom_field_pic_order_in_frame_present_flag;
  int num_slice_groups_minus1;
  // TODO(posciak): Slice groups not implemented, could be added at some point.
  int num_ref_idx_l0_default_active_minus1;
  int num_ref_idx_l1_default_active_minus1;
  bool weighted_pred_flag;
  int weighted_bipred_idc;
  int pic_init_qp_minus26;
  int pic_init_qs_minus26;
  int chroma_qp_index_offset;
  bool deblocking_filter_control_present_flag;
  bool constrained_intra_pred_flag;
  bool redundant_pic_cnt_present_flag;
  bool transform_8x8_mode_flag;

  bool pic_scaling_matrix_present_flag;
  int scaling_list4x4[6][kH264ScalingList4x4Length];
  int scaling_list8x8[6][kH264ScalingList8x8Length];

  int second_chroma_qp_index_offset;
};

// One entry of a reference picture list modification; stored per list in
// H264SliceHeader.
// NOTE(review): which union member is meaningful presumably depends on
// |modification_of_pic_nums_idc| - confirm against the spec/implementation.
struct MEDIA_EXPORT H264ModificationOfPicNum {
  int modification_of_pic_nums_idc;
  union {
    int abs_diff_pic_num_minus1;
    int long_term_pic_num;
  };
};

// Prediction weight table data for one reference list; stored per list in
// H264SliceHeader.
struct MEDIA_EXPORT H264WeightingFactors {
  bool luma_weight_flag;
  bool chroma_weight_flag;
  int luma_weight[32];
  int luma_offset[32];
  int chroma_weight[32][2];
  int chroma_offset[32][2];
};

// One decoded reference picture marking entry; H264SliceHeader stores an
// array of these.
struct MEDIA_EXPORT H264DecRefPicMarking {
  int memory_mgmnt_control_operation;
  int difference_of_pic_nums_minus1;
  int long_term_pic_num;
  int long_term_frame_idx;
  int max_long_term_frame_idx_plus1;
};

// Slice header. Filled in by H264Parser::ParseSliceHeader(); the caller
// provides and manages the storage.
struct MEDIA_EXPORT H264SliceHeader {
  H264SliceHeader();

  // Capacities of the fixed-size arrays below.
  enum { kRefListSize = 32, kRefListModSize = kRefListSize };

  // Slice types, matched by the Is*Slice() helpers below.
  enum Type {
    kPSlice = 0,
    kBSlice = 1,
    kISlice = 2,
    kSPSlice = 3,
    kSISlice = 4,
  };

  bool IsPSlice() const;
  bool IsBSlice() const;
  bool IsISlice() const;
  bool IsSPSlice() const;
  bool IsSISlice() const;

  bool idr_pic_flag;         // from NAL header
  int nal_ref_idc;           // from NAL header
  const uint8_t* nalu_data;  // from NAL header
  off_t nalu_size;           // from NAL header
  off_t header_bit_size;     // calculated

  int first_mb_in_slice;
  int slice_type;
  int pic_parameter_set_id;
  int colour_plane_id;  // TODO(posciak): use this! http://crbug.com/139878
  int frame_num;
  bool field_pic_flag;
  bool bottom_field_flag;
  int idr_pic_id;
  int pic_order_cnt_lsb;
  int delta_pic_order_cnt_bottom;
  int delta_pic_order_cnt0;
  int delta_pic_order_cnt1;
  int redundant_pic_cnt;
  bool direct_spatial_mv_pred_flag;

  bool num_ref_idx_active_override_flag;
  int num_ref_idx_l0_active_minus1;
  int num_ref_idx_l1_active_minus1;
  bool ref_pic_list_modification_flag_l0;
  bool ref_pic_list_modification_flag_l1;
  H264ModificationOfPicNum ref_list_l0_modifications[kRefListModSize];
  H264ModificationOfPicNum ref_list_l1_modifications[kRefListModSize];

  int luma_log2_weight_denom;
  int chroma_log2_weight_denom;

  bool luma_weight_l0_flag;
  bool chroma_weight_l0_flag;
  H264WeightingFactors pred_weight_table_l0;

  bool luma_weight_l1_flag;
  bool chroma_weight_l1_flag;
  H264WeightingFactors pred_weight_table_l1;

  bool no_output_of_prior_pics_flag;
  bool long_term_reference_flag;

  bool adaptive_ref_pic_marking_mode_flag;
  H264DecRefPicMarking ref_pic_marking[kRefListSize];

  int cabac_init_idc;
  int slice_qp_delta;
  bool sp_for_switch_flag;
  int slice_qs_delta;
  int disable_deblocking_filter_idc;
  int slice_alpha_c0_offset_div2;
  int slice_beta_offset_div2;

  // Calculated.
  // Size in bits of dec_ref_pic_marking() syntax element.
  size_t dec_ref_pic_marking_bit_size;
  size_t pic_order_cnt_bit_size;
};

// Payload of a recovery point SEI message; see H264SEIMessage.
struct H264SEIRecoveryPoint {
  int recovery_frame_cnt;
  bool exact_match_flag;
  bool broken_link_flag;
  int changing_slice_group_idc;
};

// An SEI message. Filled in by H264Parser::ParseSEI(); the caller provides
// and manages the storage.
struct MEDIA_EXPORT H264SEIMessage {
  H264SEIMessage();

  // Named values for |type|. Only the recovery point message has a named
  // value and a payload struct here.
  enum Type {
    kSEIRecoveryPoint = 6,
  };

  int type;
  int payload_size;
  union {
    // Placeholder; in future more supported types will contribute to more
    // union members here.
    H264SEIRecoveryPoint recovery_point;
  };
};

// Class to parse an Annex-B H.264 stream,
// as specified in chapter 7 and Annex B of the H.264 spec.
class MEDIA_EXPORT H264Parser {
 public:
  enum Result {
    kOk,
    kInvalidStream,      // error in stream
    kUnsupportedStream,  // stream not supported by the parser
    kEOStream,           // end of stream
  };

  // Find offset from start of data to next NALU start code
  // and size of found start code (3 or 4 bytes).
  // If no start code is found, offset is pointing to the first unprocessed byte
  // (i.e. the first byte that was not considered as a possible start of a start
  // code) and |*start_code_size| is set to 0.
  // NOTE(review): the return value is presumably true iff a start code was
  // found, as documented for FindStartCodeInClearRanges() - confirm against
  // the implementation.
  // Preconditions:
  // - |data_size| >= 0
  // Postconditions:
  // - |*offset| is between 0 and |data_size| included.
  //   It is strictly less than |data_size| if |data_size| > 0.
  // - |*start_code_size| is either 0, 3 or 4.
  static bool FindStartCode(const uint8_t* data,
                            off_t data_size,
                            off_t* offset,
                            off_t* start_code_size);

  // Wrapper for FindStartCode() that skips over start codes that
  // may appear inside of encrypted ranges.
  // NOTE(review): this function is static, so it cannot read the
  // |encrypted_ranges_| member; |ranges| is presumably the set of encrypted
  // ranges to skip - confirm against the implementation.
  // Returns true if a start code was found. Otherwise returns false.
  static bool FindStartCodeInClearRanges(const uint8_t* data,
                                         off_t data_size,
                                         const Ranges<const uint8_t*>& ranges,
                                         off_t* offset,
                                         off_t* start_code_size);

  static VideoCodecProfile ProfileIDCToVideoCodecProfile(int profile_idc);

  // Parses the input stream and returns all the NALUs through |nalus|. Returns
  // false if the stream is invalid.
  static bool ParseNALUs(const uint8_t* stream,
                         size_t stream_size,
                         std::vector<H264NALU>* nalus);

  H264Parser();
  ~H264Parser();

  void Reset();
  // Set current stream pointer to |stream| of |stream_size| in bytes,
  // |stream| owned by caller.
  // For SetEncryptedStream(), |subsamples| contains information about what
  // parts of |stream| are encrypted.
  void SetStream(const uint8_t* stream, off_t stream_size);
  void SetEncryptedStream(const uint8_t* stream,
                          off_t stream_size,
                          const std::vector<SubsampleEntry>& subsamples);

  // Read the stream to find the next NALU, identify it and return
  // that information in |*nalu|. This advances the stream to the beginning
  // of this NALU, but not past it, so subsequent calls to NALU-specific
  // parsing functions (ParseSPS, etc.) will parse this NALU.
  // If the caller wishes to skip the current NALU, it can call this function
  // again, instead of any NALU-type specific parse functions below.
  Result AdvanceToNextNALU(H264NALU* nalu);

  // NALU-specific parsing functions.
  // These should be called after AdvanceToNextNALU().

  // SPSes and PPSes are owned by the parser class and the memory for their
  // structures is managed here, not by the caller, as they are reused
  // across NALUs.
  //
  // Parse an SPS/PPS NALU and save their data in the parser, returning id
  // of the parsed structure in |*sps_id|/|*pps_id|.
  // To get a pointer to a given SPS/PPS structure, use GetSPS()/GetPPS(),
  // passing the returned |*sps_id|/|*pps_id| as parameter.
  // TODO(posciak,fischman): consider replacing returning Result from Parse*()
  // methods with a scoped_ptr and adding an AtEOS() function to check for EOS
  // if Parse*() return NULL.
  Result ParseSPS(int* sps_id);
  Result ParsePPS(int* pps_id);

  // Parses the SPS ID from the SPSExt, but otherwise does nothing.
  Result ParseSPSExt(int* sps_id);

  // Return a pointer to SPS/PPS with given |sps_id|/|pps_id| or nullptr if not
  // present.
  const H264SPS* GetSPS(int sps_id) const;
  const H264PPS* GetPPS(int pps_id) const;

  // Slice headers and SEI messages are not used across NALUs by the parser
  // and can be discarded after current NALU, so the parser does not store
  // them, nor does it manage their memory.
  // The caller has to provide and manage it instead.

  // Parse a slice header, returning it in |*shdr|. |nalu| must be the NALU
  // returned from AdvanceToNextNALU() and corresponding to |*shdr|.
  Result ParseSliceHeader(const H264NALU& nalu, H264SliceHeader* shdr);

  // Parse a SEI message, returning it in |*sei_msg|, provided and managed
  // by the caller.
  Result ParseSEI(H264SEIMessage* sei_msg);

  // The return value of this method changes for every successful call to
  // AdvanceToNextNALU().
  // This returns the subsample information for the last NALU that was output
  // from AdvanceToNextNALU().
  std::vector<SubsampleEntry> GetCurrentSubsamples();

 private:
  // Move the stream pointer to the beginning of the next NALU,
  // i.e. pointing at the next start code.
  // Return true if a NALU has been found.
  // If a NALU is found:
  // - its size in bytes is returned in |*nalu_size| and includes
  //   the start code as well as the trailing zero bits.
  // - the size in bytes of the start code is returned in |*start_code_size|.
  bool LocateNALU(off_t* nalu_size, off_t* start_code_size);

  // Exp-Golomb code parsing as specified in chapter 9.1 of the spec.
  // Read one unsigned exp-Golomb code from the stream and return in |*val|.
  Result ReadUE(int* val);

  // Read one signed exp-Golomb code from the stream and return in |*val|.
  Result ReadSE(int* val);

  // Parse scaling lists (see spec).
  Result ParseScalingList(int size, int* scaling_list, bool* use_default);
  Result ParseSPSScalingLists(H264SPS* sps);
  Result ParsePPSScalingLists(const H264SPS& sps, H264PPS* pps);

  // Parse optional VUI parameters in SPS (see spec).
  Result ParseVUIParameters(H264SPS* sps);
  // Set |hrd_parameters_present| to true only if they are present.
  Result ParseAndIgnoreHRDParameters(bool* hrd_parameters_present);

  // Parse reference picture lists' modifications (see spec).
  Result ParseRefPicListModifications(H264SliceHeader* shdr);
  Result ParseRefPicListModification(int num_ref_idx_active_minus1,
                                     H264ModificationOfPicNum* ref_list_mods);

  // Parse prediction weight table (see spec).
  Result ParsePredWeightTable(const H264SPS& sps, H264SliceHeader* shdr);

  // Parse weighting factors (see spec).
  Result ParseWeightingFactors(int num_ref_idx_active_minus1,
                               int chroma_array_type,
                               int luma_log2_weight_denom,
                               int chroma_log2_weight_denom,
                               H264WeightingFactors* w_facts);

  // Parse decoded reference picture marking information (see spec).
  Result ParseDecRefPicMarking(H264SliceHeader* shdr);

  // Pointer to the current NALU in the stream.
  const uint8_t* stream_;

  // Bytes left in the stream after the current NALU.
  off_t bytes_left_;

  H264BitReader br_;

  // PPSes and SPSes stored for future reference.
  std::map<int, std::unique_ptr<H264SPS>> active_SPSes_;
  std::map<int, std::unique_ptr<H264PPS>> active_PPSes_;

  // Ranges of encrypted bytes in the buffer passed to
  // SetEncryptedStream().
  Ranges<const uint8_t*> encrypted_ranges_;

  // This contains the range of the previous NALU found in
  // AdvanceToNextNALU(). Holds exactly one range.
  Ranges<const uint8_t*> previous_nalu_range_;

  DISALLOW_COPY_AND_ASSIGN(H264Parser);
};

}  // namespace media

#endif  // MEDIA_VIDEO_H264_PARSER_H_