1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #ifndef MP4_DEMUXER_H264_H_ 6 #define MP4_DEMUXER_H264_H_ 7 8 #include "DecoderData.h" 9 10 namespace mozilla { 11 class BitReader; 12 13 // Spec 7.4.2.1 14 #define MAX_SPS_COUNT 32 15 #define MAX_PPS_COUNT 256 16 17 // NAL unit types 18 enum NAL_TYPES { 19 H264_NAL_SLICE = 1, 20 H264_NAL_DPA = 2, 21 H264_NAL_DPB = 3, 22 H264_NAL_DPC = 4, 23 H264_NAL_IDR_SLICE = 5, 24 H264_NAL_SEI = 6, 25 H264_NAL_SPS = 7, 26 H264_NAL_PPS = 8, 27 H264_NAL_AUD = 9, 28 H264_NAL_END_SEQUENCE = 10, 29 H264_NAL_END_STREAM = 11, 30 H264_NAL_FILLER_DATA = 12, 31 H264_NAL_SPS_EXT = 13, 32 H264_NAL_PREFIX = 14, 33 H264_NAL_AUXILIARY_SLICE = 19, 34 H264_NAL_SLICE_EXT = 20, 35 H264_NAL_SLICE_EXT_DVC = 21, 36 }; 37 38 struct SPSData { 39 bool operator==(const SPSData& aOther) const; 40 bool operator!=(const SPSData& aOther) const; 41 42 bool valid; 43 44 /* Decoded Members */ 45 /* 46 pic_width is the decoded width according to: 47 pic_width = ((pic_width_in_mbs_minus1 + 1) * 16) 48 - (frame_crop_left_offset + frame_crop_right_offset) * 2 49 */ 50 uint32_t pic_width; 51 /* 52 pic_height is the decoded height according to: 53 pic_height = (2 - frame_mbs_only_flag) * ((pic_height_in_map_units_minus1 + 54 1) * 16) 55 - (frame_crop_top_offset + frame_crop_bottom_offset) * 2 56 */ 57 uint32_t pic_height; 58 59 bool interlaced; 60 61 /* 62 Displayed size. 63 display_width and display_height are adjusted according to the display 64 sample aspect ratio. 65 */ 66 uint32_t display_width; 67 uint32_t display_height; 68 69 float sample_ratio; 70 71 uint32_t crop_left; 72 uint32_t crop_right; 73 uint32_t crop_top; 74 uint32_t crop_bottom; 75 76 /* 77 H264 decoding parameters according to ITU-T H.264 (T-REC-H.264-201402-I/en) 78 http://www.itu.int/rec/T-REC-H.264-201402-I/en 79 */ 80 81 bool constraint_set0_flag; 82 bool constraint_set1_flag; 83 bool constraint_set2_flag; 84 bool constraint_set3_flag; 85 bool constraint_set4_flag; 86 bool constraint_set5_flag; 87 88 /* 89 profile_idc and level_idc indicate the profile and level to which the coded 90 video sequence conforms when the SVC sequence parameter set is the active 91 SVC sequence parameter set. 92 */ 93 uint8_t profile_idc; 94 uint8_t level_idc; 95 96 /* 97 seq_parameter_set_id identifies the sequence parameter set that is referred 98 to by the picture parameter set. The value of seq_parameter_set_id shall be 99 in the range of 0 to 31, inclusive. 100 */ 101 uint8_t seq_parameter_set_id; 102 103 /* 104 chroma_format_idc specifies the chroma sampling relative to the luma 105 sampling as specified in clause 6.2. The value of chroma_format_idc shall be 106 in the range of 0 to 3, inclusive. When chroma_format_idc is not present, 107 it shall be inferred to be equal to 1 (4:2:0 chroma format). 108 When profile_idc is equal to 183, chroma_format_idc shall be equal to 0 109 (4:0:0 chroma format). 110 */ 111 uint8_t chroma_format_idc; 112 113 /* 114 bit_depth_luma_minus8 specifies the bit depth of the samples of the luma 115 array and the value of the luma quantisation parameter range offset 116 QpBdOffset Y , as specified by 117 BitDepth Y = 8 + bit_depth_luma_minus8 (7-3) 118 QpBdOffset Y = 6 * bit_depth_luma_minus8 (7-4) 119 When bit_depth_luma_minus8 is not present, it shall be inferred to be equal 120 to 0. bit_depth_luma_minus8 shall be in the range of 0 to 6, inclusive. 121 */ 122 uint8_t bit_depth_luma_minus8; 123 124 /* 125 bit_depth_chroma_minus8 specifies the bit depth of the samples of the chroma 126 arrays and the value of the chroma quantisation parameter range offset 127 QpBdOffset C , as specified by 128 BitDepth C = 8 + bit_depth_chroma_minus8 (7-5) 129 QpBdOffset C = 6 * bit_depth_chroma_minus8 (7-6) 130 When bit_depth_chroma_minus8 is not present, it shall be inferred to be 131 equal to 0. bit_depth_chroma_minus8 shall be in the range of 0 to 6, 132 inclusive. 133 */ 134 uint8_t bit_depth_chroma_minus8; 135 136 /* 137 separate_colour_plane_flag equal to 1 specifies that the three colour 138 components of the 4:4:4 chroma format are coded separately. 139 separate_colour_plane_flag equal to 0 specifies that the colour components 140 are not coded separately. When separate_colour_plane_flag is not present, 141 it shall be inferred to be equal to 0. When separate_colour_plane_flag is 142 equal to 1, the primary coded picture consists of three separate components, 143 each of which consists of coded samples of one colour plane (Y, Cb or Cr) 144 that each use the monochrome coding syntax. In this case, each colour plane 145 is associated with a specific colour_plane_id value. 146 */ 147 bool separate_colour_plane_flag; 148 149 /* 150 seq_scaling_matrix_present_flag equal to 1 specifies that the flags 151 seq_scaling_list_present_flag[ i ] for i = 0..7 or 152 i = 0..11 are present. seq_scaling_matrix_present_flag equal to 0 specifies 153 that these flags are not present and the sequence-level scaling list 154 specified by Flat_4x4_16 shall be inferred for i = 0..5 and the 155 sequence-level scaling list specified by Flat_8x8_16 shall be inferred for 156 i = 6..11. When seq_scaling_matrix_present_flag is not present, it shall be 157 inferred to be equal to 0. 158 */ 159 bool seq_scaling_matrix_present_flag; 160 161 /* 162 log2_max_frame_num_minus4 specifies the value of the variable 163 MaxFrameNum that is used in frame_num related derivations as 164 follows: 165 166 MaxFrameNum = 2( log2_max_frame_num_minus4 + 4 ). The value of 167 log2_max_frame_num_minus4 shall be in the range of 0 to 12, inclusive. 168 */ 169 uint8_t log2_max_frame_num; 170 171 /* 172 pic_order_cnt_type specifies the method to decode picture order 173 count (as specified in subclause 8.2.1). The value of 174 pic_order_cnt_type shall be in the range of 0 to 2, inclusive. 175 */ 176 uint8_t pic_order_cnt_type; 177 178 /* 179 log2_max_pic_order_cnt_lsb_minus4 specifies the value of the 180 variable MaxPicOrderCntLsb that is used in the decoding 181 process for picture order count as specified in subclause 182 8.2.1 as follows: 183 184 MaxPicOrderCntLsb = 2( log2_max_pic_order_cnt_lsb_minus4 + 4 ) 185 186 The value of log2_max_pic_order_cnt_lsb_minus4 shall be in 187 the range of 0 to 12, inclusive. 188 */ 189 uint8_t log2_max_pic_order_cnt_lsb; 190 191 /* 192 delta_pic_order_always_zero_flag equal to 1 specifies that 193 delta_pic_order_cnt[ 0 ] and delta_pic_order_cnt[ 1 ] are 194 not present in the slice headers of the sequence and shall 195 be inferred to be equal to 0. 196 */ 197 bool delta_pic_order_always_zero_flag; 198 199 /* 200 offset_for_non_ref_pic is used to calculate the picture 201 order count of a non-reference picture as specified in 202 8.2.1. The value of offset_for_non_ref_pic shall be in the 203 range of -231 to 231 - 1, inclusive. 204 */ 205 int8_t offset_for_non_ref_pic; 206 207 /* 208 offset_for_top_to_bottom_field is used to calculate the 209 picture order count of a bottom field as specified in 210 subclause 8.2.1. The value of offset_for_top_to_bottom_field 211 shall be in the range of -231 to 231 - 1, inclusive. 212 */ 213 int8_t offset_for_top_to_bottom_field; 214 215 /* 216 max_num_ref_frames specifies the maximum number of short-term and 217 long-term reference frames, complementary reference field pairs, 218 and non-paired reference fields that may be used by the decoding 219 process for inter prediction of any picture in the 220 sequence. max_num_ref_frames also determines the size of the sliding 221 window operation as specified in subclause 8.2.5.3. The value of 222 max_num_ref_frames shall be in the range of 0 to MaxDpbFrames (as 223 specified in subclause A.3.1 or A.3.2), inclusive. 224 */ 225 uint32_t max_num_ref_frames; 226 227 /* 228 gaps_in_frame_num_value_allowed_flag specifies the allowed 229 values of frame_num as specified in subclause 7.4.3 and the 230 decoding process in case of an inferred gap between values of 231 frame_num as specified in subclause 8.2.5.2. 232 */ 233 bool gaps_in_frame_num_allowed_flag; 234 235 /* 236 pic_width_in_mbs_minus1 plus 1 specifies the width of each 237 decoded picture in units of macroblocks. 16 macroblocks in a row 238 */ 239 uint32_t pic_width_in_mbs; 240 241 /* 242 pic_height_in_map_units_minus1 plus 1 specifies the height in 243 slice group map units of a decoded frame or field. 16 244 macroblocks in each column. 245 */ 246 uint32_t pic_height_in_map_units; 247 248 /* 249 frame_mbs_only_flag equal to 0 specifies that coded pictures of 250 the coded video sequence may either be coded fields or coded 251 frames. frame_mbs_only_flag equal to 1 specifies that every 252 coded picture of the coded video sequence is a coded frame 253 containing only frame macroblocks. 254 */ 255 bool frame_mbs_only_flag; 256 257 /* 258 mb_adaptive_frame_field_flag equal to 0 specifies no 259 switching between frame and field macroblocks within a 260 picture. mb_adaptive_frame_field_flag equal to 1 specifies 261 the possible use of switching between frame and field 262 macroblocks within frames. When mb_adaptive_frame_field_flag 263 is not present, it shall be inferred to be equal to 0. 264 */ 265 bool mb_adaptive_frame_field_flag; 266 267 /* 268 direct_8x8_inference_flag specifies the method used in the derivation 269 process for luma motion vectors for B_Skip, B_Direct_16x16 and B_Direct_8x8 270 as specified in clause 8.4.1.2. When frame_mbs_only_flag is equal to 0, 271 direct_8x8_inference_flag shall be equal to 1. 272 */ 273 bool direct_8x8_inference_flag; 274 275 /* 276 frame_cropping_flag equal to 1 specifies that the frame cropping 277 offset parameters follow next in the sequence parameter 278 set. frame_cropping_flag equal to 0 specifies that the frame 279 cropping offset parameters are not present. 280 */ 281 bool frame_cropping_flag; 282 uint32_t frame_crop_left_offset; 283 uint32_t frame_crop_right_offset; 284 uint32_t frame_crop_top_offset; 285 uint32_t frame_crop_bottom_offset; 286 287 // VUI Parameters 288 289 /* 290 vui_parameters_present_flag equal to 1 specifies that the 291 vui_parameters( ) syntax structure as specified in Annex E is 292 present. vui_parameters_present_flag equal to 0 specifies that 293 the vui_parameters( ) syntax structure as specified in Annex E 294 is not present. 295 */ 296 bool vui_parameters_present_flag; 297 298 /* 299 aspect_ratio_info_present_flag equal to 1 specifies that 300 aspect_ratio_idc is present. aspect_ratio_info_present_flag 301 equal to 0 specifies that aspect_ratio_idc is not present. 302 */ 303 bool aspect_ratio_info_present_flag; 304 305 /* 306 aspect_ratio_idc specifies the value of the sample aspect 307 ratio of the luma samples. Table E-1 shows the meaning of 308 the code. When aspect_ratio_idc indicates Extended_SAR, the 309 sample aspect ratio is represented by sar_width and 310 sar_height. When the aspect_ratio_idc syntax element is not 311 present, aspect_ratio_idc value shall be inferred to be 312 equal to 0. 313 */ 314 uint8_t aspect_ratio_idc; 315 uint32_t sar_width; 316 uint32_t sar_height; 317 318 /* 319 video_signal_type_present_flag equal to 1 specifies that video_format, 320 video_full_range_flag and colour_description_present_flag are present. 321 video_signal_type_present_flag equal to 0, specify that video_format, 322 video_full_range_flag and colour_description_present_flag are not present. 323 */ 324 bool video_signal_type_present_flag; 325 326 /* 327 overscan_info_present_flag equal to1 specifies that the 328 overscan_appropriate_flag is present. When overscan_info_present_flag is 329 equal to 0 or is not present, the preferred display method for the video 330 signal is unspecified (Unspecified). 331 */ 332 bool overscan_info_present_flag; 333 /* 334 overscan_appropriate_flag equal to 1 indicates that the cropped decoded 335 pictures output are suitable for display using overscan. 336 overscan_appropriate_flag equal to 0 indicates that the cropped decoded 337 pictures output contain visually important information in the entire region 338 out to the edges of the cropping rectangle of the picture 339 */ 340 bool overscan_appropriate_flag; 341 342 /* 343 video_format indicates the representation of the pictures as specified in 344 Table E-2, before being coded in accordance with this 345 Recommendation | International Standard. When the video_format syntax 346 element is not present, video_format value shall be inferred to be equal 347 to 5. (Unspecified video format) 348 */ 349 uint8_t video_format; 350 351 /* 352 video_full_range_flag indicates the black level and range of the luma and 353 chroma signals as derived from E′Y, E′PB, and E′PR or E′R, E′G, and E′B 354 real-valued component signals. 355 When the video_full_range_flag syntax element is not present, the value of 356 video_full_range_flag shall be inferred to be equal to 0. 357 */ 358 bool video_full_range_flag; 359 360 /* 361 colour_description_present_flag equal to1 specifies that colour_primaries, 362 transfer_characteristics and matrix_coefficients are present. 363 colour_description_present_flag equal to 0 specifies that colour_primaries, 364 transfer_characteristics and matrix_coefficients are not present. 365 */ 366 bool colour_description_present_flag; 367 368 /* 369 colour_primaries indicates the chromaticity coordinates of the source 370 primaries as specified in Table E-3 in terms of the CIE 1931 definition of 371 x and y as specified by ISO 11664-1. 372 When the colour_primaries syntax element is not present, the value of 373 colour_primaries shall be inferred to be equal to 2 (the chromaticity is 374 unspecified or is determined by the application). 375 */ 376 uint8_t colour_primaries; 377 378 /* 379 transfer_characteristics indicates the opto-electronic transfer 380 characteristic of the source picture as specified in Table E-4 as a function 381 of a linear optical intensity input Lc with a nominal real-valued range of 0 382 to 1. 383 When the transfer_characteristics syntax element is not present, the value 384 of transfer_characteristics shall be inferred to be equal to 2 385 (the transfer characteristics are unspecified or are determined by the 386 application). 387 */ 388 uint8_t transfer_characteristics; 389 390 uint8_t matrix_coefficients; 391 bool chroma_loc_info_present_flag; 392 /* 393 The value of chroma_sample_loc_type_top_field and 394 chroma_sample_loc_type_bottom_field shall be in the range of 0 to 5, 395 inclusive 396 */ 397 uint8_t chroma_sample_loc_type_top_field; 398 uint8_t chroma_sample_loc_type_bottom_field; 399 400 bool scaling_matrix_present; 401 uint8_t scaling_matrix4x4[6][16]; 402 uint8_t scaling_matrix8x8[6][64]; 403 404 SPSData(); 405 }; 406 407 struct SEIRecoveryData { 408 /* 409 recovery_frame_cnt specifies the recovery point of output pictures in output 410 order. All decoded pictures in output order are indicated to be correct or 411 approximately correct in content starting at the output order position of 412 the reference picture having the frame_num equal to the frame_num of the VCL 413 NAL units for the current access unit incremented by recovery_frame_cnt in 414 modulo MaxFrameNum arithmetic. recovery_frame_cnt shall be in the range of 0 415 to MaxFrameNum − 1, inclusive. 416 */ 417 uint32_t recovery_frame_cnt = 0; 418 /* 419 exact_match_flag indicates whether decoded pictures at and subsequent to the 420 specified recovery point in output order derived by starting the decoding 421 process at the access unit associated with the recovery point SEI message 422 shall be an exact match to the pictures that would be produced by starting 423 the decoding process at the location of a previous IDR access unit in the 424 NAL unit stream. The value 0 indicates that the match need not be exact and 425 the value 1 indicates that the match shall be exact. 426 */ 427 bool exact_match_flag = false; 428 /* 429 broken_link_flag indicates the presence or absence of a broken link in the 430 NAL unit stream at the location of the recovery point SEI message */ 431 bool broken_link_flag = false; 432 /* 433 changing_slice_group_idc equal to 0 indicates that decoded pictures are 434 correct or approximately correct in content at and subsequent to the 435 recovery point in output order when all macroblocks of the primary coded 436 pictures are decoded within the changing slice group period 437 */ 438 uint8_t changing_slice_group_idc = 0; 439 }; 440 441 class H264 { 442 public: 443 /* Check if out of band extradata contains a SPS NAL */ 444 static bool HasSPS(const mozilla::MediaByteBuffer* aExtraData); 445 // Extract SPS and PPS NALs from aSample by looking into each NALs. 446 // aSample must be in AVCC format. 447 static already_AddRefed<mozilla::MediaByteBuffer> ExtractExtraData( 448 const mozilla::MediaRawData* aSample); 449 // Return true if both extradata are equal. 450 static bool CompareExtraData(const mozilla::MediaByteBuffer* aExtraData1, 451 const mozilla::MediaByteBuffer* aExtraData2); 452 453 // Ensure that SPS data makes sense, Return true if SPS data was, and false 454 // otherwise. If false, then content will be adjusted accordingly. 455 static bool EnsureSPSIsSane(SPSData& aSPS); 456 457 static bool DecodeSPSFromExtraData(const mozilla::MediaByteBuffer* aExtraData, 458 SPSData& aDest); 459 460 // If the given aExtraData is valid, return the aExtraData.max_num_ref_frames 461 // clamped to be in the range of [4, 16]; otherwise return 4. 462 static uint32_t ComputeMaxRefFrames( 463 const mozilla::MediaByteBuffer* aExtraData); 464 465 enum class FrameType { 466 I_FRAME, 467 OTHER, 468 INVALID, 469 }; 470 471 // Returns the frame type. Returns I_FRAME if the sample is an IDR 472 // (Instantaneous Decoding Refresh) Picture. 473 static FrameType GetFrameType(const mozilla::MediaRawData* aSample); 474 475 private: 476 friend class SPSNAL; 477 /* Extract RAW BYTE SEQUENCE PAYLOAD from NAL content. 478 Returns nullptr if invalid content. 479 This is compliant to ITU H.264 7.3.1 Syntax in tabular form NAL unit syntax 480 */ 481 static already_AddRefed<mozilla::MediaByteBuffer> DecodeNALUnit( 482 const uint8_t* aNAL, size_t aLength); 483 /* Decode SPS NAL RBSP and fill SPSData structure */ 484 static bool DecodeSPS(const mozilla::MediaByteBuffer* aSPS, SPSData& aDest); 485 static bool vui_parameters(mozilla::BitReader& aBr, SPSData& aDest); 486 // Read HRD parameters, all data is ignored. 487 static void hrd_parameters(mozilla::BitReader& aBr); 488 static uint8_t NumSPS(const mozilla::MediaByteBuffer* aExtraData); 489 // Decode SEI payload and return true if the SEI NAL indicates a recovery 490 // point. 491 static bool DecodeRecoverySEI(const mozilla::MediaByteBuffer* aSEI, 492 SEIRecoveryData& aDest); 493 }; 494 495 } // namespace mozilla 496 497 #endif // MP4_DEMUXER_H264_H_ 498