1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 #ifndef MP4_DEMUXER_H264_H_
6 #define MP4_DEMUXER_H264_H_
7 
8 #include "DecoderData.h"
9 
10 namespace mozilla {
11 class BitReader;
12 
// Spec 7.4.2.1
// Upper bounds on the number of distinct parameter sets a stream can carry.
// MAX_SPS_COUNT matches the seq_parameter_set_id range of 0 to 31 documented
// on SPSData::seq_parameter_set_id.
// NOTE(review): MAX_PPS_COUNT presumably mirrors a pic_parameter_set_id range
// of 0 to 255 -- confirm against the spec.
#define MAX_SPS_COUNT 32
#define MAX_PPS_COUNT 256
16 
// NAL unit types
// Numeric values are the nal_unit_type codes; the per-value descriptions
// follow ITU-T H.264 Table 7-1. Codes that are not listed here (e.g. 15 to 18)
// have no enumerator in this header.
enum NAL_TYPES {
  H264_NAL_SLICE = 1,             // Coded slice of a non-IDR picture
  H264_NAL_DPA = 2,               // Coded slice data partition A
  H264_NAL_DPB = 3,               // Coded slice data partition B
  H264_NAL_DPC = 4,               // Coded slice data partition C
  H264_NAL_IDR_SLICE = 5,         // Coded slice of an IDR picture
  H264_NAL_SEI = 6,               // Supplemental enhancement information
  H264_NAL_SPS = 7,               // Sequence parameter set
  H264_NAL_PPS = 8,               // Picture parameter set
  H264_NAL_AUD = 9,               // Access unit delimiter
  H264_NAL_END_SEQUENCE = 10,     // End of sequence
  H264_NAL_END_STREAM = 11,       // End of stream
  H264_NAL_FILLER_DATA = 12,      // Filler data
  H264_NAL_SPS_EXT = 13,          // Sequence parameter set extension
  H264_NAL_PREFIX = 14,           // Prefix NAL unit
  H264_NAL_AUXILIARY_SLICE = 19,  // Coded slice of an auxiliary coded picture
  H264_NAL_SLICE_EXT = 20,        // Coded slice extension
  H264_NAL_SLICE_EXT_DVC = 21,    // Coded slice extension, depth view component
};
37 
/*
  Values decoded from an H.264 sequence parameter set (SPS), together with a
  few quantities derived from them (decoded picture size, display size,
  cropping). H264::DecodeSPS fills this structure.
 */
struct SPSData {
  bool operator==(const SPSData& aOther) const;
  bool operator!=(const SPSData& aOther) const;

  // NOTE(review): presumably set once an SPS has been decoded successfully --
  // confirm against the parser.
  bool valid;

  /* Decoded Members */
  /*
    pic_width is the decoded width according to:
    pic_width = ((pic_width_in_mbs_minus1 + 1) * 16)
                - (frame_crop_left_offset + frame_crop_right_offset) * 2
   */
  uint32_t pic_width;
  /*
    pic_height is the decoded height according to:
    pic_height = (2 - frame_mbs_only_flag)
                 * ((pic_height_in_map_units_minus1 + 1) * 16)
                 - (frame_crop_top_offset + frame_crop_bottom_offset) * 2
   */
  uint32_t pic_height;

  bool interlaced;

  /*
   Displayed size.
   display_width and display_height are adjusted according to the display
   sample aspect ratio.
   */
  uint32_t display_width;
  uint32_t display_height;

  float sample_ratio;

  uint32_t crop_left;
  uint32_t crop_right;
  uint32_t crop_top;
  uint32_t crop_bottom;

  /*
    H264 decoding parameters according to ITU-T H.264 (T-REC-H.264-201402-I/en)
    http://www.itu.int/rec/T-REC-H.264-201402-I/en
   */

  bool constraint_set0_flag;
  bool constraint_set1_flag;
  bool constraint_set2_flag;
  bool constraint_set3_flag;
  bool constraint_set4_flag;
  bool constraint_set5_flag;

  /*
    profile_idc and level_idc indicate the profile and level to which the coded
    video sequence conforms when the SVC sequence parameter set is the active
    SVC sequence parameter set.
   */
  uint8_t profile_idc;
  uint8_t level_idc;

  /*
    seq_parameter_set_id identifies the sequence parameter set that is referred
    to by the picture parameter set. The value of seq_parameter_set_id shall be
    in the range of 0 to 31, inclusive.
   */
  uint8_t seq_parameter_set_id;

  /*
    chroma_format_idc specifies the chroma sampling relative to the luma
    sampling as specified in clause 6.2. The value of chroma_format_idc shall be
    in the range of 0 to 3, inclusive. When chroma_format_idc is not present,
    it shall be inferred to be equal to 1 (4:2:0 chroma format).
    When profile_idc is equal to 183, chroma_format_idc shall be equal to 0
    (4:0:0 chroma format).
   */
  uint8_t chroma_format_idc;

  /*
    bit_depth_luma_minus8 specifies the bit depth of the samples of the luma
    array and the value of the luma quantisation parameter range offset
    QpBdOffsetY, as specified by
      BitDepthY = 8 + bit_depth_luma_minus8 (7-3)
      QpBdOffsetY = 6 * bit_depth_luma_minus8 (7-4)
    When bit_depth_luma_minus8 is not present, it shall be inferred to be equal
    to 0. bit_depth_luma_minus8 shall be in the range of 0 to 6, inclusive.
   */
  uint8_t bit_depth_luma_minus8;

  /*
    bit_depth_chroma_minus8 specifies the bit depth of the samples of the chroma
    arrays and the value of the chroma quantisation parameter range offset
    QpBdOffsetC, as specified by
      BitDepthC = 8 + bit_depth_chroma_minus8 (7-5)
      QpBdOffsetC = 6 * bit_depth_chroma_minus8 (7-6)
    When bit_depth_chroma_minus8 is not present, it shall be inferred to be
    equal to 0. bit_depth_chroma_minus8 shall be in the range of 0 to 6,
    inclusive.
   */
  uint8_t bit_depth_chroma_minus8;

  /*
    separate_colour_plane_flag equal to 1 specifies that the three colour
    components of the 4:4:4 chroma format are coded separately.
    separate_colour_plane_flag equal to 0 specifies that the colour components
    are not coded separately. When separate_colour_plane_flag is not present,
    it shall be inferred to be equal to 0. When separate_colour_plane_flag is
    equal to 1, the primary coded picture consists of three separate components,
    each of which consists of coded samples of one colour plane (Y, Cb or Cr)
    that each use the monochrome coding syntax. In this case, each colour plane
    is associated with a specific colour_plane_id value.
   */
  bool separate_colour_plane_flag;

  /*
    seq_scaling_matrix_present_flag equal to 1 specifies that the flags
    seq_scaling_list_present_flag[ i ] for i = 0..7 or
    i = 0..11 are present. seq_scaling_matrix_present_flag equal to 0 specifies
    that these flags are not present and the sequence-level scaling list
    specified by Flat_4x4_16 shall be inferred for i = 0..5 and the
    sequence-level scaling list specified by Flat_8x8_16 shall be inferred for
    i = 6..11. When seq_scaling_matrix_present_flag is not present, it shall be
    inferred to be equal to 0.
   */
  bool seq_scaling_matrix_present_flag;

  /*
    log2_max_frame_num_minus4 specifies the value of the variable
    MaxFrameNum that is used in frame_num related derivations as
    follows:

      MaxFrameNum = 2^( log2_max_frame_num_minus4 + 4 )

    The value of log2_max_frame_num_minus4 shall be in the range of 0 to 12,
    inclusive.
    NOTE(review): the member name drops the "_minus4" suffix; presumably it
    holds the value with 4 already added -- confirm against the parser.
   */
  uint8_t log2_max_frame_num;

  /*
    pic_order_cnt_type specifies the method to decode picture order
    count (as specified in subclause 8.2.1). The value of
    pic_order_cnt_type shall be in the range of 0 to 2, inclusive.
   */
  uint8_t pic_order_cnt_type;

  /*
    log2_max_pic_order_cnt_lsb_minus4 specifies the value of the
    variable MaxPicOrderCntLsb that is used in the decoding
    process for picture order count as specified in subclause
    8.2.1 as follows:

      MaxPicOrderCntLsb = 2^( log2_max_pic_order_cnt_lsb_minus4 + 4 )

    The value of log2_max_pic_order_cnt_lsb_minus4 shall be in
    the range of 0 to 12, inclusive.
    NOTE(review): the member name drops the "_minus4" suffix; presumably it
    holds the value with 4 already added -- confirm against the parser.
   */
  uint8_t log2_max_pic_order_cnt_lsb;

  /*
    delta_pic_order_always_zero_flag equal to 1 specifies that
    delta_pic_order_cnt[ 0 ] and delta_pic_order_cnt[ 1 ] are
    not present in the slice headers of the sequence and shall
    be inferred to be equal to 0.
   */
  bool delta_pic_order_always_zero_flag;

  /*
    offset_for_non_ref_pic is used to calculate the picture
    order count of a non-reference picture as specified in
    8.2.1. The value of offset_for_non_ref_pic shall be in the
    range of -2^31 to 2^31 - 1, inclusive.
    Stored as a 32-bit signed integer so that the whole permitted range fits;
    a narrower type would silently truncate large offsets.
   */
  int32_t offset_for_non_ref_pic;

  /*
    offset_for_top_to_bottom_field is used to calculate the
    picture order count of a bottom field as specified in
    subclause 8.2.1. The value of offset_for_top_to_bottom_field
    shall be in the range of -2^31 to 2^31 - 1, inclusive.
    Stored as a 32-bit signed integer so that the whole permitted range fits.
   */
  int32_t offset_for_top_to_bottom_field;

  /*
    max_num_ref_frames specifies the maximum number of short-term and
    long-term reference frames, complementary reference field pairs,
    and non-paired reference fields that may be used by the decoding
    process for inter prediction of any picture in the
    sequence. max_num_ref_frames also determines the size of the sliding
    window operation as specified in subclause 8.2.5.3. The value of
    max_num_ref_frames shall be in the range of 0 to MaxDpbFrames (as
    specified in subclause A.3.1 or A.3.2), inclusive.
   */
  uint32_t max_num_ref_frames;

  /*
    gaps_in_frame_num_value_allowed_flag specifies the allowed
    values of frame_num as specified in subclause 7.4.3 and the
    decoding process in case of an inferred gap between values of
    frame_num as specified in subclause 8.2.5.2.
   */
  bool gaps_in_frame_num_allowed_flag;

  /*
    pic_width_in_mbs_minus1 plus 1 specifies the width of each
    decoded picture in units of macroblocks.  16 macroblocks in a row
   */
  uint32_t pic_width_in_mbs;

  /*
    pic_height_in_map_units_minus1 plus 1 specifies the height in
    slice group map units of a decoded frame or field.  16
    macroblocks in each column.
   */
  uint32_t pic_height_in_map_units;

  /*
    frame_mbs_only_flag equal to 0 specifies that coded pictures of
    the coded video sequence may either be coded fields or coded
    frames. frame_mbs_only_flag equal to 1 specifies that every
    coded picture of the coded video sequence is a coded frame
    containing only frame macroblocks.
   */
  bool frame_mbs_only_flag;

  /*
    mb_adaptive_frame_field_flag equal to 0 specifies no
    switching between frame and field macroblocks within a
    picture. mb_adaptive_frame_field_flag equal to 1 specifies
    the possible use of switching between frame and field
    macroblocks within frames. When mb_adaptive_frame_field_flag
    is not present, it shall be inferred to be equal to 0.
   */
  bool mb_adaptive_frame_field_flag;

  /*
    direct_8x8_inference_flag specifies the method used in the derivation
    process for luma motion vectors for B_Skip, B_Direct_16x16 and B_Direct_8x8
    as specified in clause 8.4.1.2. When frame_mbs_only_flag is equal to 0,
    direct_8x8_inference_flag shall be equal to 1.
   */
  bool direct_8x8_inference_flag;

  /*
    frame_cropping_flag equal to 1 specifies that the frame cropping
    offset parameters follow next in the sequence parameter
    set. frame_cropping_flag equal to 0 specifies that the frame
    cropping offset parameters are not present.
   */
  bool frame_cropping_flag;
  uint32_t frame_crop_left_offset;
  uint32_t frame_crop_right_offset;
  uint32_t frame_crop_top_offset;
  uint32_t frame_crop_bottom_offset;

  // VUI Parameters

  /*
    vui_parameters_present_flag equal to 1 specifies that the
    vui_parameters( ) syntax structure as specified in Annex E is
    present. vui_parameters_present_flag equal to 0 specifies that
    the vui_parameters( ) syntax structure as specified in Annex E
    is not present.
   */
  bool vui_parameters_present_flag;

  /*
    aspect_ratio_info_present_flag equal to 1 specifies that
    aspect_ratio_idc is present. aspect_ratio_info_present_flag
    equal to 0 specifies that aspect_ratio_idc is not present.
   */
  bool aspect_ratio_info_present_flag;

  /*
    aspect_ratio_idc specifies the value of the sample aspect
    ratio of the luma samples. Table E-1 shows the meaning of
    the code. When aspect_ratio_idc indicates Extended_SAR, the
    sample aspect ratio is represented by sar_width and
    sar_height. When the aspect_ratio_idc syntax element is not
    present, aspect_ratio_idc value shall be inferred to be
    equal to 0.
   */
  uint8_t aspect_ratio_idc;
  uint32_t sar_width;
  uint32_t sar_height;

  /*
    video_signal_type_present_flag equal to 1 specifies that video_format,
    video_full_range_flag and colour_description_present_flag are present.
    video_signal_type_present_flag equal to 0 specifies that video_format,
    video_full_range_flag and colour_description_present_flag are not present.
   */
  bool video_signal_type_present_flag;

  /*
    overscan_info_present_flag equal to 1 specifies that the
    overscan_appropriate_flag is present. When overscan_info_present_flag is
    equal to 0 or is not present, the preferred display method for the video
    signal is unspecified (Unspecified).
   */
  bool overscan_info_present_flag;
  /*
    overscan_appropriate_flag equal to 1 indicates that the cropped decoded
    pictures output are suitable for display using overscan.
    overscan_appropriate_flag equal to 0 indicates that the cropped decoded
    pictures output contain visually important information in the entire region
    out to the edges of the cropping rectangle of the picture.
   */
  bool overscan_appropriate_flag;

  /*
    video_format indicates the representation of the pictures as specified in
    Table E-2, before being coded in accordance with this
    Recommendation | International Standard. When the video_format syntax
    element is not present, video_format value shall be inferred to be equal
    to 5. (Unspecified video format)
   */
  uint8_t video_format;

  /*
    video_full_range_flag indicates the black level and range of the luma and
    chroma signals as derived from E′Y, E′PB, and E′PR or E′R, E′G, and E′B
    real-valued component signals.
    When the video_full_range_flag syntax element is not present, the value of
    video_full_range_flag shall be inferred to be equal to 0.
   */
  bool video_full_range_flag;

  /*
    colour_description_present_flag equal to 1 specifies that colour_primaries,
    transfer_characteristics and matrix_coefficients are present.
    colour_description_present_flag equal to 0 specifies that colour_primaries,
    transfer_characteristics and matrix_coefficients are not present.
   */
  bool colour_description_present_flag;

  /*
    colour_primaries indicates the chromaticity coordinates of the source
    primaries as specified in Table E-3 in terms of the CIE 1931 definition of
    x and y as specified by ISO 11664-1.
    When the colour_primaries syntax element is not present, the value of
    colour_primaries shall be inferred to be equal to 2 (the chromaticity is
    unspecified or is determined by the application).
   */
  uint8_t colour_primaries;

  /*
    transfer_characteristics indicates the opto-electronic transfer
    characteristic of the source picture as specified in Table E-4 as a function
    of a linear optical intensity input Lc with a nominal real-valued range of 0
    to 1.
    When the transfer_characteristics syntax element is not present, the value
    of transfer_characteristics shall be inferred to be equal to 2
    (the transfer characteristics are unspecified or are determined by the
    application).
   */
  uint8_t transfer_characteristics;

  uint8_t matrix_coefficients;
  bool chroma_loc_info_present_flag;
  /*
    The value of chroma_sample_loc_type_top_field and
    chroma_sample_loc_type_bottom_field shall be in the range of 0 to 5,
    inclusive.
   */
  uint8_t chroma_sample_loc_type_top_field;
  uint8_t chroma_sample_loc_type_bottom_field;

  // Scaling lists: six lists of 16 entries and six lists of 64 entries.
  bool scaling_matrix_present;
  uint8_t scaling_matrix4x4[6][16];
  uint8_t scaling_matrix8x8[6][64];

  SPSData();
};
406 
// Fields carried by a recovery point SEI message. Every member starts out
// zero / false.
struct SEIRecoveryData {
  // Recovery point of output pictures, in output order. Decoded pictures are
  // indicated to be correct or approximately correct in content starting at
  // the output order position of the reference picture whose frame_num equals
  // the frame_num of the VCL NAL units of the current access unit incremented
  // by recovery_frame_cnt (modulo MaxFrameNum arithmetic).
  // Shall be in the range of 0 to MaxFrameNum - 1, inclusive.
  uint32_t recovery_frame_cnt{0};

  // Indicates whether decoded pictures at and after the recovery point (in
  // output order), obtained by starting the decoding process at the access
  // unit associated with the recovery point SEI message, shall exactly match
  // the pictures produced by starting the decoding process at a previous IDR
  // access unit in the NAL unit stream.
  // 0: the match need not be exact; 1: the match shall be exact.
  bool exact_match_flag{false};

  // Indicates the presence or absence of a broken link in the NAL unit stream
  // at the location of the recovery point SEI message.
  bool broken_link_flag{false};

  // A value of 0 indicates that decoded pictures are correct or approximately
  // correct in content at and after the recovery point (in output order) when
  // all macroblocks of the primary coded pictures are decoded within the
  // changing slice group period.
  uint8_t changing_slice_group_idc{0};
};
440 
// Collection of static helpers for inspecting H.264 data: out-of-band
// extradata, SPS NAL units, recovery point SEI payloads and sample frame
// types. Every member is static.
class H264 {
 public:
  /* Check if out of band extradata contains a SPS NAL */
  static bool HasSPS(const mozilla::MediaByteBuffer* aExtraData);
  // Extract SPS and PPS NALs from aSample by looking into each NALs.
  // aSample must be in AVCC format.
  static already_AddRefed<mozilla::MediaByteBuffer> ExtractExtraData(
      const mozilla::MediaRawData* aSample);
  // Return true if both extradata are equal.
  static bool CompareExtraData(const mozilla::MediaByteBuffer* aExtraData1,
                               const mozilla::MediaByteBuffer* aExtraData2);

  // Ensure that SPS data makes sense. Returns true if the SPS data already
  // made sense, and false otherwise. If false, then the content of aSPS will
  // have been adjusted accordingly.
  static bool EnsureSPSIsSane(SPSData& aSPS);

  // Decode SPS information found in aExtraData into aDest.
  // NOTE(review): presumably returns false when no SPS could be decoded --
  // confirm against the implementation.
  static bool DecodeSPSFromExtraData(const mozilla::MediaByteBuffer* aExtraData,
                                     SPSData& aDest);

  // If the given aExtraData is valid, return the aExtraData.max_num_ref_frames
  // clamped to be in the range of [4, 16]; otherwise return 4.
  static uint32_t ComputeMaxRefFrames(
      const mozilla::MediaByteBuffer* aExtraData);

  // Result of GetFrameType().
  enum class FrameType {
    I_FRAME,
    OTHER,
    INVALID,
  };

  // Returns the frame type. Returns I_FRAME if the sample is an IDR
  // (Instantaneous Decoding Refresh) Picture.
  static FrameType GetFrameType(const mozilla::MediaRawData* aSample);

 private:
  // SPSNAL is granted access to the private helpers below.
  friend class SPSNAL;
  /* Extract RAW BYTE SEQUENCE PAYLOAD from NAL content.
     Returns nullptr if invalid content.
     This is compliant to ITU H.264 7.3.1 Syntax in tabular form NAL unit syntax
   */
  static already_AddRefed<mozilla::MediaByteBuffer> DecodeNALUnit(
      const uint8_t* aNAL, size_t aLength);
  /* Decode SPS NAL RBSP and fill SPSData structure */
  static bool DecodeSPS(const mozilla::MediaByteBuffer* aSPS, SPSData& aDest);
  // Read VUI parameters from aBr into aDest.
  // NOTE(review): presumably returns false on malformed input -- confirm.
  static bool vui_parameters(mozilla::BitReader& aBr, SPSData& aDest);
  // Read HRD parameters, all data is ignored.
  static void hrd_parameters(mozilla::BitReader& aBr);
  // NOTE(review): presumably the number of SPS NALs found in aExtraData --
  // confirm against the implementation.
  static uint8_t NumSPS(const mozilla::MediaByteBuffer* aExtraData);
  // Decode SEI payload and return true if the SEI NAL indicates a recovery
  // point.
  static bool DecodeRecoverySEI(const mozilla::MediaByteBuffer* aSEI,
                                SEIRecoveryData& aDest);
};
494 
495 }  // namespace mozilla
496 
497 #endif  // MP4_DEMUXER_H264_H_
498