1 /** HEVC video helper functions
2 
3    mkvmerge -- utility for splicing together matroska files
4    from component media subtypes
5 
6    Distributed under the GPL v2
7    see the file COPYING for details
8    or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
9 
10    \file
11 
12 */
13 
14 #pragma once
15 
16 #include "common/common_pch.h"
17 
18 namespace mtx::hevc {
19 
// NAL unit type codes (the 6-bit nal_unit_type field, cf. ITU-T H.265
// table 7-1).
//
// The constants are `inline constexpr` so that all translation units
// including this header share one definition of each constant instead of
// each getting its own internal-linkage copy.

// VCL NAL unit types (0..31)
inline constexpr auto NALU_TYPE_TRAIL_N                        = 0;
inline constexpr auto NALU_TYPE_TRAIL_R                        = 1;
inline constexpr auto NALU_TYPE_TSA_N                          = 2;
inline constexpr auto NALU_TYPE_TSA_R                          = 3;
inline constexpr auto NALU_TYPE_STSA_N                         = 4;
inline constexpr auto NALU_TYPE_STSA_R                         = 5;
inline constexpr auto NALU_TYPE_RADL_N                         = 6;
inline constexpr auto NALU_TYPE_RADL_R                         = 7;
inline constexpr auto NALU_TYPE_RASL_N                         = 8;
inline constexpr auto NALU_TYPE_RASL_R                         = 9;
// Reserved VCL types; even values carry the "_N" suffix, odd ones "_R".
inline constexpr auto NALU_TYPE_RSV_VCL_N10                    = 10;
inline constexpr auto NALU_TYPE_RSV_VCL_R11                    = 11;
inline constexpr auto NALU_TYPE_RSV_VCL_N12                    = 12;
inline constexpr auto NALU_TYPE_RSV_VCL_R13                    = 13;
inline constexpr auto NALU_TYPE_RSV_VCL_N14                    = 14;
inline constexpr auto NALU_TYPE_RSV_VCL_R15                    = 15;
inline constexpr auto NALU_TYPE_BLA_W_LP                       = 16;
inline constexpr auto NALU_TYPE_BLA_W_RADL                     = 17;
inline constexpr auto NALU_TYPE_BLA_N_LP                       = 18;
inline constexpr auto NALU_TYPE_IDR_W_RADL                     = 19;
inline constexpr auto NALU_TYPE_IDR_N_LP                       = 20;
inline constexpr auto NALU_TYPE_CRA_NUT                        = 21;
inline constexpr auto NALU_TYPE_RSV_RAP_VCL22                  = 22;
inline constexpr auto NALU_TYPE_RSV_RAP_VCL23                  = 23;
inline constexpr auto NALU_TYPE_RSV_VCL24                      = 24;
inline constexpr auto NALU_TYPE_RSV_VCL25                      = 25;
inline constexpr auto NALU_TYPE_RSV_VCL26                      = 26;
inline constexpr auto NALU_TYPE_RSV_VCL27                      = 27;
inline constexpr auto NALU_TYPE_RSV_VCL28                      = 28;
inline constexpr auto NALU_TYPE_RSV_VCL29                      = 29;
inline constexpr auto NALU_TYPE_RSV_VCL30                      = 30;
inline constexpr auto NALU_TYPE_RSV_VCL31                      = 31;

// Non-VCL NAL unit types (32..63): parameter sets, delimiters, SEI,
// reserved (41..47) and unspecified (48..63) values.
inline constexpr auto NALU_TYPE_VIDEO_PARAM                    = 32;
inline constexpr auto NALU_TYPE_SEQ_PARAM                      = 33;
inline constexpr auto NALU_TYPE_PIC_PARAM                      = 34;
inline constexpr auto NALU_TYPE_ACCESS_UNIT                    = 35;
inline constexpr auto NALU_TYPE_END_OF_SEQ                     = 36;
inline constexpr auto NALU_TYPE_END_OF_STREAM                  = 37;
inline constexpr auto NALU_TYPE_FILLER_DATA                    = 38;
inline constexpr auto NALU_TYPE_PREFIX_SEI                     = 39;
inline constexpr auto NALU_TYPE_SUFFIX_SEI                     = 40;
inline constexpr auto NALU_TYPE_RSV_NVCL41                     = 41;
inline constexpr auto NALU_TYPE_RSV_NVCL42                     = 42;
inline constexpr auto NALU_TYPE_RSV_NVCL43                     = 43;
inline constexpr auto NALU_TYPE_RSV_NVCL44                     = 44;
inline constexpr auto NALU_TYPE_RSV_NVCL45                     = 45;
inline constexpr auto NALU_TYPE_RSV_NVCL46                     = 46;
inline constexpr auto NALU_TYPE_RSV_NVCL47                     = 47;
inline constexpr auto NALU_TYPE_UNSPEC48                       = 48;
inline constexpr auto NALU_TYPE_UNSPEC49                       = 49;
inline constexpr auto NALU_TYPE_UNSPEC50                       = 50;
inline constexpr auto NALU_TYPE_UNSPEC51                       = 51;
inline constexpr auto NALU_TYPE_UNSPEC52                       = 52;
inline constexpr auto NALU_TYPE_UNSPEC53                       = 53;
inline constexpr auto NALU_TYPE_UNSPEC54                       = 54;
inline constexpr auto NALU_TYPE_UNSPEC55                       = 55;
inline constexpr auto NALU_TYPE_UNSPEC56                       = 56;
inline constexpr auto NALU_TYPE_UNSPEC57                       = 57;
inline constexpr auto NALU_TYPE_UNSPEC58                       = 58;
inline constexpr auto NALU_TYPE_UNSPEC59                       = 59;
inline constexpr auto NALU_TYPE_UNSPEC60                       = 60;
inline constexpr auto NALU_TYPE_UNSPEC61                       = 61;
inline constexpr auto NALU_TYPE_UNSPEC62                       = 62;
inline constexpr auto NALU_TYPE_UNSPEC63                       = 63;
87 
// SEI message payload type codes (cf. ITU-T H.265 annex D).
//
// `inline constexpr` gives each constant a single definition shared by all
// translation units that include this header.
inline constexpr auto SEI_BUFFERING_PERIOD                     = 0;
inline constexpr auto SEI_PICTURE_TIMING                       = 1;
inline constexpr auto SEI_PAN_SCAN_RECT                        = 2;
inline constexpr auto SEI_FILLER_PAYLOAD                       = 3;
inline constexpr auto SEI_USER_DATA_REGISTERED_ITU_T_T35       = 4;
inline constexpr auto SEI_USER_DATA_UNREGISTERED               = 5;
inline constexpr auto SEI_RECOVERY_POINT                       = 6;
inline constexpr auto SEI_SCENE_INFO                           = 9;
inline constexpr auto SEI_FULL_FRAME_SNAPSHOT                  = 15;
inline constexpr auto SEI_PROGRESSIVE_REFINEMENT_SEGMENT_START = 16;
inline constexpr auto SEI_PROGRESSIVE_REFINEMENT_SEGMENT_END   = 17;
inline constexpr auto SEI_FILM_GRAIN_CHARACTERISTICS           = 19;
inline constexpr auto SEI_POST_FILTER_HINT                     = 22;
inline constexpr auto SEI_TONE_MAPPING_INFO                    = 23;
inline constexpr auto SEI_FRAME_PACKING                        = 45;
inline constexpr auto SEI_DISPLAY_ORIENTATION                  = 47;
inline constexpr auto SEI_SOP_DESCRIPTION                      = 128;
inline constexpr auto SEI_ACTIVE_PARAMETER_SETS                = 129;
inline constexpr auto SEI_DECODING_UNIT_INFO                   = 130;
inline constexpr auto SEI_TEMPORAL_LEVEL0_INDEX                = 131;
inline constexpr auto SEI_DECODED_PICTURE_HASH                 = 132;
inline constexpr auto SEI_SCALABLE_NESTING                     = 133;
inline constexpr auto SEI_REGION_REFRESH_INFO                  = 134;
111 
// Slice type codes.
//
// `inline constexpr` gives each constant a single definition shared by all
// translation units that include this header.
inline constexpr auto SLICE_TYPE_B                             = 0;
inline constexpr auto SLICE_TYPE_P                             = 1;
inline constexpr auto SLICE_TYPE_I                             = 2;

// Aspect ratio signalling: EXTENDED_SAR is the escape value (255), and
// NUM_PREDEFINED_PARS is the size of the table of predefined pixel aspect
// ratios (presumably indexed by aspect_ratio_idc; the table itself is not
// part of this header).
inline constexpr auto EXTENDED_SAR                             = 0xff;
inline constexpr auto NUM_PREDEFINED_PARS                      = 17;
118 
119 struct par_extraction_t;
120 
121 /*
122 Bytes                                    Bits
      configuration_version               8     The value should be 0 until the format has been finalized. Thereafter it should have the specified value
                                                (probably 1). This allows us to recognize (and ignore) non-standard CodecPrivate
125 1
126       general_profile_space               2     Specifies the context for the interpretation of general_profile_idc and
127                                                 general_profile_compatibility_flag
128       general_tier_flag                   1     Specifies the context for the interpretation of general_level_idc
129       general_profile_idc                 5     Defines the profile of the bitstream
130 1
131       general_profile_compatibility_flag  32    Defines profile compatibility, see [2] for interpretation
132 4
133       general_progressive_source_flag     1     Source is progressive, see [2] for interpretation.
      general_interlaced_source_flag      1     Source is interlaced, see [2] for interpretation.
      general_non_packed_constraint_flag  1     If 1 then no frame packing arrangement SEI messages, see [2] for more information
136       general_frame_only_constraint_flag  1     If 1 then no fields, see [2] for interpretation
137       reserved                            44    Reserved field, value TBD 0
138 6
139       general_level_idc                   8     Defines the level of the bitstream
140 1
141       reserved                            4     Reserved Field, value '1111'b
142       min_spatial_segmentation_idc        12    Maximum possible size of distinct coded spatial segmentation regions in the pictures of the CVS
143 2
144       reserved                            6     Reserved Field, value '111111'b
145       Parallelism_type                    2     0=unknown, 1=slices, 2=tiles, 3=WPP
146 1
147       reserved                            6     Reserved field, value '111111'b
148       chroma_format_idc                   2     See table 6-1, HEVC
149 1
150       reserved                            5     Reserved Field, value '11111'b
151       bit_depth_luma_minus8               3     Bit depth luma minus 8
152 1
153       reserved                            5     Reserved Field, value '11111'b
154       bit_depth_chroma_minus8             3     Bit depth chroma minus 8
155 1
156       reserved                            16    Reserved Field, value 0
157 2
158       reserved                            2     Reserved Field, value 0
159       max_sub_layers                      3     maximum number of temporal sub-layers
160       temporal_id_nesting_flag            1     Specifies whether inter prediction is additionally restricted. see [2] for interpretation.
161       size_nalu_minus_one                 2     Size of field NALU Length – 1
162 1
163       num_parameter_sets                  8     Number of parameter sets
164 1
165 --
166 23 bytes total
167 */
168 
// Holds the individual values that make up the HEVC codec private data.
// All members start out as zero except for the two data IDs, which start
// out as -1.
struct codec_private_t {
  unsigned char configuration_version = 0;

  // Values originating from the video parameter set
  unsigned int profile_space              = 0;
  unsigned int tier_flag                  = 0;
  unsigned int profile_idc                = 0;
  unsigned int profile_compatibility_flag = 0;
  unsigned int progressive_source_flag    = 0;
  unsigned int interlaced_source_flag     = 0;
  unsigned int non_packed_constraint_flag = 0;
  unsigned int frame_only_constraint_flag = 0;
  unsigned int level_idc                  = 0;

  // Values originating from the sequence parameter set
  unsigned int  min_spatial_segmentation_idc = 0;
  unsigned char chroma_format_idc            = 0;
  unsigned char bit_depth_luma_minus8        = 0;
  unsigned char bit_depth_chroma_minus8      = 0;
  unsigned int  max_sub_layers_minus1        = 0;
  unsigned int  temporal_id_nesting_flag     = 0;

  unsigned char num_parameter_sets = 0;
  unsigned char parallelism_type   = 0;

  // -1 until set; presumably identify the VPS/SPS the values above were
  // taken from (confirm against the code filling this structure).
  int vps_data_id = -1;
  int sps_data_id = -1;

  // Defined out of line.
  void clear();
};
199 
// Values describing a single video parameter set. Every member is
// zero-initialized.
struct vps_info_t {
  unsigned int id = 0;

  // Profile, tier and level related values
  unsigned int profile_space              = 0;
  unsigned int tier_flag                  = 0;
  unsigned int profile_idc                = 0;
  unsigned int profile_compatibility_flag = 0;
  unsigned int progressive_source_flag    = 0;
  unsigned int interlaced_source_flag     = 0;
  unsigned int non_packed_constraint_flag = 0;
  unsigned int frame_only_constraint_flag = 0;
  unsigned int level_idc                  = 0;

  unsigned int max_sub_layers_minus1 = 0;

  // Presumably a checksum over the raw parameter set data; confirm against
  // the code that fills it in.
  uint32_t checksum = 0;

  // Defined out of line.
  void clear();
};
219 
// Values describing one short-term reference picture set. Every member,
// including all array elements, is zero-initialized.
struct short_term_ref_pic_set_t {
  int inter_ref_pic_set_prediction_flag = 0;
  int delta_idx                         = 0;
  int delta_rps_sign                    = 0;
  int abs_delta_rps                     = 0;

  // Per-picture tables with 17 (16 + 1) entries each.
  int delta_poc[17] = {};
  int used[17]      = {};
  int ref_id[17]    = {};

  int num_ref_id        = 0;
  int num_pics          = 0;
  int num_negative_pics = 0;
  int num_positive_pics = 0;

  // Defined out of line.
  void clear();
};
237 
// Colour description and chroma sample location values from the video
// usability information. Every member is zero-initialized.
struct vui_info_t {
  unsigned int video_full_range_flag    = 0;
  unsigned int colour_primaries         = 0;
  unsigned int transfer_characteristics = 0;
  unsigned int matrix_coefficients      = 0;

  unsigned int chroma_sample_loc_type_top_field    = 0;
  unsigned int chroma_sample_loc_type_bottom_field = 0;
};
247 
248 struct sps_info_t {
249   unsigned int id{};
250   unsigned int vps_id{};
251   unsigned int max_sub_layers_minus1{};
252   unsigned int temporal_id_nesting_flag{};
253 
254   short_term_ref_pic_set_t short_term_ref_pic_sets[64]{};
255 
256   unsigned int chroma_format_idc{};
257   unsigned int bit_depth_luma_minus8{};
258   unsigned int bit_depth_chroma_minus8{};
259   unsigned int separate_colour_plane_flag{};
260   unsigned int log2_min_luma_coding_block_size_minus3{};
261   unsigned int log2_diff_max_min_luma_coding_block_size{};
262   unsigned int log2_max_pic_order_cnt_lsb{};
263 
264   bool conformance_window_flag{};
265   unsigned int conf_win_left_offset{}, conf_win_right_offset{}, conf_win_top_offset{}, conf_win_bottom_offset{};
266 
267   // vui:
268   bool vui_present{}, ar_found{}, field_seq_flag{};
269   unsigned int par_num{}, par_den{};
270   unsigned int min_spatial_segmentation_idc{};
271   vui_info_t vui{};
272 
273   // timing_info:
274   bool timing_info_present{};
275   unsigned int num_units_in_tick{}, time_scale{};
276 
277   unsigned int width{}, height{};
278 
279   unsigned int vps{};
280 
281   uint32_t checksum{};
282 
283   void dump();
284 
285   bool timing_info_valid() const;
286   int64_t default_duration() const;
287 
288   void clear();
289 
get_widthsps_info_t290   unsigned int get_width() const {
291     return width;
292   }
293 
get_heightsps_info_t294   unsigned int get_height() const {
295     return height * (field_seq_flag ? 2 : 1);
296   }
297 };
298 
// Values describing a single picture parameter set. Every member is
// zero-/false-initialized.
struct pps_info_t {
  unsigned int id     = 0;
  unsigned int sps_id = 0;

  bool         dependent_slice_segments_enabled_flag = false;
  bool         output_flag_present_flag              = false;
  unsigned int num_extra_slice_header_bits           = 0;

  uint32_t checksum = 0;

  // Both defined out of line.
  void clear();
  void dump();
};
312 
// The parser class is only forward-declared here; its definition lives
// elsewhere.
class es_parser_c;

// Shared-ownership handle to an es_parser_c.
using es_parser_cptr = std::shared_ptr<es_parser_c>;

// Ordered map from one byte sequence to another.
using user_data_t = std::map<std::vector<unsigned char>, std::vector<unsigned char>>;
317 
318 }                              // namespace mtx::hevc
319