1 /* 2 mkvmerge -- utility for splicing together matroska files 3 from component media subtypes 4 5 Distributed under the GPL v2 6 see the file COPYING for details 7 or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html 8 9 definitions and helper functions for DTS data 10 11 Written by Peter Niemayer <niemayer@isg.de>. 12 Modified by Moritz Bunkus <moritz@bunkus.org>. 13 */ 14 15 #pragma once 16 17 #include "common/codec.h" 18 #include "common/timestamp.h" 19 20 namespace mtx::bits { 21 class reader_c; 22 } 23 24 namespace mtx::dts { 25 26 enum class sync_word_e { 27 core = 0x7ffe8001 28 , exss = 0x64582025 29 , lbr = 0x0a801921 30 , xll = 0x41a29547 31 , x96 = 0x1d95f262 32 , xch = 0x5a5a5a5a 33 }; 34 35 enum class frametype_e { 36 // Used to extremely precisely specify the end-of-stream (single PCM 37 // sample resolution). 38 termination = 0 39 , normal 40 }; 41 42 enum class extension_audio_descriptor_e { 43 xch = 0 // channel extension 44 , unknown1 45 , x96k // frequency extension 46 , xch_x96k // both channel and frequency extension 47 , unknown4 48 , unknown5 49 , unknown6 50 , unknown7 51 }; 52 53 enum extension_mask_e { 54 css_core = 0x001 55 , css_xxch = 0x002 56 , css_x96 = 0x004 57 , css_xch = 0x008 58 , exss_core = 0x010 59 , exss_xbr = 0x020 60 , exss_xxch = 0x040 61 , exss_x96 = 0x080 62 , exss_lbr = 0x100 63 , exss_xll = 0x200 64 , exss_rsv1 = 0x400 65 , exss_rsv2 = 0x800 66 }; 67 68 enum class lfe_type_e { 69 none 70 , lfe_128 // 128 indicates the interpolation factor to reconstruct the lfe channel 71 , lfe_64 // 64 indicates the interpolation factor to reconstruct the lfe channel 72 , invalid 73 }; 74 75 enum class multirate_interpolator_e { 76 non_perfect 77 , perfect 78 }; 79 80 enum class dts_type_e { 81 normal 82 , high_resolution 83 , master_audio 84 , express 85 , es 86 , x96_24 87 }; 88 89 enum class source_pcm_resolution_e { 90 spr_16 = 0 91 , spr_16_ES //_ES means: surround channels mastered in DTS-ES 92 , spr_20 93 , spr_20_ES 94 , spr_invalid4 95 , spr_24_ES 96 , spr_24 97 , spr_invalid7 98 }; 99 100 enum class lbr_format_info_code_e { 101 sync_only = 1 102 , decoder_init = 2 103 }; 104 105 static const int64_t max_packet_size = 15384; 106 107 struct header_t { 108 frametype_e frametype{ frametype_e::normal }; 109 110 // 0 for normal frames, 1 to 30 for termination frames. Number of PCM 111 // samples the frame is shorter than normal. 112 unsigned int deficit_sample_count{}; 113 114 // If true, a CRC-sum is included in the data. 115 bool crc_present{}; 116 117 // number of PCM core sample blocks in this frame. Each PCM core sample block 118 // consists of 32 samples. Notice that "core samples" means "samples 119 // after the input decimator", so at sampling frequencies >48kHz, one core 120 // sample represents 2 (or 4 for frequencies >96kHz) output samples. 121 unsigned int num_pcm_sample_blocks{}; 122 123 // Number of bytes this frame occupies (range: 95 to 16 383). 124 unsigned int frame_byte_size{}; 125 126 // Number of audio channels, -1 for "unknown". 127 int audio_channels{}; 128 129 // String describing the audio channel arrangement 130 const char *audio_channel_arrangement{}; 131 132 // -1 for "invalid" 133 unsigned int core_sampling_frequency{}; 134 std::optional<unsigned int> extension_sampling_frequency; 135 136 // in bit per second, or -1 == "open", -2 == "variable", -3 == "lossless" 137 int transmission_bitrate{}; 138 139 // if true, sub-frames contain coefficients for downmixing to stereo 140 bool embedded_down_mix{}; 141 142 // if true, sub-frames contain coefficients for dynamic range correction 143 bool embedded_dynamic_range{}; 144 145 // if true, a time stamp is embedded at the end of the core audio data 146 bool embedded_time_stamp{}; 147 148 // if true, auxiliary data is appended at the end of the core audio data 149 bool auxiliary_data{}; 150 151 // if true, the source material was mastered in HDCD format 152 bool hdcd_master{}; 153 154 extension_audio_descriptor_e extension_audio_descriptor{ extension_audio_descriptor_e::xch }; // significant only if extended_coding == true 155 156 // if true, extended coding data is placed after the core audio data 157 bool extended_coding{}; 158 159 // if true, audio data check words are placed in each sub-sub-frame 160 // rather than in each sub-frame, only 161 bool audio_sync_word_in_sub_sub{}; 162 163 lfe_type_e lfe_type{ lfe_type_e::none }; 164 165 // if true, past frames will be used to predict ADPCM values for the 166 // current one. This means, if this flag is false, the current frame is 167 // better suited as an audio-jump-point (like an "I-frame" in video-coding). 168 bool predictor_history_flag{}; 169 170 // which FIR coefficients to use for sub-band reconstruction 171 multirate_interpolator_e multirate_interpolator{ multirate_interpolator_e::non_perfect }; 172 173 // 0 to 15 174 unsigned int encoder_software_revision{}; 175 176 // 0 to 3 - "top-secret" bits indicating the "copy history" of the material 177 unsigned int copy_history{}; 178 179 // 16, 20 or 24 bits per sample, or -1 == invalid 180 int source_pcm_resolution{}; 181 182 // if true, source surround channels are mastered in DTS-ES 183 bool source_surround_in_es{}; 184 185 // if true, left and right front channels are encoded as 186 // sum and difference (L = L + R, R = L - R) 187 bool front_sum_difference{}; 188 189 // same as front_sum_difference for surround left and right channels 190 bool surround_sum_difference{}; 191 192 // gain in dB to apply for dialog normalization 193 int dialog_normalization_gain{}, extension_dialog_normalization_gain{}; 194 unsigned int dialog_normalization_gain_bit_position{}, extension_dialog_normalization_gain_bit_position{}; 195 196 std::optional<unsigned int> crc{}; 197 198 bool has_core{}, has_exss{}, has_xch{}; 199 unsigned int exss_offset{}, exss_header_size{}, exss_part_size{}; 200 201 dts_type_e dts_type{ dts_type_e::normal }; 202 203 bool static_fields_present{}, mix_metadata_enabled{}; 204 unsigned int reference_clock_code{}, substream_frame_duration{}; 205 unsigned int substream_size_bits{}, num_presentations{1}, num_assets{1}, num_mixing_configurations{}; 206 unsigned int num_mixing_channels[5]; 207 208 struct substream_asset_t { 209 std::size_t asset_offset{}, asset_size{}, asset_index{}; 210 211 unsigned int pcm_bit_res{}, max_sample_rate{}, num_channels_total{}; 212 bool one_to_one_map_channel_to_speaker{}, embedded_stereo{}, embedded_6ch{}; 213 int representation_type{}; 214 215 int coding_mode{}; 216 extension_mask_e extension_mask{}; 217 218 std::size_t core_offset{}, core_size{}; 219 std::size_t xbr_offset{}, xbr_size{}; 220 std::size_t xxch_offset{}, xxch_size{}; 221 std::size_t x96_offset{}, x96_size{}; 222 std::size_t lbr_offset{}, lbr_size{}; 223 std::size_t xll_offset{}, xll_size{}; 224 225 bool lbr_sync_present{}, xll_sync_present{}; 226 int xll_delay_num_frames{}; 227 std::size_t xll_sync_offset{}; 228 229 unsigned int hd_stream_id{}; 230 }; 231 232 std::vector<substream_asset_t> substream_assets; 233 234 public: 235 uint64_t get_packet_length_in_core_samples() const; 236 timestamp_c get_packet_length_in_nanoseconds() const; 237 238 unsigned int get_core_num_audio_channels() const; 239 unsigned int get_total_num_audio_channels() const; 240 codec_c::specialization_e get_codec_specialization() const; 241 unsigned int get_effective_sampling_frequency() const; 242 243 void print() const; 244 245 bool decode_core_header(unsigned char const *buf, std::size_t size, bool allow_no_exss_search = false); 246 bool decode_exss_header(unsigned char const *buf, std::size_t size); 247 bool decode_x96_header(unsigned char const *buf, std::size_t size); 248 249 protected: 250 bool decode_asset(mtx::bits::reader_c &bc, substream_asset_t &asset); 251 bool decode_lbr_header(mtx::bits::reader_c &bc, substream_asset_t &asset); 252 bool decode_xll_header(mtx::bits::reader_c &bc, substream_asset_t &asset); 253 void parse_lbr_parameters(mtx::bits::reader_c &bc, substream_asset_t &asset); 254 void parse_xll_parameters(mtx::bits::reader_c &bc, substream_asset_t &asset); 255 void locate_and_decode_xch_header(unsigned char const *buf, std::size_t size); 256 257 bool set_one_extension_offset(substream_asset_t &asset, extension_mask_e wanted_mask, std::size_t &offset, std::size_t &size, std::size_t &offset_in_asset, std::size_t size_in_asset); 258 bool set_extension_offsets(substream_asset_t &asset); 259 }; 260 261 int find_sync_word(unsigned char const *buf, std::size_t size); 262 int find_header(unsigned char const *buf, std::size_t size, header_t &header, bool allow_no_exss_search = false); 263 int find_consecutive_headers(unsigned char const *buf, std::size_t size, unsigned int num); 264 265 bool operator ==(header_t const &h1, header_t const &h2); 266 bool operator!=(header_t const &h1, header_t const &h2); 267 268 void convert_14_to_16_bits(const unsigned short *src, unsigned long srcwords, unsigned short *dst); 269 270 bool detect(const void *src_buf, int len, bool &convert_14_to_16, bool &swap_bytes); 271 272 void remove_dialog_normalization_gain(unsigned char *buf, std::size_t size); 273 274 275 } 276