1 /*
2    mkvmerge -- utility for splicing together matroska files
3    from component media subtypes
4 
5    Distributed under the GPL v2
6    see the file COPYING for details
7    or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
8 
9    definitions and helper functions for DTS data
10 
11    Written by Peter Niemayer <niemayer@isg.de>.
12    Modified by Moritz Bunkus <moritz@bunkus.org>.
13 */
14 
15 #pragma once
16 
17 #include "common/codec.h"
18 #include "common/timestamp.h"
19 
// Forward declaration only: this header passes the bit reader around by
// reference, so the complete type is not needed here.
namespace mtx::bits { class reader_c; }
23 
24 namespace mtx::dts {
25 
// Sync word constants, one per named stream component.
enum class sync_word_e {
  core = 0x7ffe8001,
  exss = 0x64582025,
  lbr  = 0x0a801921,
  xll  = 0x41a29547,
  x96  = 0x1d95f262,
  xch  = 0x5a5a5a5a,
};
34 
// Kind of frame. A termination frame is used to specify the end of the
// stream extremely precisely (single PCM sample resolution).
enum class frametype_e {
  termination = 0,
  normal      = 1,
};
41 
// Extension audio descriptor values (0 to 7). Only three of them carry a
// known meaning; the remaining ones are kept as numbered placeholders.
enum class extension_audio_descriptor_e {
  xch      = 0, // channel extension
  unknown1 = 1,
  x96k     = 2, // frequency extension
  xch_x96k = 3, // both channel and frequency extension
  unknown4 = 4,
  unknown5 = 5,
  unknown6 = 6,
  unknown7 = 7,
};
52 
// Bit flags, one bit per extension; the values can be OR-ed together.
// Deliberately left as an unscoped enum so the enumerators stay usable as
// plain integers.
enum extension_mask_e {
  css_core  = 1 << 0,  // 0x001
  css_xxch  = 1 << 1,  // 0x002
  css_x96   = 1 << 2,  // 0x004
  css_xch   = 1 << 3,  // 0x008
  exss_core = 1 << 4,  // 0x010
  exss_xbr  = 1 << 5,  // 0x020
  exss_xxch = 1 << 6,  // 0x040
  exss_x96  = 1 << 7,  // 0x080
  exss_lbr  = 1 << 8,  // 0x100
  exss_xll  = 1 << 9,  // 0x200
  exss_rsv1 = 1 << 10, // 0x400
  exss_rsv2 = 1 << 11, // 0x800
};
67 
// LFE channel type. The number in the enumerator name is the interpolation
// factor used to reconstruct the LFE channel.
enum class lfe_type_e {
  none    = 0,
  lfe_128 = 1, // interpolation factor 128
  lfe_64  = 2, // interpolation factor 64
  invalid = 3,
};
74 
// Selects one of the two multirate interpolator variants.
enum class multirate_interpolator_e {
  non_perfect = 0,
  perfect     = 1,
};
79 
// DTS flavour of a stream.
enum class dts_type_e {
  normal          = 0,
  high_resolution = 1,
  master_audio    = 2,
  express         = 3,
  es              = 4,
  x96_24          = 5,
};
88 
// Source PCM resolution codes (0 to 7). The "_ES" suffix means that the
// surround channels were mastered in DTS-ES. Note that the 24 bit entries
// are ordered ES first (5), non-ES second (6).
enum class source_pcm_resolution_e {
  spr_16       = 0,
  spr_16_ES    = 1,
  spr_20       = 2,
  spr_20_ES    = 3,
  spr_invalid4 = 4,
  spr_24_ES    = 5,
  spr_24       = 6,
  spr_invalid7 = 7,
};
99 
// LBR format info codes. Numbering starts at 1; 0 is not a named value.
enum class lbr_format_info_code_e {
  sync_only    = 1,
  decoder_init = 2,
};
104 
// Largest packet size handled by the DTS code.
// NOTE(review): presumably measured in bytes — confirm against the users of
// this constant.
static constexpr int64_t max_packet_size{15384};
106 
107 struct header_t {
108   frametype_e frametype{ frametype_e::normal };
109 
110   // 0 for normal frames, 1 to 30 for termination frames. Number of PCM
111   // samples the frame is shorter than normal.
112   unsigned int deficit_sample_count{};
113 
114   // If true, a CRC-sum is included in the data.
115   bool crc_present{};
116 
117   // number of PCM core sample blocks in this frame. Each PCM core sample block
118   // consists of 32 samples. Notice that "core samples" means "samples
119   // after the input decimator", so at sampling frequencies >48kHz, one core
120   // sample represents 2 (or 4 for frequencies >96kHz) output samples.
121   unsigned int num_pcm_sample_blocks{};
122 
123   // Number of bytes this frame occupies (range: 95 to 16 383).
124   unsigned int frame_byte_size{};
125 
126   // Number of audio channels, -1 for "unknown".
127   int audio_channels{};
128 
129   // String describing the audio channel arrangement
130   const char *audio_channel_arrangement{};
131 
132   // -1 for "invalid"
133   unsigned int core_sampling_frequency{};
134   std::optional<unsigned int> extension_sampling_frequency;
135 
136   // in bit per second, or -1 == "open", -2 == "variable", -3 == "lossless"
137   int transmission_bitrate{};
138 
139   // if true, sub-frames contain coefficients for downmixing to stereo
140   bool embedded_down_mix{};
141 
142   // if true, sub-frames contain coefficients for dynamic range correction
143   bool embedded_dynamic_range{};
144 
145   // if true, a time stamp is embedded at the end of the core audio data
146   bool embedded_time_stamp{};
147 
148   // if true, auxiliary data is appended at the end of the core audio data
149   bool auxiliary_data{};
150 
151   // if true, the source material was mastered in HDCD format
152   bool hdcd_master{};
153 
154   extension_audio_descriptor_e extension_audio_descriptor{ extension_audio_descriptor_e::xch }; // significant only if extended_coding == true
155 
156   // if true, extended coding data is placed after the core audio data
157   bool extended_coding{};
158 
159   // if true, audio data check words are placed in each sub-sub-frame
160   // rather than in each sub-frame, only
161   bool audio_sync_word_in_sub_sub{};
162 
163   lfe_type_e lfe_type{ lfe_type_e::none };
164 
165   // if true, past frames will be used to predict ADPCM values for the
166   // current one. This means, if this flag is false, the current frame is
167   // better suited as an audio-jump-point (like an "I-frame" in video-coding).
168   bool predictor_history_flag{};
169 
170   // which FIR coefficients to use for sub-band reconstruction
171   multirate_interpolator_e multirate_interpolator{ multirate_interpolator_e::non_perfect };
172 
173   // 0 to 15
174   unsigned int encoder_software_revision{};
175 
176   // 0 to 3 - "top-secret" bits indicating the "copy history" of the material
177   unsigned int copy_history{};
178 
179   // 16, 20 or 24 bits per sample, or -1 == invalid
180   int source_pcm_resolution{};
181 
182   // if true, source surround channels are mastered in DTS-ES
183   bool source_surround_in_es{};
184 
185   // if true, left and right front channels are encoded as
186   // sum and difference (L = L + R, R = L - R)
187   bool front_sum_difference{};
188 
189   // same as front_sum_difference for surround left and right channels
190   bool surround_sum_difference{};
191 
192   // gain in dB to apply for dialog normalization
193   int dialog_normalization_gain{}, extension_dialog_normalization_gain{};
194   unsigned int dialog_normalization_gain_bit_position{}, extension_dialog_normalization_gain_bit_position{};
195 
196   std::optional<unsigned int> crc{};
197 
198   bool has_core{}, has_exss{}, has_xch{};
199   unsigned int exss_offset{}, exss_header_size{}, exss_part_size{};
200 
201   dts_type_e dts_type{ dts_type_e::normal };
202 
203   bool static_fields_present{}, mix_metadata_enabled{};
204   unsigned int reference_clock_code{}, substream_frame_duration{};
205   unsigned int substream_size_bits{}, num_presentations{1}, num_assets{1}, num_mixing_configurations{};
206   unsigned int num_mixing_channels[5];
207 
208   struct substream_asset_t {
209     std::size_t asset_offset{}, asset_size{}, asset_index{};
210 
211     unsigned int pcm_bit_res{}, max_sample_rate{}, num_channels_total{};
212     bool one_to_one_map_channel_to_speaker{}, embedded_stereo{}, embedded_6ch{};
213     int representation_type{};
214 
215     int coding_mode{};
216     extension_mask_e extension_mask{};
217 
218     std::size_t core_offset{}, core_size{};
219     std::size_t xbr_offset{},  xbr_size{};
220     std::size_t xxch_offset{}, xxch_size{};
221     std::size_t x96_offset{},  x96_size{};
222     std::size_t lbr_offset{},  lbr_size{};
223     std::size_t xll_offset{},  xll_size{};
224 
225     bool lbr_sync_present{}, xll_sync_present{};
226     int xll_delay_num_frames{};
227     std::size_t xll_sync_offset{};
228 
229     unsigned int hd_stream_id{};
230   };
231 
232   std::vector<substream_asset_t> substream_assets;
233 
234 public:
235   uint64_t get_packet_length_in_core_samples() const;
236   timestamp_c get_packet_length_in_nanoseconds() const;
237 
238   unsigned int get_core_num_audio_channels() const;
239   unsigned int get_total_num_audio_channels() const;
240   codec_c::specialization_e get_codec_specialization() const;
241   unsigned int get_effective_sampling_frequency() const;
242 
243   void print() const;
244 
245   bool decode_core_header(unsigned char const *buf, std::size_t size, bool allow_no_exss_search = false);
246   bool decode_exss_header(unsigned char const *buf, std::size_t size);
247   bool decode_x96_header(unsigned char const *buf, std::size_t size);
248 
249 protected:
250   bool decode_asset(mtx::bits::reader_c &bc, substream_asset_t &asset);
251   bool decode_lbr_header(mtx::bits::reader_c &bc, substream_asset_t &asset);
252   bool decode_xll_header(mtx::bits::reader_c &bc, substream_asset_t &asset);
253   void parse_lbr_parameters(mtx::bits::reader_c &bc, substream_asset_t &asset);
254   void parse_xll_parameters(mtx::bits::reader_c &bc, substream_asset_t &asset);
255   void locate_and_decode_xch_header(unsigned char const *buf, std::size_t size);
256 
257   bool set_one_extension_offset(substream_asset_t &asset, extension_mask_e wanted_mask, std::size_t &offset, std::size_t &size, std::size_t &offset_in_asset, std::size_t size_in_asset);
258   bool set_extension_offsets(substream_asset_t &asset);
259 };
260 
// Sync word search over `size` bytes starting at `buf`.
// NOTE(review): the int result is presumably a byte offset into buf, with a
// negative value meaning "not found" — confirm against the implementation.
int find_sync_word(unsigned char const *buf,
                   std::size_t size);
262 int find_header(unsigned char const *buf, std::size_t size, header_t &header, bool allow_no_exss_search = false);
// Search for `num` consecutive headers within `size` bytes starting at `buf`.
// NOTE(review): the int result is presumably the byte offset of the first
// header of the run, negative if no such run exists — confirm against the
// implementation.
int find_consecutive_headers(unsigned char const *buf,
                             std::size_t size,
                             unsigned int num);
264 
265 bool operator ==(header_t const &h1, header_t const &h2);
266 bool operator!=(header_t const &h1, header_t const &h2);
267 
// Reads `srcwords` words from `src` and writes the converted data to `dst`.
// NOTE(review): going by the name this repacks 14 bit DTS words into 16 bit
// words; the required size of `dst` is not visible here — confirm against
// the implementation.
void convert_14_to_16_bits(unsigned short const *src, unsigned long srcwords, unsigned short *dst);

// Inspects `len` bytes at `src_buf`. `convert_14_to_16` and `swap_bytes` are
// output parameters (non-const references).
// NOTE(review): presumably returns true if DTS data was detected, with the
// two flags telling the caller which transformations the data needs —
// confirm against the implementation.
bool detect(void const *src_buf, int len, bool &convert_14_to_16, bool &swap_bytes);

// Operates in place on the `size` bytes at `buf` (the buffer is non-const).
// NOTE(review): going by the name this strips the dialog normalization gain
// from the contained frames — confirm against the implementation.
void remove_dialog_normalization_gain(unsigned char *buf, std::size_t size);
273 
274 
275 }
276