1 /** \brief output handling
2 
3    mkvmerge -- utility for splicing together Matroska files
4    from component media sub-types
5 
6    Distributed under the GPL v2
7    see the file COPYING for details
8    or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
9 
10    Written by Moritz Bunkus <moritz@bunkus.org>.
11 */
12 
13 #include "common/common_pch.h"
14 
15 #include <typeinfo>
16 
17 #include "common/mm_file_io.h"
18 #include "common/mm_mpls_multi_file_io.h"
19 #include "common/mm_proxy_io.h"
20 #include "common/mm_read_buffer_io.h"
21 #include "common/mm_text_io.h"
22 #include "common/path.h"
23 #include "common/strings/formatting.h"
24 #include "common/xml/xml.h"
25 #include "input/r_aac.h"
26 #include "input/r_ac3.h"
27 #include "input/r_avc.h"
28 #include "input/r_avi.h"
29 #include "input/r_coreaudio.h"
30 #include "input/r_dirac.h"
31 #include "input/r_dts.h"
32 #include "input/r_dv.h"
33 #include "input/r_flac.h"
34 #include "input/r_flv.h"
35 #include "input/r_hdmv_pgs.h"
36 #include "input/r_hdmv_textst.h"
37 #include "input/r_hevc.h"
38 #include "input/r_ivf.h"
39 #include "input/r_matroska.h"
40 #include "input/r_microdvd.h"
41 #include "input/r_mp3.h"
42 #include "input/r_mpeg_es.h"
43 #include "input/r_mpeg_ps.h"
44 #include "input/r_mpeg_ts.h"
45 #include "input/r_obu.h"
46 #include "input/r_ogm.h"
47 #include "input/r_qtmp4.h"
48 #include "input/r_real.h"
49 #include "input/r_srt.h"
50 #include "input/r_ssa.h"
51 #include "input/r_truehd.h"
52 #include "input/r_tta.h"
53 #include "input/r_usf.h"
54 #include "input/r_vc1.h"
55 #include "input/r_vobbtn.h"
56 #include "input/r_vobsub.h"
57 #include "input/r_wav.h"
58 #include "input/r_wavpack.h"
59 #include "input/r_webvtt.h"
60 #include "input/unsupported_types_signature_prober.h"
61 #include "merge/filelist.h"
62 #include "merge/input_x.h"
63 #include "merge/probe_range_info.h"
64 #include "merge/reader_detection_and_creation.h"
65 
66 static std::vector<std::filesystem::path>
file_names_to_paths(const std::vector<std::string> & file_names)67 file_names_to_paths(const std::vector<std::string> &file_names) {
68   std::vector<std::filesystem::path> paths;
69   for (auto &file_name : file_names)
70     paths.push_back(mtx::fs::absolute(mtx::fs::to_path(file_name)));
71 
72   return paths;
73 }
74 
75 static mm_io_cptr
open_input_file(filelist_t & file)76 open_input_file(filelist_t &file) {
77   try {
78     if (file.all_names.size() == 1)
79       return std::make_shared<mm_read_buffer_io_c>(std::make_shared<mm_file_io_c>(file.name));
80 
81     else {
82       std::vector<std::filesystem::path> paths = file_names_to_paths(file.all_names);
83       return std::make_shared<mm_read_buffer_io_c>(std::make_shared<mm_multi_file_io_c>(paths, file.name));
84     }
85 
86   } catch (mtx::mm_io::exception &ex) {
87     mxerror(fmt::format(Y("The file '{0}' could not be opened for reading: {1}.\n"), file.name, ex));
88     return mm_io_cptr{};
89 
90   } catch (...) {
91     mxerror(fmt::format(Y("The source file '{0}' could not be opened successfully, or retrieving its size by seeking to the end did not work.\n"), file.name));
92     return mm_io_cptr{};
93   }
94 }
95 
96 static bool
open_playlist_file(filelist_t & file,mm_io_c & in)97 open_playlist_file(filelist_t &file,
98                    mm_io_c &in) {
99   auto mpls_in = mm_mpls_multi_file_io_c::open_multi(in);
100   if (!mpls_in)
101     return false;
102 
103   file.is_playlist      = true;
104   file.playlist_mpls_in = std::static_pointer_cast<mm_mpls_multi_file_io_c>(mpls_in);
105 
106   return true;
107 }
108 
// Debugging option "probe_file_format"; the do_probe() templates below only
// emit their diagnostic output when this option is active.
static debugging_option_c s_debug_probe{"probe_file_format"};
110 
111 template<typename Treader>
112 std::unique_ptr<Treader>
create_and_prepare_reader(mm_io_cptr const & io,probe_range_info_t const & probe_range_info={})113 create_and_prepare_reader(mm_io_cptr const &io,
114                           probe_range_info_t const &probe_range_info = {}) {
115   auto reader = std::make_unique<Treader>();
116 
117   io->setFilePointer(0);
118   reader->set_file_to_read(io);
119   reader->set_probe_range_info(probe_range_info);
120 
121   return reader;
122 }
123 
124 template<typename Treader>
125 typename std::enable_if<
126   std::is_base_of<generic_reader_c, Treader>::value,
127   std::unique_ptr<generic_reader_c>
128 >::type
do_probe(mm_io_cptr const & io,probe_range_info_t const & probe_range_info={})129 do_probe(mm_io_cptr const &io,
130          probe_range_info_t const &probe_range_info = {}) {
131   auto reader    = create_and_prepare_reader<Treader>(io, probe_range_info);
132   auto probed_ok = false;
133 
134   try {
135     probed_ok = reader->probe_file();
136   } catch (mtx::exception &ex) {
137     mxdebug_if(s_debug_probe, fmt::format("do_probe<{}>: mtx::exception caught: {}\n", typeid(Treader).name(), ex.what()));
138   } catch (...) {
139     mxdebug_if(s_debug_probe, fmt::format("do_probe<{}>: generic exception caught\n", typeid(Treader).name()));
140   }
141 
142   mxdebug_if(s_debug_probe, fmt::format("do_probe<{}>: probe result: {}\n", typeid(Treader).name(), probed_ok));
143 
144   io->setFilePointer(0);
145 
146   if (probed_ok)
147     return reader;
148 
149   return {};
150 }
151 
152 template<typename Treader>
153 typename std::enable_if<
154   !std::is_base_of<generic_reader_c, Treader>::value,
155   std::unique_ptr<generic_reader_c>
156 >::type
do_probe(mm_io_cptr const & io,probe_range_info_t const &={})157 do_probe(mm_io_cptr const &io,
158          probe_range_info_t const & = {}) {
159   io->setFilePointer(0);
160   Treader::probe_file(*io);
161   io->setFilePointer(0);
162 
163   return {};
164 }
165 
166 using prober_t = std::function<std::unique_ptr<generic_reader_c>(mm_io_cptr const &, probe_range_info_t const &)>;
167 
168 static prober_t
prober_for_type(mtx::file_type_e type)169 prober_for_type(mtx::file_type_e type) {
170   static std::map<mtx::file_type_e, prober_t> type_probe_map;
171 
172   if (type_probe_map.empty()) {
173     type_probe_map[mtx::file_type_e::avc_es]      = &do_probe<avc_es_reader_c>;
174     type_probe_map[mtx::file_type_e::avi]         = &do_probe<avi_reader_c>;
175     type_probe_map[mtx::file_type_e::coreaudio]   = &do_probe<coreaudio_reader_c>;
176     type_probe_map[mtx::file_type_e::dirac]       = &do_probe<dirac_es_reader_c>;
177     type_probe_map[mtx::file_type_e::dts]         = &do_probe<dts_reader_c>;
178     type_probe_map[mtx::file_type_e::dv]          = &do_probe<dv_reader_c>;
179     type_probe_map[mtx::file_type_e::flac]        = &do_probe<flac_reader_c>;
180     type_probe_map[mtx::file_type_e::flv]         = &do_probe<flv_reader_c>;
181     type_probe_map[mtx::file_type_e::hdmv_textst] = &do_probe<hdmv_textst_reader_c>;
182     type_probe_map[mtx::file_type_e::hevc_es]     = &do_probe<hevc_es_reader_c>;
183     type_probe_map[mtx::file_type_e::ivf]         = &do_probe<ivf_reader_c>;
184     type_probe_map[mtx::file_type_e::matroska]    = &do_probe<kax_reader_c>;
185     type_probe_map[mtx::file_type_e::mpeg_es]     = &do_probe<mpeg_es_reader_c>;
186     type_probe_map[mtx::file_type_e::mpeg_ps]     = &do_probe<mpeg_ps_reader_c>;
187     type_probe_map[mtx::file_type_e::mpeg_ts]     = &do_probe<mtx::mpeg_ts::reader_c>;
188     type_probe_map[mtx::file_type_e::obu]         = &do_probe<obu_reader_c>;
189     type_probe_map[mtx::file_type_e::ogm]         = &do_probe<ogm_reader_c>;
190     type_probe_map[mtx::file_type_e::pgssup]      = &do_probe<hdmv_pgs_reader_c>;
191     type_probe_map[mtx::file_type_e::qtmp4]       = &do_probe<qtmp4_reader_c>;
192     type_probe_map[mtx::file_type_e::real]        = &do_probe<real_reader_c>;
193     type_probe_map[mtx::file_type_e::truehd]      = &do_probe<truehd_reader_c>;
194     type_probe_map[mtx::file_type_e::tta]         = &do_probe<tta_reader_c>;
195     type_probe_map[mtx::file_type_e::vc1]         = &do_probe<vc1_es_reader_c>;
196     type_probe_map[mtx::file_type_e::vobbtn]      = &do_probe<vobbtn_reader_c>;
197     type_probe_map[mtx::file_type_e::wav]         = &do_probe<wav_reader_c>;
198     type_probe_map[mtx::file_type_e::wavpack4]    = &do_probe<wavpack_reader_c>;
199   }
200 
201   auto res = type_probe_map.find(type);
202   if (res == type_probe_map.end()) {
203     return {};
204   }
205   return (*res).second;
206 }
207 
208 std::unique_ptr<generic_reader_c>
detect_text_file_formats(filelist_t const & file)209 detect_text_file_formats(filelist_t const &file) {
210   try {
211     auto text_io = std::make_shared<mm_text_io_c>(std::make_shared<mm_read_buffer_io_c>(std::make_shared<mm_file_io_c>(file.name)));
212     std::unique_ptr<generic_reader_c> reader;
213 
214     if ((reader = do_probe<webvtt_reader_c>(text_io)))
215       return reader;
216     if ((reader = do_probe<srt_reader_c>(text_io)))
217       return reader;
218     if ((reader = do_probe<ssa_reader_c>(text_io)))
219       return reader;
220     if ((reader = do_probe<vobsub_reader_c>(text_io)))
221       return reader;
222     if ((reader = do_probe<usf_reader_c>(text_io)))
223       return reader;
224 
225     // Unsupported text subtitle formats
226     do_probe<microdvd_reader_c>(text_io);
227 
228     // Support empty files for certain types.
229     if ((text_io->get_size() - text_io->get_byte_order_length()) > 1)
230       return {};
231 
232     auto extension = mtx::fs::to_path(file.name).extension().u8string().substr(1);
233 
234     for (auto type : mtx::file_type_t::by_extension(extension))
235       if (type == mtx::file_type_e::srt)
236         return create_and_prepare_reader<srt_reader_c>(text_io);
237 
238   } catch (mtx::mm_io::exception &ex) {
239     mxerror(fmt::format(Y("The file '{0}' could not be opened for reading: {1}.\n"), file.name, ex));
240 
241   } catch (...) {
242     mxerror(fmt::format(Y("The source file '{0}' could not be opened successfully, or retrieving its size by seeking to the end did not work.\n"), file.name));
243   }
244 
245   return {};
246 }
247 
248 /** \brief Probe the file type
249 
250    Opens the input file and calls the \c probe_file function for each known
251    file reader class. Uses \c mm_text_io_c for subtitle probing.
252 */
253 std::unique_ptr<generic_reader_c>
probe_file_format(filelist_t & file)254 probe_file_format(filelist_t &file) {
255   auto io          = open_input_file(file);
256   auto is_playlist = !file.is_playlist && open_playlist_file(file, *io);
257 
258   std::unique_ptr<generic_reader_c> reader;
259 
260   if (is_playlist)
261     io = std::make_shared<mm_read_buffer_io_c>(file.playlist_mpls_in);
262 
263   // Prefer types hinted by extension
264   auto extension = mtx::fs::to_path(file.name).extension().u8string();
265   if (!extension.empty()) {
266     for (auto type : mtx::file_type_t::by_extension(extension.substr(1))) {
267       auto p = prober_for_type(type);
268       if (p && (reader = p(io, {})))
269         return reader;
270     }
271   }
272 
273   // File types that can be detected unambiguously but are not
274   // supported. The prober does not return if it detects the type.
275   do_probe<unsupported_types_signature_prober_c>(io);
276 
277   // File types that can be detected unambiguously
278   if ((reader = do_probe<avi_reader_c>(io)))
279     return reader;
280   if ((reader = do_probe<flv_reader_c>(io)))
281     return reader;
282   if ((reader = do_probe<kax_reader_c>(io)))
283     return reader;
284   if ((reader = do_probe<wav_reader_c>(io)))
285     return reader;
286   if ((reader = do_probe<ogm_reader_c>(io)))
287     return reader;
288   if ((reader = do_probe<hdmv_textst_reader_c>(io)))
289     return reader;
290   if ((reader = do_probe<flac_reader_c>(io)))
291     return reader;
292   if ((reader = do_probe<hdmv_pgs_reader_c>(io)))
293     return reader;
294   if ((reader = do_probe<real_reader_c>(io)))
295     return reader;
296   if ((reader = do_probe<qtmp4_reader_c>(io)))
297     return reader;
298   if ((reader = do_probe<tta_reader_c>(io)))
299     return reader;
300   if ((reader = do_probe<vc1_es_reader_c>(io)))
301     return reader;
302   if ((reader = do_probe<wavpack_reader_c>(io)))
303     return reader;
304   if ((reader = do_probe<ivf_reader_c>(io)))
305     return reader;
306   if ((reader = do_probe<coreaudio_reader_c>(io)))
307     return reader;
308   if ((reader = do_probe<dirac_es_reader_c>(io)))
309     return reader;
310 
311   // All text file types (subtitles).
312   if ((reader = detect_text_file_formats(file)))
313     return reader;
314 
315   // AVC & HEVC, even though often mis-detected, have a very high
316   // probability of correct detection with headers right at the start.
317   if ((reader = do_probe<avc_es_reader_c>(io, { 0, 0, true })))
318     return reader;
319   if ((reader = do_probe<hevc_es_reader_c>(io, { 0, 0, true })))
320     return reader;
321 
322   // Try raw audio formats and require eight consecutive frames at the
323   // start of the file.
324   if ((reader = do_probe<mp3_reader_c>(io, { 128 * 1024, 8, true })))
325     return reader;
326   if ((reader = do_probe<ac3_reader_c>(io, { 128 * 1024, 8, true })))
327     return reader;
328   if ((reader = do_probe<aac_reader_c>(io, { 128 * 1024, 8, true })))
329     return reader;
330 
331   // File types that are mis-detected sometimes
332   if ((reader = do_probe<dts_reader_c>(io, { 0, 0, true })))
333     return reader;
334   if ((reader = do_probe<mtx::mpeg_ts::reader_c>(io)))
335     return reader;
336   if ((reader = do_probe<mpeg_ps_reader_c>(io)))
337     return reader;
338   if ((reader = do_probe<obu_reader_c>(io)))
339     return reader;
340 
341   // File types which are the same in raw format and in other container formats.
342   // Detection requires 20 or more consecutive packets.
343   static std::vector<int> s_probe_sizes1{ { 128 * 1024, 256 * 1024, 512 * 1024, 1024 * 1024, 0 } };
344   static int const s_probe_num_required_consecutive_packets1 = 64;
345 
346   for (auto probe_size : s_probe_sizes1) {
347     if ((reader = do_probe<mp3_reader_c>(io, { probe_size, s_probe_num_required_consecutive_packets1 })))
348       return reader;
349     if ((reader = do_probe<ac3_reader_c>(io, { probe_size, s_probe_num_required_consecutive_packets1 })))
350       return reader;
351     if ((reader = do_probe<aac_reader_c>(io, { probe_size, s_probe_num_required_consecutive_packets1 })))
352       return reader;
353   }
354 
355   // More file types with detection issues.
356   if ((reader = do_probe<truehd_reader_c>(io)))
357     return reader;
358   if ((reader = do_probe<dts_reader_c>(io)))
359     return reader;
360   if ((reader = do_probe<vobbtn_reader_c>(io)))
361     return reader;
362 
363   // Try some more of the raw audio formats before trying elementary
364   // stream video formats (MPEG 1/2, AVC/H.264, HEVC/H.265; those
365   // often enough simply work). However, require that the first frame
366   // starts at the beginning of the file.
367   if ((reader = do_probe<mp3_reader_c>(io, { 32 * 1024, 1, true })))
368     return reader;
369   if ((reader = do_probe<ac3_reader_c>(io, { 32 * 1024, 1, true })))
370     return reader;
371   if ((reader = do_probe<aac_reader_c>(io, { 32 * 1024, 1, true })))
372     return reader;
373 
374   if ((reader = do_probe<mpeg_es_reader_c>(io)))
375     return reader;
376   if ((reader = do_probe<avc_es_reader_c>(io, { 0, 0, false })))
377     return reader;
378   if ((reader = do_probe<hevc_es_reader_c>(io, { 0, 0, false })))
379     return reader;
380 
381   // File types which are the same in raw format and in other container formats.
382   // Detection requires 20 or more consecutive packets.
383   static std::vector<int> s_probe_sizes2{ { 32 * 1024, 64 * 1024, 128 * 1024, 256 * 1024, 512 * 1024, 1024 * 1024, 0 } };
384   static int const s_probe_num_required_consecutive_packets2 = 20;
385 
386   for (auto probe_size : s_probe_sizes2) {
387     if ((reader = do_probe<mp3_reader_c>(io, { probe_size, s_probe_num_required_consecutive_packets2 })))
388       return reader;
389     else if ((reader = do_probe<ac3_reader_c>(io, { probe_size, s_probe_num_required_consecutive_packets2 })))
390       return reader;
391     else if ((reader = do_probe<aac_reader_c>(io, { probe_size, s_probe_num_required_consecutive_packets2 })))
392       return reader;
393   }
394 
395   // File types that are mis-detected sometimes and that aren't supported
396   do_probe<dv_reader_c>(io);
397 
398   return {};
399 }
400 
401 void
read_file_headers()402 read_file_headers() {
403   static auto s_debug_timestamp_restrictions = debugging_option_c{"timestamp_restrictions"};
404 
405   g_file_sizes = 0;
406 
407   for (auto &file : g_files) {
408     try {
409       file->reader->m_appending = file->appending;
410       file->reader->set_track_info(*file->ti);
411       file->reader->set_timestamp_restrictions(file->restricted_timestamp_min, file->restricted_timestamp_max);
412       file->reader->read_headers();
413 
414       // Re-calculate file size because the reader might switch to a
415       // multi I/O reader in read_headers().
416       file->size    = file->reader->get_file_size();
417       g_file_sizes += file->size;
418 
419       mxdebug_if(s_debug_timestamp_restrictions,
420                  fmt::format("Timestamp restrictions for {2}: min {0} max {1}\n", file->restricted_timestamp_min, file->restricted_timestamp_max, file->ti->m_fname));
421 
422     } catch (mtx::mm_io::open_x &error) {
423       mxerror(fmt::format(Y("The demultiplexer for the file '{0}' failed to initialize:\n{1}\n"), file->ti->m_fname, Y("The file could not be opened for reading, or there was not enough data to parse its headers.")));
424 
425     } catch (mtx::input::open_x &error) {
426       mxerror(fmt::format(Y("The demultiplexer for the file '{0}' failed to initialize:\n{1}\n"), file->ti->m_fname, Y("The file could not be opened for reading, or there was not enough data to parse its headers.")));
427 
428     } catch (mtx::input::invalid_format_x &error) {
429       mxerror(fmt::format(Y("The demultiplexer for the file '{0}' failed to initialize:\n{1}\n"), file->ti->m_fname, Y("The file content does not match its format type and was not recognized.")));
430 
431     } catch (mtx::input::header_parsing_x &error) {
432       mxerror(fmt::format(Y("The demultiplexer for the file '{0}' failed to initialize:\n{1}\n"), file->ti->m_fname, Y("The file headers could not be parsed, e.g. because they're incomplete, invalid or damaged.")));
433 
434     } catch (mtx::input::exception &error) {
435       mxerror(fmt::format(Y("The demultiplexer for the file '{0}' failed to initialize:\n{1}\n"), file->ti->m_fname, error.error()));
436     }
437   }
438 }
439