1 /*
2    mkvmerge -- utility for splicing together matroska files
3    from component media subtypes
4 
5    Distributed under the GPL v2
6    see the file COPYING for details
7    or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
8 
9    the generic_reader_c implementation
10 
11    Written by Moritz Bunkus <moritz@bunkus.org>.
12 */
13 
14 #include "common/common_pch.h"
15 
16 #include "common/list_utils.h"
17 #include "common/mm_proxy_io.h"
18 #include "common/strings/formatting.h"
19 #include "common/tags/tags.h"
20 #include "merge/generic_packetizer.h"
21 #include "merge/generic_reader.h"
22 #include "merge/input_x.h"
23 #include "merge/output_control.h"
24 
25 static mtx_mp_rational_t s_probe_range_percentage{3, 10}; // 0.3%
26 
27 // ----------------------------------------------------------------------
28 
29 template<typename T>
30 void
add_all_requested_track_ids(generic_reader_c & reader,T const & container)31 add_all_requested_track_ids(generic_reader_c &reader,
32                             T const &container) {
33   for (auto const &pair : container)
34     reader.add_requested_track_id(pair.first);
35 }
36 
37 void
set_timestamp_restrictions(timestamp_c const & min,timestamp_c const & max)38 generic_reader_c::set_timestamp_restrictions(timestamp_c const &min,
39                                              timestamp_c const &max) {
40   m_restricted_timestamps_min = min;
41   m_restricted_timestamps_max = max;
42 }
43 
44 timestamp_c const &
get_timestamp_restriction_min() const45 generic_reader_c::get_timestamp_restriction_min()
46   const {
47   return m_restricted_timestamps_min;
48 }
49 
50 timestamp_c const &
get_timestamp_restriction_max() const51 generic_reader_c::get_timestamp_restriction_max()
52   const {
53   return m_restricted_timestamps_max;
54 }
55 
56 void
read_all()57 generic_reader_c::read_all() {
58   for (auto &packetizer : m_reader_packetizers)
59     while (read(packetizer.get(), true) != FILE_STATUS_DONE)
60       ;
61 }
62 
63 file_status_e
read_next(generic_packetizer_c * packetizer,bool force)64 generic_reader_c::read_next(generic_packetizer_c *packetizer,
65                             bool force) {
66   auto prior_progrss = get_progress();
67   auto result        = read(packetizer, force);
68   auto new_progress  = get_progress();
69 
70   add_to_progress(new_progress - prior_progrss);
71 
72   return result;
73 }
74 
75 bool
demuxing_requested(char type,int64_t id,mtx::bcp47::language_c const & language) const76 generic_reader_c::demuxing_requested(char type,
77                                      int64_t id,
78                                      mtx::bcp47::language_c const &language)
79   const {
80   static debugging_option_c s_debug{"demuxing_requested"};
81 
82   auto const &tracks = 'v' == type ? m_ti.m_vtracks
83                      : 'a' == type ? m_ti.m_atracks
84                      : 's' == type ? m_ti.m_stracks
85                      : 'b' == type ? m_ti.m_btracks
86                      :               m_ti.m_track_tags;
87 
88   auto result = tracks.selected(id, language.is_valid() ? language : mtx::bcp47::language_c::parse("und"));
89 
90   mxdebug_if(s_debug, fmt::format("demuxing_requested? {4} type {0} id {1} language {2} item_selector {3}\n", type, id, language, tracks, result ? "yes" : "no"));
91 
92   return result;
93 }
94 
95 attach_mode_e
attachment_requested(int64_t id)96 generic_reader_c::attachment_requested(int64_t id) {
97   if (m_ti.m_attach_mode_list.none())
98     return ATTACH_MODE_SKIP;
99 
100   if (m_ti.m_attach_mode_list.empty())
101     return ATTACH_MODE_TO_ALL_FILES;
102 
103   if (m_ti.m_attach_mode_list.selected(id))
104     return m_ti.m_attach_mode_list.get(id);
105 
106   if (m_ti.m_attach_mode_list.selected(-1))
107     return m_ti.m_attach_mode_list.get(-1);
108 
109   return ATTACH_MODE_SKIP;
110 }
111 
112 int
add_packetizer(generic_packetizer_c * packetizer)113 generic_reader_c::add_packetizer(generic_packetizer_c *packetizer) {
114   if (outputting_webm() && !packetizer->is_compatible_with(OC_WEBM))
115     mxerror(fmt::format(Y("The codec type '{0}' cannot be used in a WebM compliant file.\n"), packetizer->get_format_name()));
116 
117   m_reader_packetizers.emplace_back(packetizer);
118   m_used_track_ids.push_back(packetizer->m_ti.m_id);
119   if (!m_appending)
120     add_packetizer_globally(packetizer);
121 
122   return m_reader_packetizers.size() - 1;
123 }
124 
125 size_t
get_num_packetizers() const126 generic_reader_c::get_num_packetizers()
127   const
128 {
129   return m_reader_packetizers.size();
130 }
131 
132 generic_packetizer_c *
find_packetizer_by_id(int64_t id) const133 generic_reader_c::find_packetizer_by_id(int64_t id)
134   const {
135   auto itr = std::find_if(m_reader_packetizers.begin(), m_reader_packetizers.end(), [id](auto p) { return p->m_ti.m_id == id; });
136 
137   return itr != m_reader_packetizers.end() ? (*itr).get() : nullptr;
138 }
139 
140 void
set_timestamp_offset(int64_t offset)141 generic_reader_c::set_timestamp_offset(int64_t offset) {
142   m_max_timestamp_seen = offset;
143 
144   for (auto ptzr : m_reader_packetizers)
145     ptzr->m_correction_timestamp_offset = offset;
146 }
147 
148 void
set_headers()149 generic_reader_c::set_headers() {
150   for (auto ptzr : m_reader_packetizers)
151     ptzr->set_headers();
152 }
153 
154 void
set_headers_for_track(int64_t tid)155 generic_reader_c::set_headers_for_track(int64_t tid) {
156   for (auto ptzr : m_reader_packetizers)
157     if (ptzr->m_ti.m_id == tid) {
158       ptzr->set_headers();
159       break;
160     }
161 }
162 
163 void
check_track_ids_and_packetizers()164 generic_reader_c::check_track_ids_and_packetizers() {
165   add_available_track_ids();
166 
167   auto const available_ids = std::unordered_set<int64_t>{m_available_track_ids.begin(), m_available_track_ids.end()};
168   auto const not_found     = available_ids.end();
169 
170   for (auto requested_id : m_requested_track_ids)
171     if (available_ids.find(requested_id) == not_found)
172       mxwarn_fn(m_ti.m_fname, fmt::format(Y("A track with the ID {0} was requested but not found in the file. The corresponding option will be ignored.\n"), requested_id));
173 }
174 
175 void
add_requested_track_id(int64_t id)176 generic_reader_c::add_requested_track_id(int64_t id) {
177   if (-1 != id)
178     m_requested_track_ids.insert(id);
179 }
180 
181 int64_t
get_queued_bytes() const182 generic_reader_c::get_queued_bytes()
183   const {
184   int64_t bytes = 0;
185 
186   for (auto ptzr : m_reader_packetizers)
187     bytes += ptzr->get_queued_bytes();
188 
189   return bytes;
190 }
191 
192 file_status_e
flush_packetizer(int num)193 generic_reader_c::flush_packetizer(int num) {
194   return flush_packetizer(&ptzr(num));
195 }
196 
197 file_status_e
flush_packetizer(generic_packetizer_c * packetizer)198 generic_reader_c::flush_packetizer(generic_packetizer_c *packetizer) {
199   packetizer->flush();
200 
201   return FILE_STATUS_DONE;
202 }
203 
204 file_status_e
flush_packetizers()205 generic_reader_c::flush_packetizers() {
206   for (auto ptzr : m_reader_packetizers)
207     ptzr->flush();
208 
209   return FILE_STATUS_DONE;
210 }
211 
212 translatable_string_c
get_format_name() const213 generic_reader_c::get_format_name()
214   const {
215   return mtx::file_type_t::get_name(get_format_type());
216 }
217 
218 void
id_result_container(mtx::id::verbose_info_t const & verbose_info)219 generic_reader_c::id_result_container(mtx::id::verbose_info_t const &verbose_info) {
220   auto type                           = get_format_type();
221   m_id_results_container.info         = mtx::file_type_t::get_name(type).get_translated();
222   m_id_results_container.verbose_info = verbose_info;
223   m_id_results_container.verbose_info.emplace_back("container_type",          static_cast<int>(type));
224   m_id_results_container.verbose_info.emplace_back("is_providing_timestamps", is_providing_timestamps());
225 }
226 
227 void
id_result_track(int64_t track_id,std::string const & type,std::string const & info,mtx::id::verbose_info_t const & verbose_info)228 generic_reader_c::id_result_track(int64_t track_id,
229                                   std::string const &type,
230                                   std::string const &info,
231                                   mtx::id::verbose_info_t const &verbose_info) {
232   id_result_t result(track_id, type, info, {}, 0);
233   result.verbose_info = verbose_info;
234   m_id_results_tracks.push_back(result);
235 }
236 
237 void
id_result_attachment(int64_t attachment_id,std::string const & type,int size,std::string const & file_name,std::string const & description,std::optional<uint64_t> id)238 generic_reader_c::id_result_attachment(int64_t attachment_id,
239                                        std::string const &type,
240                                        int size,
241                                        std::string const &file_name,
242                                        std::string const &description,
243                                        std::optional<uint64_t> id) {
244   id_result_t result(attachment_id, type, file_name, description, size);
245   if (id)
246     result.verbose_info.emplace_back("uid", *id);
247   m_id_results_attachments.push_back(result);
248 }
249 
250 void
id_result_chapters(int num_entries)251 generic_reader_c::id_result_chapters(int num_entries) {
252   id_result_t result(0, ID_RESULT_CHAPTERS, {}, {}, num_entries);
253   m_id_results_chapters.push_back(result);
254 }
255 
256 void
id_result_tags(int64_t track_id,int num_entries)257 generic_reader_c::id_result_tags(int64_t track_id,
258                                  int num_entries) {
259   id_result_t result(track_id, ID_RESULT_TAGS, {}, {}, num_entries);
260   m_id_results_tags.push_back(result);
261 }
262 
263 void
display_identification_results()264 generic_reader_c::display_identification_results() {
265   if (identification_output_format_e::json == g_identification_output_format)
266     display_identification_results_as_json();
267   else
268     display_identification_results_as_text();
269 }
270 
271 void
display_identification_results_as_text()272 generic_reader_c::display_identification_results_as_text() {
273   mxinfo(fmt::format(Y("File '{0}': container: {1}"), m_ti.m_fname, m_id_results_container.info));
274   mxinfo("\n");
275 
276   for (auto &result : m_id_results_tracks) {
277     mxinfo(fmt::format(Y("Track ID {0}: {1} ({2})"), result.id, result.type, result.info));
278     mxinfo("\n");
279   }
280 
281   for (auto &result : m_id_results_attachments) {
282     mxinfo(fmt::format(Y("Attachment ID {0}: type '{1}', size {2} bytes"), result.id, result.type, result.size));
283 
284     if (!result.description.empty())
285       mxinfo(fmt::format(Y(", description '{0}'"), result.description));
286 
287     if (!result.info.empty())
288       mxinfo(fmt::format(Y(", file name '{0}'"), result.info));
289 
290     mxinfo("\n");
291   }
292 
293   for (auto &result : m_id_results_chapters) {
294     mxinfo(fmt::format(NY("Chapters: {0} entry", "Chapters: {0} entries", result.size), result.size));
295     mxinfo("\n");
296   }
297 
298   for (auto &result : m_id_results_tags) {
299     if (ID_RESULT_GLOBAL_TAGS_ID == result.id)
300       mxinfo(fmt::format(NY("Global tags: {0} entry", "Global tags: {0} entries", result.size), result.size));
301 
302     else
303       mxinfo(fmt::format(NY("Tags for track ID {0}: {1} entry", "Tags for track ID {0}: {1} entries", result.size), result.id, result.size));
304 
305     mxinfo("\n");
306   }
307 }
308 
309 void
display_identification_results_as_json()310 generic_reader_c::display_identification_results_as_json() {
311   auto verbose_info_to_object = [](mtx::id::verbose_info_t const &verbose_info) -> nlohmann::json {
312     auto object = nlohmann::json{};
313     for (auto const &property : verbose_info)
314       object[property.first] = property.second;
315 
316     return object.is_null() ? nlohmann::json::object() : object;
317   };
318 
319   auto json = nlohmann::json{
320     { "identification_format_version", ID_JSON_FORMAT_VERSION  },
321     { "file_name",                     m_ti.m_fname            },
322     { "tracks",                        nlohmann::json::array() },
323     { "attachments",                   nlohmann::json::array() },
324     { "chapters",                      nlohmann::json::array() },
325     { "global_tags",                   nlohmann::json::array() },
326     { "track_tags",                    nlohmann::json::array() },
327     { "container", {
328         { "recognized", true                                                        },
329         { "supported",  true                                                        },
330         { "type",       m_id_results_container.info                                 },
331         { "properties", verbose_info_to_object(m_id_results_container.verbose_info) },
332       } },
333   };
334 
335   for (auto const &result : m_id_results_tracks)
336     json["tracks"] += nlohmann::json{
337       { "id",         result.id                                   },
338       { "type",       result.type                                 },
339       { "codec",      result.info                                 },
340       { "properties", verbose_info_to_object(result.verbose_info) },
341     };
342 
343   for (auto const &result : m_id_results_attachments)
344     json["attachments"] += nlohmann::json{
345       { "id",           result.id                                   },
346       { "content_type", result.type                                 },
347       { "size",         result.size                                 },
348       { "description",  result.description                          },
349       { "file_name",    result.info                                 },
350       { "properties",   verbose_info_to_object(result.verbose_info) },
351     };
352 
353   for (auto const &result : m_id_results_chapters)
354     json["chapters"] += nlohmann::json{
355       { "num_entries", result.size },
356     };
357 
358   for (auto const &result : m_id_results_tags) {
359     if (ID_RESULT_GLOBAL_TAGS_ID == result.id)
360       json["global_tags"] += nlohmann::json{
361         { "num_entries", result.size },
362       };
363     else
364       json["track_tags"] += nlohmann::json{
365         { "track_id",    result.id   },
366         { "num_entries", result.size },
367       };
368   }
369 
370   display_json_output(json);
371 }
372 
373 void
add_available_track_id(int64_t id)374 generic_reader_c::add_available_track_id(int64_t id) {
375   m_available_track_ids.push_back(id);
376 }
377 
378 void
add_available_track_ids()379 generic_reader_c::add_available_track_ids() {
380   add_available_track_id(0);
381 }
382 
383 void
add_available_track_id_range(int64_t start,int64_t end)384 generic_reader_c::add_available_track_id_range(int64_t start,
385                                                int64_t end) {
386   for (int64_t id = start; id <= end; ++id)
387     add_available_track_id(id);
388 }
389 
390 int64_t
get_progress()391 generic_reader_c::get_progress() {
392   return m_in->getFilePointer();
393 }
394 
395 int64_t
get_maximum_progress()396 generic_reader_c::get_maximum_progress() {
397   return m_in->get_size();
398 }
399 
400 mm_io_c *
get_underlying_input(mm_io_c * actual_in) const401 generic_reader_c::get_underlying_input(mm_io_c *actual_in)
402   const {
403   if (!actual_in)
404     actual_in = m_in.get();
405 
406   while (dynamic_cast<mm_proxy_io_c *>(actual_in))
407     actual_in = static_cast<mm_proxy_io_c *>(actual_in)->get_proxied();
408   return actual_in;
409 }
410 
411 void
set_probe_range_percentage(mtx_mp_rational_t const & probe_range_percentage)412 generic_reader_c::set_probe_range_percentage(mtx_mp_rational_t const &probe_range_percentage) {
413   s_probe_range_percentage = probe_range_percentage;
414 }
415 
416 int64_t
calculate_probe_range(int64_t file_size,int64_t fixed_minimum) const417 generic_reader_c::calculate_probe_range(int64_t file_size,
418                                         int64_t fixed_minimum)
419   const {
420   static debugging_option_c s_debug{"probe_range"};
421 
422   auto factor      = mtx_mp_rational_t{1, 100} * s_probe_range_percentage;
423   auto probe_range = mtx::to_int(factor * file_size);
424   auto to_use      = std::max(fixed_minimum, probe_range);
425 
426   mxdebug_if(s_debug,
427              fmt::format("calculate_probe_range: calculated {0} based on file size {1} fixed minimum {2} percentage {3}/{4} percentage of size {5}\n",
428                          to_use, file_size, fixed_minimum, boost::multiprecision::numerator(s_probe_range_percentage), boost::multiprecision::denominator(s_probe_range_percentage), probe_range));
429 
430   return to_use;
431 }
432 
433 void
set_file_to_read(mm_io_cptr const & in)434 generic_reader_c::set_file_to_read(mm_io_cptr const &in) {
435   m_in   = in;
436   m_size = in->get_size();
437 }
438 
439 void
set_probe_range_info(probe_range_info_t const & info)440 generic_reader_c::set_probe_range_info(probe_range_info_t const &info) {
441   m_probe_range_info = info;
442 }
443 
444 void
set_track_info(track_info_c const & info)445 generic_reader_c::set_track_info(track_info_c const &info) {
446   m_ti = info;
447 
448   add_all_requested_track_ids(*this, m_ti.m_atracks.m_items);
449   add_all_requested_track_ids(*this, m_ti.m_vtracks.m_items);
450   add_all_requested_track_ids(*this, m_ti.m_stracks.m_items);
451   add_all_requested_track_ids(*this, m_ti.m_btracks.m_items);
452   add_all_requested_track_ids(*this, m_ti.m_track_tags.m_items);
453   add_all_requested_track_ids(*this, m_ti.m_all_fourccs);
454   add_all_requested_track_ids(*this, m_ti.m_display_properties);
455   add_all_requested_track_ids(*this, m_ti.m_timestamp_syncs);
456   add_all_requested_track_ids(*this, m_ti.m_cue_creations);
457   add_all_requested_track_ids(*this, m_ti.m_default_track_flags);
458   add_all_requested_track_ids(*this, m_ti.m_fix_bitstream_frame_rate_flags);
459   add_all_requested_track_ids(*this, m_ti.m_languages);
460   add_all_requested_track_ids(*this, m_ti.m_sub_charsets);
461   add_all_requested_track_ids(*this, m_ti.m_all_tags);
462   add_all_requested_track_ids(*this, m_ti.m_all_aac_is_sbr);
463   add_all_requested_track_ids(*this, m_ti.m_compression_list);
464   add_all_requested_track_ids(*this, m_ti.m_track_names);
465   add_all_requested_track_ids(*this, m_ti.m_all_ext_timestamps);
466   add_all_requested_track_ids(*this, m_ti.m_pixel_crop_list);
467   add_all_requested_track_ids(*this, m_ti.m_reduce_to_core);
468 }
469 
470 void
add_track_tags_to_identification(libmatroska::KaxTags const & tags,mtx::id::info_c & info)471 generic_reader_c::add_track_tags_to_identification(libmatroska::KaxTags const &tags,
472                                                    mtx::id::info_c &info) {
473   for (auto const &tag_elt : tags) {
474     auto tag = dynamic_cast<libmatroska::KaxTag *>(tag_elt);
475     if (!tag)
476       continue;
477 
478     for (auto const &simple_tag_elt : *tag) {
479       auto simple_tag = dynamic_cast<libmatroska::KaxTagSimple *>(simple_tag_elt);
480       if (!simple_tag)
481         continue;
482       auto name  = mtx::tags::get_simple_name(*simple_tag);
483       auto value = mtx::tags::get_simple_value(*simple_tag);
484 
485       if (!name.empty()) {
486         info.add(fmt::format("tag_{0}", balg::to_lower_copy(name)), value);
487       }
488     }
489   }
490 }
491 
492 void
show_demuxer_info()493 generic_reader_c::show_demuxer_info() {
494   if (verbose)
495     mxinfo_fn(m_ti.m_fname, fmt::format(Y("Using the demultiplexer for the format '{0}'.\n"), get_format_name()));
496 }
497 
498 void
show_packetizer_info(int64_t track_id,generic_packetizer_c const & packetizer)499 generic_reader_c::show_packetizer_info(int64_t track_id,
500                                        generic_packetizer_c const &packetizer) {
501   mxinfo_tid(m_ti.m_fname, track_id, fmt::format(Y("Using the output module for the format '{0}'.\n"), packetizer.get_format_name()));
502 }
503 
504 generic_packetizer_c &
ptzr(int64_t track_idx)505 generic_reader_c::ptzr(int64_t track_idx) {
506   return *m_reader_packetizers[track_idx];
507 }
508