1 /*
2    mkvmerge -- utility for splicing together matroska files
3    from component media subtypes
4 
5    Distributed under the GPL v2
6    see the file COPYING for details
7    or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
8 
9    Matroska file analyzer and updater
10 
11    Written by Moritz Bunkus <moritz@bunkus.org>.
12 */
13 
14 #include "common/common_pch.h"
15 
16 #include <algorithm>
17 
18 #include <ebml/EbmlStream.h>
19 #include <ebml/EbmlSubHead.h>
20 #include <ebml/EbmlVoid.h>
21 #include <matroska/KaxCluster.h>
22 #include <matroska/KaxCues.h>
23 #include <matroska/KaxCuesData.h>
24 #include <matroska/KaxSeekHead.h>
25 #include <matroska/KaxSegment.h>
26 #include <matroska/KaxTags.h>
27 
28 #include "common/bitvalue.h"
29 #include "common/construct.h"
30 #include "common/doc_type_version_handler.h"
31 #include "common/ebml.h"
32 #include "common/endian.h"
33 #include "common/error.h"
34 #include "common/list_utils.h"
35 #include "common/kax_analyzer.h"
36 #include "common/mm_io_x.h"
37 #include "common/mm_file_io.h"
38 #include "common/mm_proxy_io.h"
39 #include "common/mm_read_buffer_io.h"
40 #include "common/strings/editing.h"
41 #include "common/strings/formatting.h"
42 
43 using namespace libebml;
44 using namespace libmatroska;
45 
46 namespace {
47 
48 constexpr auto CONSOLE_PERCENTAGE_WIDTH = 25;
49 
50 template<typename Tmaster,
51          typename Telement>
52 kax_analyzer_c::update_element_result_e
update_uid_referrals(kax_analyzer_c & analyzer,std::unordered_map<uint64_t,uint64_t> const & changes)53 update_uid_referrals(kax_analyzer_c &analyzer,
54                      std::unordered_map<uint64_t, uint64_t> const &changes) {
55   auto master = analyzer.read_all(Tmaster::ClassInfos);
56   if (!master)
57     return kax_analyzer_c::uer_success;
58 
59   auto modified = change_values<Telement>(static_cast<EbmlMaster &>(*master), changes);
60 
61   return modified ? analyzer.update_element(master) : kax_analyzer_c::uer_success;
62 }
63 
64 }
65 
66 bool
operator <(const kax_analyzer_data_cptr & d1,const kax_analyzer_data_cptr & d2)67 operator <(const kax_analyzer_data_cptr &d1,
68            const kax_analyzer_data_cptr &d2) {
69   return d1->m_pos < d2->m_pos;
70 }
71 
72 std::string
to_string() const73 kax_analyzer_data_c::to_string() const {
74   const EbmlCallbacks *callbacks = find_ebml_callbacks(EBML_INFO(KaxSegment), m_id);
75 
76   if (!callbacks && Is<EbmlVoid>(m_id))
77     callbacks = &EBML_CLASS_CALLBACK(EbmlVoid);
78 
79   std::string name;
80   if (callbacks)
81     name = EBML_INFO_NAME(*callbacks);
82 
83   else
84     name = fmt::format("0x{0:0{1}x}", EBML_ID_VALUE(m_id), EBML_ID_LENGTH(m_id) * 2);
85 
86   return fmt::format("{0} size {1}{3} at {2}", name, m_size, m_pos, m_size_known ? "" : " (unknown)");
87 }
88 
kax_analyzer_c(std::string file_name)89 kax_analyzer_c::kax_analyzer_c(std::string file_name)
90 {
91   m_file_name  = file_name;
92   m_close_file = true;
93 }
94 
kax_analyzer_c(mm_io_cptr const & file)95 kax_analyzer_c::kax_analyzer_c(mm_io_cptr const &file)
96 {
97   m_file_name  = file->get_file_name();
98   m_file       = file;
99   m_close_file = false;
100 }
101 
~kax_analyzer_c()102 kax_analyzer_c::~kax_analyzer_c() {
103   close_file();
104 }
105 
106 void
close_file()107 kax_analyzer_c::close_file() {
108   if (m_close_file) {
109     m_file.reset();
110     m_stream.reset();
111   }
112 }
113 
114 void
reopen_file()115 kax_analyzer_c::reopen_file() {
116   if (m_file)
117     return;
118 
119   try {
120     m_file = std::make_shared<mm_file_io_c>(m_file_name, m_open_mode);
121     if (MODE_READ == m_open_mode)
122       m_file = std::make_shared<mm_read_buffer_io_c>(m_file);
123 
124   } catch (mtx::mm_io::exception &) {
125     m_file.reset();
126 
127     throw MODE_READ == m_open_mode ? uer_error_opening_for_reading : uer_error_opening_for_writing;
128   }
129 
130   m_stream = std::make_shared<EbmlStream>(*m_file);
131 }
132 
133 void
reopen_file_for_writing()134 kax_analyzer_c::reopen_file_for_writing() {
135   if (m_file && (MODE_WRITE == m_open_mode))
136     return;
137 
138   m_file.reset();
139   m_open_mode = MODE_WRITE;
140 
141   reopen_file();
142 }
143 
144 void
_log_debug_message(const std::string & message)145 kax_analyzer_c::_log_debug_message(const std::string &message) {
146   mxinfo(message);
147 }
148 
149 bool
analyzer_debugging_requested(const std::string & section)150 kax_analyzer_c::analyzer_debugging_requested(const std::string &section) {
151   return m_debug || debugging_c::requested("kax_analyzer_"s + section);
152 }
153 
154 void
debug_dump_elements()155 kax_analyzer_c::debug_dump_elements() {
156   size_t i;
157   for (i = 0; i < m_data.size(); i++)
158     log_debug_message(fmt::format("{0}: {1}\n", i, m_data[i]->to_string()));
159 }
160 
161 void
debug_dump_elements_maybe(const std::string & hook_name)162 kax_analyzer_c::debug_dump_elements_maybe(const std::string &hook_name) {
163   if (!analyzer_debugging_requested(hook_name))
164     return;
165 
166   log_debug_message(fmt::format("kax_analyzer_{0} dumping elements:\n", hook_name));
167   debug_dump_elements();
168 }
169 
170 void
validate_data_structures(const std::string & hook_name)171 kax_analyzer_c::validate_data_structures(const std::string &hook_name) {
172   if (m_data.empty())
173     return;
174 
175   bool gap_debugging = analyzer_debugging_requested("gaps");
176   bool ok            = true;
177   size_t i;
178 
179   for (i = 0; m_data.size() -1 > i; i++) {
180     if ((m_data[i]->m_pos + m_data[i]->m_size) > m_data[i + 1]->m_pos) {
181       log_debug_message(fmt::format("kax_analyzer_{0}: Interal data structure corruption at pos {1} (size + position > next position); dumping elements\n", hook_name, i));
182       ok = false;
183     } else if (gap_debugging && ((m_data[i]->m_pos + m_data[i]->m_size) < m_data[i + 1]->m_pos)) {
184       log_debug_message(fmt::format("kax_analyzer_{0}: Gap found at pos {1} (size + position < next position); dumping elements\n", hook_name, i));
185       ok = false;
186     }
187   }
188 
189   if (!ok) {
190     debug_dump_elements();
191 
192     if (m_throw_on_error)
193       throw mtx::kax_analyzer_x{Y("The data in the file is corrupted and cannot be modified safely")};
194 
195     debug_abort_process();
196   }
197 }
198 
199 void
verify_data_structures_against_file(const std::string & hook_name)200 kax_analyzer_c::verify_data_structures_against_file(const std::string &hook_name) {
201   kax_analyzer_c actual_content(m_file);
202   actual_content.process();
203 
204   unsigned int num_items = std::max(m_data.size(), actual_content.m_data.size());
205   bool ok                = m_data.size() == actual_content.m_data.size();
206   size_t max_info_len    = 0;
207   std::vector<std::string> info_this, info_actual, info_markings;
208   size_t i;
209 
210   for (i = 0; num_items > i; ++i) {
211     info_this.push_back(                 m_data.size() > i ?                m_data[i]->to_string() : std::string{});
212     info_actual.push_back(actual_content.m_data.size() > i ? actual_content.m_data[i]->to_string() : std::string{});
213 
214     max_info_len           = std::max(max_info_len, info_this.back().length());
215 
216     bool row_is_identical  = info_this.back() == info_actual.back();
217     ok                    &= row_is_identical;
218 
219     info_markings.emplace_back(row_is_identical ? " " : "*");
220   }
221 
222   if (ok)
223     return;
224 
225   log_debug_message(fmt::format("verify_data_structures_against_file({0}) failed. Dumping this on the left, actual on the right.\n", hook_name));
226 
227   for (i = 0; num_items > i; ++i)
228     log_debug_message(fmt::format("{0} {1:<{2}s} {3}\n", info_markings[i], info_this[i], max_info_len, info_actual[i]));
229 
230   debug_abort_process();
231 }
232 
233 bool
probe(std::string file_name)234 kax_analyzer_c::probe(std::string file_name) {
235   try {
236     unsigned char data[4];
237     mm_file_io_c in(file_name.c_str());
238 
239     if (in.read(data, 4) != 4)
240       return false;
241 
242     return ((0x1a == data[0]) && (0x45 == data[1]) && (0xdf == data[2]) && (0xa3 == data[3]));
243   } catch (...) {
244     return false;
245   }
246 }
247 
248 kax_analyzer_c &
set_parse_mode(parse_mode_e parse_mode)249 kax_analyzer_c::set_parse_mode(parse_mode_e parse_mode) {
250   m_parse_mode = parse_mode;
251   return *this;
252 }
253 
254 kax_analyzer_c &
set_open_mode(open_mode mode)255 kax_analyzer_c::set_open_mode(open_mode mode) {
256   m_open_mode = mode;
257   return *this;
258 }
259 
260 kax_analyzer_c &
set_throw_on_error(bool throw_on_error)261 kax_analyzer_c::set_throw_on_error(bool throw_on_error) {
262   m_throw_on_error = throw_on_error;
263   return *this;
264 }
265 
266 kax_analyzer_c &
set_parser_start_position(uint64_t position)267 kax_analyzer_c::set_parser_start_position(uint64_t position) {
268   m_parser_start_position = position;
269   return *this;
270 }
271 
272 kax_analyzer_c &
set_doc_type_version_handler(mtx::doc_type_version_handler_c * handler)273 kax_analyzer_c::set_doc_type_version_handler(mtx::doc_type_version_handler_c *handler) {
274   m_doc_type_version_handler = handler;
275   return *this;
276 }
277 
278 bool
process()279 kax_analyzer_c::process() {
280   try {
281     auto result = process_internal();
282     mxdebug_if(m_debug, fmt::format("kax_analyzer: parsing file '{0}' result {1}\n", m_file->get_file_name(), result));
283 
284     return result;
285 
286   } catch (...) {
287     mxdebug_if(m_debug, fmt::format("kax_analyzer: parsing file '{0}' failed with an exception\n", m_file->get_file_name()));
288 
289     show_progress_done();
290 
291     if (m_throw_on_error)
292       throw;
293     return false;
294   }
295 }
296 
297 bool
process_internal()298 kax_analyzer_c::process_internal() {
299   bool parse_fully = parse_mode_full == m_parse_mode;
300 
301   reopen_file();
302 
303   int64_t file_size = m_file->get_size();
304   show_progress_start(file_size);
305 
306   m_segment.reset();
307   m_data.clear();
308 
309   m_file->setFilePointer(0);
310   m_stream = std::make_shared<EbmlStream>(*m_file);
311 
312   // Find the EbmlHead element. Must be the first one.
313   m_ebml_head.reset(static_cast<EbmlHead *>(m_stream->FindNextID(EBML_INFO(EbmlHead), 0xFFFFFFFFL)));
314   if (!m_ebml_head || !Is<EbmlHead>(*m_ebml_head))
315     throw mtx::kax_analyzer_x(Y("Not a valid Matroska file (no EBML head found)"));
316 
317   EbmlElement *l0{};
318   int upper_lvl_el{};
319 
320   m_ebml_head->Read(*m_stream, EBML_CONTEXT(m_ebml_head.get()), upper_lvl_el, l0, true, SCOPE_ALL_DATA);
321   m_ebml_head->SkipData(*m_stream, EBML_CONTEXT(m_ebml_head.get()));
322 
323   determine_webm();
324 
325   if (l0) {
326     delete l0;
327     l0 = nullptr;
328   }
329 
330   while (true) {
331     // Next element must be a segment
332     l0 = m_stream->FindNextID(EBML_INFO(KaxSegment), 0xFFFFFFFFFFFFFFFFLL);
333     if (!l0)
334       throw mtx::kax_analyzer_x(Y("Not a valid Matroska file (no segment/level 0 element found)"));
335 
336     if (Is<KaxSegment>(l0))
337       break;
338 
339     l0->SkipData(*m_stream, EBML_CONTEXT(l0));
340     delete l0;
341   }
342 
343   m_segment            = std::shared_ptr<KaxSegment>(static_cast<KaxSegment *>(l0));
344   bool aborted         = false;
345   bool cluster_found   = false;
346   bool meta_seek_found = false;
347   m_segment_end        = m_segment->IsFiniteSize() ? m_segment->GetElementPosition() + m_segment->HeadSize() + m_segment->GetSize() : m_file->get_size();
348   EbmlElement *l1      = nullptr;
349   upper_lvl_el         = 0;
350 
351   // In certain situations the caller doesn't way to have to pay the
352   // price for full analysis. Then it can configure the parser to
353   // start parsing at a certain offset. EbmlStream::FindNextElement()
354   // should take care of re-syncing to a known level 1 element. But
355   // take care not to start before the segment's data start position.
356   if (m_parser_start_position)
357     m_file->setFilePointer(std::max<uint64_t>(*m_parser_start_position, m_segment->GetElementPosition() + m_segment->HeadSize()));
358 
359   // We've got our segment, so let's find all level 1 elements.
360   while (m_file->getFilePointer() < m_segment_end) {
361     if (!l1)
362       l1 = m_stream->FindNextElement(EBML_CONTEXT(l0), upper_lvl_el, 0xFFFFFFFFL, true, 1);
363 
364     if (!l1 || (0 < upper_lvl_el))
365       break;
366 
367     m_data.push_back(kax_analyzer_data_c::create(EbmlId(*l1), l1->GetElementPosition(), l1->ElementSize(true), l1->IsFiniteSize()));
368 
369     cluster_found   |= Is<KaxCluster>(l1);
370     meta_seek_found |= Is<KaxSeekHead>(l1);
371 
372     l1->SkipData(*m_stream, EBML_CONTEXT(l1));
373     delete l1;
374     l1 = nullptr;
375 
376     aborted = !show_progress_running((int)(m_file->getFilePointer() * 100 / file_size));
377 
378     auto in_parent = !m_segment->IsFiniteSize()
379                   || (m_file->getFilePointer() < (m_segment->GetElementPosition() + m_segment->HeadSize() + m_segment->GetSize()));
380 
381     if (!in_parent || aborted || (cluster_found && meta_seek_found && !parse_fully))
382       break;
383 
384   } // while (l1)
385 
386   if (l1)
387     delete l1;
388 
389   if (!aborted && !parse_fully)
390     read_all_meta_seeks();
391 
392   show_progress_done();
393 
394   validate_data_structures("process_internal_end");
395 
396   if (!aborted) {
397     if (parse_mode_full != m_parse_mode)
398       fix_element_sizes(file_size);
399 
400     return true;
401   }
402 
403   m_segment.reset();
404   m_data.clear();
405 
406   return false;
407 }
408 
409 ebml_element_cptr
read_element(kax_analyzer_data_cptr const & element_data)410 kax_analyzer_c::read_element(kax_analyzer_data_cptr const &element_data) {
411   return read_element(*element_data);
412 }
413 
414 ebml_element_cptr
read_element(unsigned int pos)415 kax_analyzer_c::read_element(unsigned int pos) {
416   return read_element(*m_data[pos]);
417 }
418 
419 ebml_element_cptr
read_element(kax_analyzer_data_c const & element_data)420 kax_analyzer_c::read_element(kax_analyzer_data_c const &element_data) {
421   reopen_file();
422 
423   EbmlStream es(*m_file);
424   m_file->setFilePointer(element_data.m_pos);
425 
426   int upper_lvl_el_found         = 0;
427   ebml_element_cptr e            = ebml_element_cptr(es.FindNextElement(EBML_CONTEXT(m_segment), upper_lvl_el_found, 0xFFFFFFFFL, true, 1));
428   const EbmlCallbacks *callbacks = find_ebml_callbacks(EBML_INFO(KaxSegment), element_data.m_id);
429 
430   if (!e || !callbacks || (EbmlId(*e) != EBML_INFO_ID(*callbacks))) {
431     e.reset();
432     return e;
433   }
434 
435   upper_lvl_el_found        = 0;
436   EbmlElement *upper_lvl_el = nullptr;
437   e->Read(*m_stream, EBML_INFO_CONTEXT(*callbacks), upper_lvl_el_found, upper_lvl_el, true);
438 
439   return e;
440 }
441 
442 bool
validate_and_break(std::string const & hook_name)443 kax_analyzer_c::validate_and_break(std::string const &hook_name) {
444   debug_dump_elements_maybe(hook_name);
445   validate_data_structures(hook_name);
446 
447   if (analyzer_debugging_requested("verify"))
448     verify_data_structures_against_file(hook_name);
449 
450   return debugging_c::requested(fmt::format("kax_analyzer_{0}_break", hook_name));
451 }
452 
453 kax_analyzer_c::update_element_result_e
update_element(ebml_element_cptr const & e,bool write_defaults,bool add_mandatory_elements_if_missing)454 kax_analyzer_c::update_element(ebml_element_cptr const &e,
455                                bool write_defaults,
456                                bool add_mandatory_elements_if_missing) {
457   return update_element(e.get(), write_defaults, add_mandatory_elements_if_missing);
458 }
459 
460 kax_analyzer_c::update_element_result_e
update_element(EbmlElement * e,bool write_defaults,bool add_mandatory_elements_if_missing)461 kax_analyzer_c::update_element(EbmlElement *e,
462                                bool write_defaults,
463                                bool add_mandatory_elements_if_missing) {
464   try {
465     reopen_file_for_writing();
466 
467     if (add_mandatory_elements_if_missing)
468       fix_mandatory_elements(e);
469     remove_voids_from_master(e);
470 
471     placement_strategy_e strategy = get_placement_strategy_for(e);
472 
473     if (validate_and_break("update_element_0"))
474       return uer_success;
475 
476     fix_unknown_size_for_last_level1_element();
477     if (validate_and_break("update_element_1"))
478       return uer_success;
479 
480     overwrite_all_instances(EbmlId(*e));
481     if (validate_and_break("update_element_2"))
482       return uer_success;
483 
484     merge_void_elements();
485     if (validate_and_break("update_element_3"))
486       return uer_success;
487 
488     write_element(e, write_defaults, strategy);
489     if (validate_and_break("update_element_4"))
490       return uer_success;
491 
492     remove_from_meta_seeks(EbmlId(*e));
493     if (validate_and_break("update_element_5"))
494       return uer_success;
495 
496     merge_void_elements();
497     if (validate_and_break("update_element_6"))
498       return uer_success;
499 
500     add_to_meta_seek(e);
501     if (validate_and_break("update_element_7"))
502       return uer_success;
503 
504     merge_void_elements();
505     if (validate_and_break("update_element_8"))
506       return uer_success;
507 
508   } catch (kax_analyzer_c::update_element_result_e result) {
509     debug_dump_elements_maybe("update_element_exception");
510     return result;
511 
512   } catch (mtx::mm_io::exception &ex) {
513     mxdebug_if(m_debug, fmt::format("I/O exception: {0}\n", ex.what()));
514     return uer_error_unknown;
515   }
516 
517   return uer_success;
518 }
519 
520 kax_analyzer_c::update_element_result_e
remove_elements(EbmlId const & id)521 kax_analyzer_c::remove_elements(EbmlId const &id) {
522   try {
523     reopen_file_for_writing();
524 
525     if (validate_and_break("remove_elements_0"))
526       return uer_success;
527 
528     fix_unknown_size_for_last_level1_element();
529     if (validate_and_break("remove_elements_1"))
530       return uer_success;
531 
532     overwrite_all_instances(id);
533     if (validate_and_break("remove_elements_2"))
534       return uer_success;
535 
536     merge_void_elements();
537     if (validate_and_break("remove_elements_3"))
538       return uer_success;
539 
540     remove_from_meta_seeks(id);
541     if (validate_and_break("remove_elements_4"))
542       return uer_success;
543 
544     merge_void_elements();
545     if (validate_and_break("remove_elements_5"))
546       return uer_success;
547 
548   } catch (kax_analyzer_c::update_element_result_e result) {
549     debug_dump_elements_maybe("update_element_exception");
550     return result;
551   }
552 
553   return uer_success;
554 }
555 
556 kax_analyzer_c::update_element_result_e
update_uid_referrals(std::unordered_map<uint64_t,uint64_t> const & track_uid_changes)557 kax_analyzer_c::update_uid_referrals(std::unordered_map<uint64_t, uint64_t> const &track_uid_changes) {
558   mxdebug_if(m_debug, fmt::format("kax_analyzer: update_track_uid_referrals: number of changes: {0}\n", track_uid_changes.size()));
559 
560   if (track_uid_changes.empty())
561     return uer_success;
562 
563   mxdebug_if(m_debug, fmt::format("kax_analyzer: update_track_uid_referrals: number of changes: {0}\n", track_uid_changes.size()));
564 
565   auto result = ::update_uid_referrals<KaxChapters, KaxChapterTrackNumber>(*this, track_uid_changes);
566   if (result != uer_success)
567     return result;
568 
569   result = ::update_uid_referrals<KaxTags, KaxTagTrackUID>(*this, track_uid_changes);
570   if (result != uer_success)
571     return result;
572 
573   return uer_success;
574 }
575 
576 /** \brief Sets the m_segment size to the length of the file
577  */
578 void
adjust_segment_size()579 kax_analyzer_c::adjust_segment_size() {
580   // If the old segment's size is unknown then don't try to force a
581   // finite size as this will fail most of the time: an
582   // infinite/unknown size is coded by the value 0 which is often
583   // stored as a single byte (e.g. Haali's muxer does this).
584   if (!m_segment->IsFiniteSize())
585     return;
586 
587   auto new_segment = std::make_shared<KaxSegment>();
588   m_file->setFilePointer(m_segment->GetElementPosition());
589   new_segment->WriteHead(*m_file, m_segment->HeadSize() - 4);
590 
591   m_file->setFilePointer(0, seek_end);
592   if (!new_segment->ForceSize(m_file->getFilePointer() - m_segment->HeadSize() - m_segment->GetElementPosition())) {
593     m_segment->OverwriteHead(*m_file);
594     throw uer_error_segment_size_for_element;
595   }
596 
597   new_segment->OverwriteHead(*m_file);
598   m_segment = new_segment;
599 }
600 
601 /** \brief Create an EbmlVoid element at a specific location
602 
603     This function fills a gap in the file with an EbmlVoid. If an
604     EbmlVoid element is located directly behind the gap then this
605     element is overwritten as well.
606 
607     The function calculates the size of the new void element by taking
608     the next non-EbmlVoid's position and subtracting from it the end
609     position of the current element indicated by the \c data_idx
610     parameter.
611 
612     If the space is not big enough to contain an EbmlVoid element then
613     the EBML head of the following element is moved one byte to the
614     front and its size field is extended by one byte. That way the
615     file stays compatible with all parsers, and only a small number of
616     bytes have to be moved around.
617 
618     The \c m_data member structure is also updated to reflect the
619     changes made to the file.
620 
621     The function relies on \c m_data[data_idx] to be up to date
622     regarding its size. If the size of \c m_data[data_idx] is zero
623     then it is assumed that the element shall be overwritten with an
624     EbmlVoid element, and \c m_data[data_idx] will be removed from the
625     \c m_data structure.
626 
627     \param data_idx Index into the \c m_data structure pointing to the
628      current element after which the gap is located.
629 
630     \return \c true if a new void element was created and \c false if
631       there was no need to create one or if there was not enough
632       space.
633  */
634 bool
handle_void_elements(size_t data_idx)635 kax_analyzer_c::handle_void_elements(size_t data_idx) {
636   static debugging_option_c s_debug_void{"kax_analyzer_handle_void_elements"};
637 
638   // Is the element at the end of the file? If so truncate the file
639   // and remove the element from the data structure if that was
640   // requested. Then we're done.
641   if (m_data.size() == (data_idx + 1)) {
642     mxdebug_if(s_debug_void, fmt::format("handle_void_elements({0}): element is at end; truncating file\n", data_idx));
643     m_file->truncate(m_data[data_idx]->m_pos + m_data[data_idx]->m_size);
644     adjust_segment_size();
645     if (0 == m_data[data_idx]->m_size)
646       m_data.erase(m_data.begin() + data_idx);
647     return false;
648   }
649 
650   // Are the following elements EbmlVoid elements?
651   size_t end_idx = data_idx + 1;
652   while ((m_data.size() > end_idx) && Is<EbmlVoid>(m_data[end_idx]->m_id))
653     ++end_idx;
654 
655   if (end_idx > data_idx + 1) {
656     mxdebug_if(s_debug_void, fmt::format("handle_void_elements({0}): {1} void element(s) following; merging\n", data_idx, end_idx - data_idx - 1));
657 
658     // Yes, there is at least one. Remove these elements from the list
659     // in order to create a new EbmlVoid element covering their space
660     // as well.
661     m_data.erase(m_data.begin() + data_idx + 1, m_data.begin() + end_idx);
662   }
663 
664   // Calculate how much space we have to cover with a void
665   // element. This is the difference between the next element's
666   // position and the current element's end.
667   int64_t void_pos = m_data[data_idx]->m_pos + m_data[data_idx]->m_size;
668   int void_size    = m_data[data_idx + 1]->m_pos - void_pos;
669 
670   // If the difference is 0 then we have nothing to do.
671   if (0 == void_size) {
672     mxdebug_if(s_debug_void, fmt::format("handle_void_elements({0}): void_size == 0, nothing further to do\n", data_idx));
673     return false;
674   }
675 
676   // See if we have enough space to fit an EbmlVoid element in. An
677   // EbmlVoid element needs at least two bytes (one for the ID, one
678   // for the size).
679   if (1 == void_size) {
680     mxdebug_if(s_debug_void, fmt::format("handle_void_elements({0}): void_size == 1; move next element's head down one byte & enlarge its size portion\n", data_idx));
681 
682     // No. The most compatible way to deal with this situation is to
683     // move the element ID of the following element one byte to the
684     // front and extend the following element's size field by one
685     // byte.
686 
687     // If the following element's coded size length is 8 bytes
688     // already, we can make the head smaller instead and move it one
689     // byte up. That leaves a gap of two bytes which we can fill with
690     // an empty new EBML Void element. The variable `move_up` reflects
691     // this; it is true the coded size length is less than 8 bytes and
692     // the element's head can be moved up and false if we have to
693     // shrink it and fill the space with a new EBML void.
694 
695     auto e = read_element(m_data[data_idx + 1]);
696 
697     if (!e) {
698       mxdebug_if(s_debug_void, fmt::format("handle_void_elements({0}): void_size == 1: could not read following element\n", data_idx));
699       return false;
700     }
701 
702     auto move_up = e->GetSizeLength() < 8;
703     auto new_pos = m_data[data_idx + 1]->m_pos + (move_up ? -1 : 1);
704 
705     binary head[4 + 8];         // Class D + 64 bits coded size
706     unsigned int head_size = EBML_ID_LENGTH(static_cast<const EbmlId &>(*e));
707     EbmlId(*e).Fill(head);
708 
709     int coded_size = CodedSizeLength(e->GetSize(), move_up ? e->GetSizeLength() + 1 : 7, true);
710     CodedValueLength(e->GetSize(), coded_size, &head[head_size]);
711     head_size += coded_size;
712 
713     mxdebug_if(s_debug_void,
714                fmt::format("handle_void_elements({0}): void_size == 1: writing {1} header bytes at position {2} (ID length {3} coded size length {4} previous size length {5} element size {6}); bytes: {7}\n",
715                            data_idx, head_size, new_pos, head_size - coded_size, coded_size, e->GetSizeLength(), e->GetSize(), mtx::string::to_hex(head, head_size)));
716 
717     m_file->setFilePointer(new_pos);
718     m_file->write(head, head_size);
719 
720     auto original_position        = m_data[data_idx + 1]->m_pos;
721     m_data[data_idx + 1]->m_pos   = new_pos;
722     m_data[data_idx + 1]->m_size += move_up ? 1 : -1;
723 
724     if (!move_up) {
725       // Need to add an empty new EBML void element.
726       mxdebug_if(s_debug_void, fmt::format("handle_void_elements({0}): void_size == 1: adding new two-byte EBML void element at position {1}\n", data_idx, new_pos - 2));
727 
728       m_file->setFilePointer(new_pos - 2);
729 
730       EbmlVoid evoid;
731       evoid.SetSize(0);
732       evoid.Render(*m_file);
733 
734       m_data.insert(m_data.begin() + data_idx + 1, kax_analyzer_data_c::create(EBML_ID(EbmlVoid), new_pos - 2, 2));
735       ++data_idx;
736     }
737 
738     // Update meta seek indices for m_data[data_idx]'s new position.
739     e = read_element(m_data[data_idx + 1]);
740 
741     mxdebug_if(s_debug_void, fmt::format("handle_void_elements({0}): void_size == 1: element re-read; now removing from meta seeks, merging void elements etc.\n", data_idx));
742 
743     remove_from_meta_seeks(EbmlId(*e));
744     merge_void_elements();
745     add_to_meta_seek(e.get());
746     merge_void_elements();
747 
748     if (Is<KaxCluster>(*e)) {
749       mxdebug_if(s_debug_void,
750                  fmt::format("handle_void_elements({0}): shifted element is cluster; adjusting cues; original relative position {1} new relative position {2}\n",
751                              data_idx, m_segment->GetRelativePosition(original_position), m_segment->GetRelativePosition(e->GetElementPosition())));
752       adjust_cues_for_cluster(static_cast<KaxCluster &>(*e), m_segment->GetRelativePosition(original_position));
753     }
754 
755     return false;
756   }
757 
758   m_file->setFilePointer(void_pos);
759 
760   // Yes. Write a new EbmlVoid element and update the internal records.
761 
762   // Calculating the void element's content size. This is not straight
763   // forward because there are special values for the total size for
764   // which the header size must be forced to be one byte more than would
765   // be strictly necessary in order to occupy the space. Here's an
766   // example:
767 
768   // A two-byte header field (one for the ID, one for the content
769   // size) can have a content size of at most 127 bytes, meaning a
770   // total size of 129 is easy to achieve: set the content size to 127
771   // bytes, libEBML will calculate the required length of the content
772   // size field to be 1 and the resulting element's total size will be
773   // the desired 129 bytes.
774 
775   // A content size of 128 would mean that the content size field must be
776   // at least two bytes long. Taking the element's ID into account this
777   // would mean a total element size of 128 + 2 + 1 = 131 bytes. So
778   // getting libEBML to produce an element of a total size of 131 is
779   // easy, too: set the content size to 128 bytes, and libEBML will do
780   // the rest.
781 
782   // The problematic total size is a 130 bytes. There simply is no
783   // content size for which adding the minimal length of the content
784   // size field and 1 for the ID would result in a total size of 130
785   // bytes.
786 
787   // The solution is writing the length field with more bytes than
788   // necessary. In the case of a total size of 130 bytes we could use
789   // the maximum one-byte content size = 127 bytes, add one byte for the
790   // ID and force the content size field's length to be two instead of
791   // one byte (0x407f instead of 0xff).
792 
793   // Similar corner cases exist for the transition between content
794   // size field being two/three bytes, three/four bytes long etc. In
795   // order to keep the code simple we always use an eight-bytes long
796   // content size field if the total size is at least nine bytes and a
797   // one-byte long content size field otherwise.
798 
799   EbmlVoid evoid;
800   if (void_size < 9)
801     evoid.SetSize(void_size - 2);
802 
803   else {
804     evoid.SetSize(void_size - 9);
805     evoid.SetSizeLength(8);
806   }
807 
808   evoid.Render(*m_file);
809 
810   m_data.insert(m_data.begin() + data_idx + 1, kax_analyzer_data_c::create(EBML_ID(EbmlVoid), void_pos, void_size));
811 
812   // Now check if we should overwrite the current element with the
813   // EbmlVoid element. That is the case if the current element's size
814   // is 0. In that case simply remove the element from the data
815   // vector.
816   if (0 == m_data[data_idx]->m_size)
817     m_data.erase(m_data.begin() + data_idx);
818 
819   return true;
820 }
821 
822 void
adjust_cues_for_cluster(KaxCluster const & cluster,uint64_t original_relative_position)823 kax_analyzer_c::adjust_cues_for_cluster(KaxCluster const &cluster,
824                                         uint64_t original_relative_position) {
825   static debugging_option_c s_debug_cues{"kax_analyzer_adjust_cues_for_cluster"};
826 
827   auto cues = read_all(EBML_INFO(KaxCues));
828   if (!cues) {
829     mxdebug_if(s_debug_cues, fmt::format("adjust_cues_for_cluster: no cues found\n"));
830     return;
831   }
832 
833   mxdebug_if(s_debug_cues, fmt::format("adjust_cues_for_cluster: cues found; looking for relative position {0}, rewriting to new position {1}\n", original_relative_position, m_segment->GetRelativePosition(cluster)));
834 
835   auto modified = false;
836 
837   for (auto const &potential_cue_point : *cues) {
838     if (!dynamic_cast<KaxCuePoint *>(potential_cue_point))
839       continue;
840 
841     for (auto const &potential_track_positions : static_cast<KaxCuePoint &>(*potential_cue_point)) {
842       if (!dynamic_cast<KaxCueTrackPositions *>(potential_track_positions))
843         continue;
844 
845       auto cluster_position = FindChild<KaxCueClusterPosition>(static_cast<KaxCueTrackPositions &>(*potential_track_positions));
846       if (!cluster_position || (cluster_position->GetValue() != original_relative_position))
847         continue;
848 
849       cluster_position->SetValue(m_segment->GetRelativePosition(cluster));
850       modified = true;
851     }
852   }
853 
854   mxdebug_if(s_debug_cues, fmt::format("adjust_cues_for_cluster: modifed? {0}\n", modified));
855 
856   if (modified)
857     update_element(cues);
858 }
859 
860 /** \brief Removes all seek entries for a specific element
861 
862     Iterates over the level 1 elements in the file and reads each seek
863     head it finds. All entries for the given \c id are removed from
864     the seek head. If the seek head has been changed then it is
865     rewritten to its original position. The space freed up is filled
866     with a new EbmlVoid element.
867 
868     \param id The ID of the elements that should be removed.
869  */
870 void
remove_from_meta_seeks(EbmlId id)871 kax_analyzer_c::remove_from_meta_seeks(EbmlId id) {
872   size_t data_idx;
873 
874   for (data_idx = 0; m_data.size() > data_idx; ++data_idx) {
875     // We only have to do work on SeekHead elements. Skip the others.
876     if (!Is<KaxSeekHead>(m_data[data_idx]->m_id))
877       continue;
878 
879     // Read the element from the file. Remember its size so that a new
880     // EbmlVoid element can be constructed afterwards.
881     auto element   = read_element(data_idx);
882     auto seek_head = dynamic_cast<KaxSeekHead *>(element.get());
883     if (!seek_head)
884       throw uer_error_unknown;
885 
886     int64_t old_size = seek_head->ElementSize(true);
887 
888     // Iterate over its children and delete the ones we're looking for.
889     bool modified = false;
890     size_t sh_idx = 0;
891     while (seek_head->ListSize() > sh_idx) {
892       if (!Is<KaxSeek>((*seek_head)[sh_idx])) {
893         ++sh_idx;
894         continue;
895       }
896 
897       auto seek_entry = dynamic_cast<KaxSeek *>((*seek_head)[sh_idx]);
898 
899       if (!seek_entry->IsEbmlId(id)) {
900         ++sh_idx;
901         continue;
902       }
903 
904       delete (*seek_head)[sh_idx];
905       seek_head->Remove(sh_idx);
906 
907       modified = true;
908     }
909 
910     // Only rewrite the element to the file if it has been modified.
911     if (!modified)
912       continue;
913 
914     // If the seek head is now empty then simply remove and overwrite
915     // it with a void element.
916     if (0 == seek_head->ListSize()) {
917       m_data[data_idx]->m_size = 0;
918       handle_void_elements(data_idx);
919 
920       continue;
921     }
922 
923     // First make sure the new element is smaller than the old one.
924     // The following code cannot deal with the other case.
925     seek_head->UpdateSize(true);
926     int64_t new_size = seek_head->ElementSize(true);
927     if (new_size > old_size)
928       throw uer_error_unknown;
929 
930     // Overwrite the element itself and update its internal record.
931     m_file->setFilePointer(m_data[data_idx]->m_pos);
932     seek_head->Render(*m_file, true);
933     if (m_doc_type_version_handler)
934       m_doc_type_version_handler->account(*seek_head, true);
935 
936     m_data[data_idx]->m_size = new_size;
937 
938     // Create a void element to cover the freed space.
939     handle_void_elements(data_idx);
940   }
941 }
942 
943 /** \brief Overwrites all instances of a specific level 1 element
944 
945     Iterates over the level 1 elements in the file and overwrites
946     each instance of a specific level 1 element given by \c id.
947     It is replaced with a new EbmlVoid element.
948 
949     \param id The ID of the elements that should be overwritten.
950  */
951 void
overwrite_all_instances(EbmlId id)952 kax_analyzer_c::overwrite_all_instances(EbmlId id) {
953   size_t data_idx;
954 
955   for (data_idx = 0; m_data.size() > data_idx; ++data_idx) {
956     // We only have to do work on specific elements. Skip the others.
957     if (m_data[data_idx]->m_id != id)
958       continue;
959 
960     // Overwrite with a void element.
961     m_data[data_idx]->m_size = 0;
962     handle_void_elements(data_idx);
963   }
964 }
965 
966 /** \brief Merges consecutive EbmlVoid elements into a single one
967 
968     Iterates over the level 1 elements in the file and merges
969     consecutive EbmlVoid elements into a single one which covers
970     the same space as the smaller ones combined.
971 
972     Void elements at the end of the file are removed as well.
973  */
974 void
merge_void_elements()975 kax_analyzer_c::merge_void_elements() {
976   size_t start_idx = 0;
977 
978   while (m_data.size() > start_idx) {
979     // We only have to do work on EbmlVoid elements. Skip the others.
980     if (!Is<EbmlVoid>(m_data[start_idx]->m_id)) {
981       ++start_idx;
982       continue;
983     }
984 
985     // Found an EbmlVoid element. See how many consecutive EbmlVoid elements
986     // there are at this position and calculate the combined size.
987     size_t end_idx  = start_idx + 1;
988     size_t new_size = m_data[start_idx]->m_size;
989     while ((m_data.size() > end_idx) && Is<EbmlVoid>(m_data[end_idx]->m_id)) {
990       new_size += m_data[end_idx]->m_size;
991       ++end_idx;
992     }
993 
994     // Is this only a single EbmlVoid element? If yes continue.
995     if (end_idx == (start_idx + 1)) {
996       start_idx += 2;
997       continue;
998     }
999 
1000     // Write the new EbmlVoid element to the file.
1001     m_file->setFilePointer(m_data[start_idx]->m_pos);
1002 
1003     EbmlVoid evoid;
1004     evoid.SetSize(new_size);
1005     evoid.UpdateSize();
1006     evoid.SetSize(new_size - evoid.HeadSize());
1007     evoid.Render(*m_file);
1008 
1009     // Update the internal records to reflect the changes.
1010     m_data[start_idx]->m_size = new_size;
1011     m_data.erase(m_data.begin() + start_idx + 1, m_data.begin() + end_idx);
1012 
1013     start_idx += 2;
1014   }
1015 
1016   // See how many void elements there are at the end of the file.
1017   start_idx = m_data.size();
1018 
1019   while ((0 < start_idx) && Is<EbmlVoid>(m_data[start_idx - 1]->m_id))
1020     --start_idx;
1021 
1022   // If there are none then we're done.
1023   if (m_data.size() <= start_idx)
1024     return;
1025 
1026   // Truncate the file after the last non-void element and update the segment size.
1027   m_file->truncate(m_data[start_idx]->m_pos);
1028   adjust_segment_size();
1029 }
1030 
1031 /** \brief Finds a suitable spot for an element and writes it to the file
1032 
1033     First, a suitable spot for the element is determined by looking at
1034     EbmlVoid elements. If none is found in the middle of the file then
1035     the element will be appended at the end.
1036 
1037     Second, the element is written at the location determined in the
1038     first step. If EbmlVoid elements are overwritten then a new,
1039     smaller one is created which covers the remainder of the
1040     overwritten one.
1041 
1042     Third, the internal records are updated to reflect the changes.
1043 
1044     \param e Pointer to the element to write.
1045     \param write_defaults Boolean that decides whether or not elements
1046       which contain their default value are written to the file.
1047  */
1048 void
write_element(EbmlElement * e,bool write_defaults,placement_strategy_e strategy)1049 kax_analyzer_c::write_element(EbmlElement *e,
1050                               bool write_defaults,
1051                               placement_strategy_e strategy) {
1052   e->UpdateSize(write_defaults, true);
1053   int64_t element_size = e->ElementSize(write_defaults);
1054 
1055   size_t data_idx;
1056   for (data_idx = (ps_anywhere == strategy ? 0 : m_data.size() - 1); m_data.size() > data_idx; ++data_idx) {
1057     // We're only interested in EbmlVoid elements. Skip the others.
1058     if (!Is<EbmlVoid>(m_data[data_idx]->m_id))
1059       continue;
1060 
1061     // Skip the element if it doesn't provide enough space.
1062     if (m_data[data_idx]->m_size < element_size)
1063       continue;
1064 
1065     // We've found our element. Overwrite it.
1066     m_file->setFilePointer(m_data[data_idx]->m_pos);
1067     e->Render(*m_file, write_defaults, false, true);
1068     if (m_doc_type_version_handler)
1069       m_doc_type_version_handler->account(*e, write_defaults);
1070 
1071     // Update the internal records.
1072     m_data[data_idx]->m_id   = EbmlId(*e);
1073     m_data[data_idx]->m_size = e->ElementSize(write_defaults);
1074 
1075     // Create a new void element after the element we've just written.
1076     handle_void_elements(data_idx);
1077 
1078     // We're done.
1079     return;
1080   }
1081 
1082   // We haven't found a suitable place. So store the element at the end of the file
1083   // and update the internal records.
1084   m_file->setFilePointer(0, seek_end);
1085   e->Render(*m_file, write_defaults, false, true);
1086   if (m_doc_type_version_handler)
1087     m_doc_type_version_handler->account(*e, write_defaults);
1088   m_data.push_back(kax_analyzer_data_c::create(EbmlId(*e), m_file->getFilePointer() - e->ElementSize(write_defaults), e->ElementSize(write_defaults)));
1089 
1090   // Adjust the segment's size.
1091   adjust_segment_size();
1092 }
1093 
1094 int
ensure_front_seek_head_links_to(unsigned int seek_head_idx)1095 kax_analyzer_c::ensure_front_seek_head_links_to(unsigned int seek_head_idx) {
1096   // It is possible that the seek head at the front has been removed
1097   // due to moving the seek head at the back one byte to the front
1098   // which calls remove_from_meta_seeks() with that second seek head.
1099 
1100   // If this seek head is located at the front then we've got nothing
1101   // to do. At the same time look for a seek head at the start.
1102 
1103   mxdebug_if(m_debug, fmt::format("ensure_front_seek_head_links_to start\n"));
1104 
1105   std::optional<unsigned int> first_seek_head_idx;
1106 
1107   for (int data_idx = 0, end = m_data.size(); end > data_idx; ++data_idx) {
1108     auto const &data = *m_data[data_idx];
1109 
1110     if (Is<KaxSeekHead>(data.m_id)) {
1111       if (static_cast<unsigned int>(data_idx) == seek_head_idx)
1112         return seek_head_idx;
1113 
1114       first_seek_head_idx = data_idx;
1115 
1116     } else if (Is<KaxCluster>(data.m_id))
1117       break;
1118   }
1119 
1120   // This seek head is not located at the start. If there is another
1121   // seek head present then the rest of the code has already taken
1122   // care of everything.
1123   if (first_seek_head_idx)
1124     return *first_seek_head_idx;
1125 
1126   // Our seek head is at the end and there's no seek head at the
1127   // start.
1128   mxdebug_if(m_debug, fmt::format("  no seek head at start but one at the end\n"));
1129 
1130   auto seek_head_position = m_segment->GetRelativePosition(m_data[seek_head_idx]->m_pos);
1131   auto seek_head_id       = memory_c::alloc(4);
1132 
1133   put_uint32_be(seek_head_id->get_buffer(), EBML_ID(KaxSeekHead).GetValue());
1134 
1135   auto new_seek_head = ebml_master_cptr{
1136     mtx::construct::cons<KaxSeekHead>(mtx::construct::cons<KaxSeek>(new KaxSeekID,       seek_head_id,
1137                                                                     new KaxSeekPosition, seek_head_position))
1138   };
1139 
1140   new_seek_head->UpdateSize();
1141   auto needed_size = static_cast<int64_t>(new_seek_head->ElementSize(true));
1142   auto first_time  = true;
1143 
1144   while (first_time) {
1145     mxdebug_if(m_debug, fmt::format("  looking for place for the new seek head at the start…\n"));
1146     // Find a place at the front with enough space.
1147     for (int data_idx = 0, end = m_data.size(); data_idx < end; ++data_idx) {
1148       auto &data = *m_data[data_idx];
1149 
1150       if (Is<KaxCluster>(data.m_id))
1151         break;
1152 
1153       if (!Is<EbmlVoid>(data.m_id))
1154         continue;
1155 
1156       // Avoid the case with the space being only one byte larger than
1157       // what we need. That would require moving the next element one
1158       // byte to the front triggering seek head adjustments.
1159       if ((data.m_size != needed_size) && (data.m_size < (needed_size + 2)))
1160         continue;
1161 
1162       mxdebug_if(m_debug, fmt::format("  got one! writing at file position {0}\n", data.m_pos));
1163       // Got a place. Write the seek head, update the internal record &
1164       // write a new void element.
1165       m_file->setFilePointer(data.m_pos);
1166       new_seek_head->Render(*m_file, true);
1167       if (m_doc_type_version_handler)
1168         m_doc_type_version_handler->account(*new_seek_head, true);
1169 
1170       data.m_size = needed_size;
1171       data.m_id   = EBML_ID(KaxSeekHead);
1172 
1173       handle_void_elements(data_idx);
1174 
1175       return data_idx;
1176     }
1177 
1178     mxdebug_if(m_debug, fmt::format("  no place, moving level 1 elements and trying again\n"));
1179     // We haven't found a spot. Move an existing level 1 element to the
1180     // end if we haven't done that yet and try again. Otherwise fail.
1181     if (first_time && !move_level1_element_before_cluster_to_end_of_file())
1182       break;
1183 
1184     first_time = false;
1185   }
1186 
1187   throw uer_error_unknown;
1188 
1189   return -1;
1190 }
1191 
1192 std::pair<bool, int>
try_adding_to_existing_meta_seek(EbmlElement * e)1193 kax_analyzer_c::try_adding_to_existing_meta_seek(EbmlElement *e) {
1194   auto first_seek_head_idx = -1;
1195 
1196   for (auto data_idx = 0u; m_data.size() > data_idx; ++data_idx) {
1197     // We only have to do work on SeekHead elements. Skip the others.
1198     if (!Is<KaxSeekHead>(m_data[data_idx]->m_id))
1199       continue;
1200 
1201     // Calculate how much free space there is behind the seek head.
1202     // merge_void_elemens() guarantees that there is no EbmlVoid element
1203     // at the end of the file and that all consecutive EbmlVoid elements
1204     // have been merged into a single element.
1205     size_t available_space = m_data[data_idx]->m_size;
1206     if (((data_idx + 1) < m_data.size()) && Is<EbmlVoid>(m_data[data_idx + 1]->m_id))
1207       available_space += m_data[data_idx + 1]->m_size;
1208 
1209     // Read the seek head, index the element and see how much space it needs.
1210     auto element   = read_element(data_idx);
1211     auto seek_head = dynamic_cast<KaxSeekHead *>(element.get());
1212     if (!seek_head)
1213       throw uer_error_unknown;
1214 
1215     if (-1 == first_seek_head_idx)
1216       first_seek_head_idx = data_idx;
1217 
1218     seek_head->IndexThis(*e, *m_segment.get());
1219     seek_head->UpdateSize(true);
1220 
1221     // We can use this seek head if it is at the end of the file, or if there
1222     // is enough space behind it in form of void elements.
1223     if ((m_data.size() != (data_idx + 1)) && (seek_head->ElementSize(true) > available_space))
1224       continue;
1225 
1226     // Write the seek head.
1227     m_file->setFilePointer(m_data[data_idx]->m_pos);
1228     seek_head->Render(*m_file, true);
1229     if (m_doc_type_version_handler)
1230       m_doc_type_version_handler->account(*seek_head, true);
1231 
1232     // Update the internal record.
1233     m_data[data_idx]->m_size = seek_head->ElementSize(true);
1234 
1235     // If this seek head is located at the end of the file then we have
1236     // to adjust the segment size.
1237     if (m_data.size() == (data_idx + 1))
1238       adjust_segment_size();
1239 
1240     else
1241       // Otherwise create an EbmlVoid to fill the gap (if any).
1242       handle_void_elements(data_idx);
1243 
1244     // Now ensure that the seek head is either at the front of the
1245     // file itself or that it is referenced from one at the front. It
1246     // is possible that the seek head at the front has been removed
1247     // due to moving the seek head at the back one byte to the front
1248     // which calls remove_from_meta_seeks() with that second seek head.
1249     first_seek_head_idx = ensure_front_seek_head_links_to(data_idx);
1250 
1251     // We're done.
1252     return { true, first_seek_head_idx };
1253   }
1254 
1255   return { false, first_seek_head_idx };
1256 }
1257 
1258 void
move_seek_head_to_end_and_create_new_one_at_start(EbmlElement * e,int first_seek_head_idx)1259 kax_analyzer_c::move_seek_head_to_end_and_create_new_one_at_start(EbmlElement *e,
1260                                                                   int first_seek_head_idx) {
1261   // Read the first seek head…
1262   auto element   = read_element(first_seek_head_idx);
1263   auto seek_head = dynamic_cast<KaxSeekHead *>(element.get());
1264   if (!seek_head)
1265     throw uer_error_unknown;
1266 
1267   // …index our element…
1268   seek_head->IndexThis(*e, *m_segment.get());
1269   seek_head->UpdateSize(true);
1270 
1271   // …write the seek head at the end of the file…
1272   m_file->setFilePointer(0, seek_end);
1273   seek_head->Render(*m_file, true);
1274   if (m_doc_type_version_handler)
1275     m_doc_type_version_handler->account(*seek_head, true);
1276 
1277   // …and update the internal records.
1278   m_data.push_back(kax_analyzer_data_c::create(EBML_ID(KaxSeekHead), seek_head->GetElementPosition(), seek_head->ElementSize(true)));
1279 
1280   // Update the segment size.
1281   adjust_segment_size();
1282 
1283   // Create a new seek head and write it to the file.
1284   std::shared_ptr<KaxSeekHead> forward_seek_head(new KaxSeekHead);
1285   forward_seek_head->IndexThis(*seek_head, *m_segment.get());
1286   forward_seek_head->UpdateSize(true);
1287 
1288   m_file->setFilePointer(m_data[first_seek_head_idx]->m_pos);
1289   forward_seek_head->Render(*m_file, true);
1290   if (m_doc_type_version_handler)
1291     m_doc_type_version_handler->account(*forward_seek_head, true);
1292 
1293   // Update the internal record to reflect that there's a new seek head.
1294   m_data[first_seek_head_idx]->m_size = forward_seek_head->ElementSize(true);
1295 
1296   // Create a void element behind the small new first seek head.
1297   handle_void_elements(first_seek_head_idx);
1298 
1299   // We're done.
1300 }
1301 
1302 bool
create_new_meta_seek_at_start(EbmlElement * e)1303 kax_analyzer_c::create_new_meta_seek_at_start(EbmlElement *e) {
1304   auto new_seek_head = std::make_shared<KaxSeekHead>();
1305   new_seek_head->IndexThis(*e, *m_segment.get());
1306   new_seek_head->UpdateSize(true);
1307 
1308   for (auto data_idx = 0u; m_data.size() > data_idx; ++data_idx) {
1309     // We can only overwrite void elements. Skip the others.
1310     if (!Is<EbmlVoid>(m_data[data_idx]->m_id))
1311       continue;
1312 
1313     // Skip the element if it doesn't offer enough space for the seek head.
1314     if (m_data[data_idx]->m_size < static_cast<int64_t>(new_seek_head->ElementSize(true)))
1315       continue;
1316 
1317     // We've found a suitable spot. Write the seek head.
1318     m_file->setFilePointer(m_data[data_idx]->m_pos);
1319     new_seek_head->Render(*m_file, true);
1320     if (m_doc_type_version_handler)
1321       m_doc_type_version_handler->account(*new_seek_head, true);
1322 
1323     // Adjust the internal records for the new seek head.
1324     m_data[data_idx]->m_size = new_seek_head->ElementSize(true);
1325     m_data[data_idx]->m_id   = EBML_ID(KaxSeekHead);
1326 
1327     // Write a void element after the newly written seek head in order to
1328     // cover the space previously occupied by the old void element.
1329     handle_void_elements(data_idx);
1330 
1331     // We're done.
1332     return true;
1333   }
1334 
1335   return false;
1336 }
1337 
1338 bool
move_level1_element_before_cluster_to_end_of_file()1339 kax_analyzer_c::move_level1_element_before_cluster_to_end_of_file() {
1340   auto candidates_for_moving = std::vector<std::pair<int, unsigned int> >{};
1341 
1342   for (auto data_idx = 0u; m_data.size() > data_idx; ++data_idx) {
1343     auto const &id = m_data[data_idx]->m_id;
1344 
1345     if (Is<KaxCluster>(id))
1346       break;
1347 
1348     if (mtx::included_in(id, EBML_ID(KaxAttachments)))
1349       candidates_for_moving.emplace_back(10, data_idx);
1350 
1351     else if (id == EBML_ID(KaxTracks))
1352       candidates_for_moving.emplace_back(20, data_idx);
1353 
1354     else if (id == EBML_ID(KaxInfo))
1355       candidates_for_moving.emplace_back(30, data_idx);
1356   }
1357 
1358   // Have we found at least one suitable element before the first
1359   // cluster? If not bail out.
1360   if (candidates_for_moving.empty())
1361     return false;
1362 
1363   // Sort by their importance first and position second. Lower
1364   // importance means the element is more likely to be moved.
1365   std::sort(candidates_for_moving.begin(), candidates_for_moving.end());
1366 
1367   auto const to_move_idx = candidates_for_moving.front().second;
1368   auto const &to_move    = *m_data[to_move_idx];
1369 
1370   mxdebug_if(m_debug, fmt::format("Moving level 1 at index {0} to the end ({1})\n", to_move_idx, to_move.to_string()));
1371 
1372   // We read the element and write it again at the end of the file.
1373   m_file->setFilePointer(to_move.m_pos);
1374   auto buf = m_file->read(to_move.m_size);
1375 
1376   m_file->setFilePointer(0, seek_end);
1377   auto position = m_file->getFilePointer();
1378 
1379   m_file->write(buf);
1380 
1381   // Update the internal records.
1382   m_data.push_back(kax_analyzer_data_c::create(to_move.m_id, position, to_move.m_size));
1383 
1384   // Overwrite with a void element.
1385   m_data[to_move_idx]->m_size = 0;
1386   handle_void_elements(to_move_idx);
1387 
1388   debug_dump_elements_maybe("move_level1_element_before_cluster_to_end_of_file");
1389 
1390   verify_data_structures_against_file("move_level1_element_before_cluster_to_end_of_file");
1391 
1392   // And add it to a meta seek element.
1393   auto e = read_element(m_data.size() - 1);
1394   if (!e)
1395     throw uer_error_unknown;
1396 
1397   add_to_meta_seek(e.get());
1398 
1399   return true;
1400 }
1401 
1402 /** \brief Adds an element to one of the meta seek entries
1403 
1404     This function iterates over all meta seek elements and looks
1405     for one that has enough space (via following EbmlVoid elements or
1406     because it is located at the end of the file) for indexing
1407     the element \c e.
1408 
1409     If no such element is found then a new meta seek element is
1410     created at an appropriate place, and that element is indexed.
1411 
1412     \param e Pointer to the element to index.
1413  */
1414 void
add_to_meta_seek(EbmlElement * e)1415 kax_analyzer_c::add_to_meta_seek(EbmlElement *e) {
1416   auto result = try_adding_to_existing_meta_seek(e);
1417 
1418   if (result.first)
1419     return;
1420 
1421   // No suitable meta seek head found -- we have to write a new one.
1422 
1423   // If we have found a prior seek head then we copy that one to the end
1424   // of the file including the newly indexed element and write a one-element
1425   // seek head at the first meta seek's position pointing to the one at the
1426   // end.
1427 
1428   if (-1 != result.second) {
1429     move_seek_head_to_end_and_create_new_one_at_start(e, result.second);
1430     return;
1431   }
1432 
1433   // We don't have a seek head to copy. Create one before the first chapter if possible.
1434   if (create_new_meta_seek_at_start(e))
1435     return;
1436 
1437   // We haven't found a place for the new seek head before the first
1438   // cluster. Therefore we must try to move an existing level 1
1439   // element to the end of the file first.
1440   if (move_level1_element_before_cluster_to_end_of_file()) {
1441     add_to_meta_seek(e);
1442     return;
1443   }
1444 
1445   // This is not supported at the moment.
1446   throw uer_error_not_indexable;
1447 }
1448 
1449 ebml_master_cptr
read_all(const EbmlCallbacks & callbacks)1450 kax_analyzer_c::read_all(const EbmlCallbacks &callbacks) {
1451   reopen_file();
1452 
1453   ebml_master_cptr master;
1454   EbmlStream es(*m_file);
1455   size_t i;
1456 
1457   for (i = 0; m_data.size() > i; ++i) {
1458     kax_analyzer_data_c &data = *m_data[i].get();
1459     if (EBML_INFO_ID(callbacks) != data.m_id)
1460       continue;
1461 
1462     m_file->setFilePointer(data.m_pos);
1463     int upper_lvl_el     = 0;
1464     EbmlElement *element = es.FindNextElement(EBML_CLASS_CONTEXT(KaxSegment), upper_lvl_el, 0xFFFFFFFFL, true);
1465     if (!element)
1466       continue;
1467 
1468     if (EbmlId(*element) != EBML_INFO_ID(callbacks)) {
1469       delete element;
1470       continue;
1471     }
1472 
1473     EbmlElement *l2 = nullptr;
1474     element->Read(*m_stream, EBML_INFO_CONTEXT(callbacks), upper_lvl_el, l2, true);
1475 
1476     if (!master)
1477       master = ebml_master_cptr(static_cast<EbmlMaster *>(element));
1478     else {
1479       auto src = static_cast<EbmlMaster *>(element);
1480       while (src->ListSize() > 0) {
1481         master->PushElement(*(*src)[0]);
1482         src->Remove(0);
1483       }
1484       delete element;
1485     }
1486   }
1487 
1488   if (master && (master->ListSize() == 0))
1489     master.reset();
1490 
1491   return master;
1492 }
1493 
1494 void
read_all_meta_seeks()1495 kax_analyzer_c::read_all_meta_seeks() {
1496   m_meta_seeks_by_position.clear();
1497 
1498   unsigned int i, num_entries = m_data.size();
1499   std::map<int64_t, bool> positions_found;
1500 
1501   for (i = 0; i < num_entries; i++)
1502     positions_found[m_data[i]->m_pos] = true;
1503 
1504   for (i = 0; i < num_entries; i++)
1505     if (Is<KaxSeekHead>(m_data[i]->m_id))
1506       read_meta_seek(m_data[i]->m_pos, positions_found);
1507 
1508   std::sort(m_data.begin(), m_data.end());
1509 }
1510 
1511 void
read_meta_seek(uint64_t pos,std::map<int64_t,bool> & positions_found)1512 kax_analyzer_c::read_meta_seek(uint64_t pos,
1513                                std::map<int64_t, bool> &positions_found) {
1514   if (m_meta_seeks_by_position[pos])
1515     return;
1516 
1517   m_meta_seeks_by_position[pos] = true;
1518 
1519   m_file->setFilePointer(pos);
1520 
1521   int upper_lvl_el = 0;
1522   EbmlElement *l1  = m_stream->FindNextElement(EBML_CONTEXT(m_segment), upper_lvl_el, 0xFFFFFFFFL, true, 1);
1523 
1524   if (!l1)
1525     return;
1526 
1527   if (!Is<KaxSeekHead>(l1)) {
1528     delete l1;
1529     return;
1530   }
1531 
1532   EbmlElement *l2 = nullptr;
1533   auto master     = static_cast<EbmlMaster *>(l1);
1534   master->Read(*m_stream, EBML_CONTEXT(l1), upper_lvl_el, l2, true);
1535 
1536   unsigned int i;
1537   for (i = 0; master->ListSize() > i; i++) {
1538     if (!Is<KaxSeek>((*master)[i]))
1539       continue;
1540 
1541     auto seek        = static_cast<KaxSeek *>((*master)[i]);
1542     auto seek_id     = FindChild<KaxSeekID>(seek);
1543     int64_t seek_pos = seek->Location() + m_segment->GetElementPosition() + m_segment->HeadSize();
1544 
1545     if ((0 == pos) || !seek_id)
1546       continue;
1547 
1548     if (positions_found[seek_pos])
1549       continue;
1550 
1551     EbmlId the_id(seek_id->GetBuffer(), seek_id->GetSize());
1552     m_data.push_back(kax_analyzer_data_c::create(the_id, seek_pos, -1));
1553     positions_found[seek_pos] = true;
1554 
1555     if (Is<KaxSeekHead>(the_id))
1556       read_meta_seek(seek_pos, positions_found);
1557   }
1558 
1559   delete l1;
1560 }
1561 
1562 void
fix_element_sizes(uint64_t file_size)1563 kax_analyzer_c::fix_element_sizes(uint64_t file_size) {
1564   unsigned int i;
1565   for (i = 0; m_data.size() > i; ++i)
1566     if (-1 == m_data[i]->m_size)
1567       m_data[i]->m_size = ((i + 1) < m_data.size() ? m_data[i + 1]->m_pos : file_size) - m_data[i]->m_pos;
1568 }
1569 
1570 void
fix_unknown_size_for_last_level1_element()1571 kax_analyzer_c::fix_unknown_size_for_last_level1_element() {
1572   if (!m_data.size())
1573     return;
1574 
1575   auto &data = *m_data.back();
1576   if (data.m_size_known)
1577     return;
1578 
1579   mxinfo(fmt::format("chunky bacon! data {0} seg end {1}\n", data.to_string(), m_segment_end));
1580 
1581   auto elt = read_element(m_data.size() - 1);
1582   if (!elt)
1583     throw uer_error_fixing_last_element_unknown_size_failed;
1584 
1585   auto head_size       = static_cast<unsigned int>(elt->HeadSize());
1586   auto actual_size     = m_segment_end - (elt->GetElementPosition() + head_size);
1587   auto required_bytes  = CodedSizeLength(actual_size, 0);
1588   auto available_bytes = elt->GetSizeLength();
1589 
1590   if ((available_bytes < required_bytes) || !elt->ForceSize(actual_size))
1591     throw uer_error_fixing_last_element_unknown_size_failed;
1592 
1593   elt->OverwriteHead(*m_stream, true);
1594 
1595   data.m_size       = actual_size + head_size;
1596   data.m_size_known = true;
1597 
1598   log_debug_message(fmt::format("fix_unknown_size_for_last_level1_element: element fixed to new payload size {0} head size {1} segment end {2}\n", actual_size, head_size, m_segment_end));
1599 }
1600 
1601 kax_analyzer_c::placement_strategy_e
get_placement_strategy_for(EbmlElement * e)1602 kax_analyzer_c::get_placement_strategy_for(EbmlElement *e) {
1603   return Is<KaxTags>(e) ? ps_end : ps_anywhere;
1604 }
1605 
1606 EbmlHead &
get_ebml_head()1607 kax_analyzer_c::get_ebml_head() {
1608   return *m_ebml_head;
1609 }
1610 
1611 bool
is_webm() const1612 kax_analyzer_c::is_webm()
1613   const {
1614   return m_is_webm;
1615 }
1616 
1617 uint64_t
get_segment_pos() const1618 kax_analyzer_c::get_segment_pos()
1619   const {
1620   if (!m_segment)
1621     return 0;
1622 
1623   return m_segment->GetElementPosition();
1624 }
1625 
1626 uint64_t
get_segment_data_start_pos() const1627 kax_analyzer_c::get_segment_data_start_pos()
1628   const {
1629   if (!m_segment)
1630     return 0;
1631 
1632   return m_segment->GetElementPosition() + m_segment->HeadSize();
1633 }
1634 
1635 mtx::bits::value_cptr
read_segment_uid_from(std::string const & file_name)1636 kax_analyzer_c::read_segment_uid_from(std::string const &file_name) {
1637   try {
1638     auto analyzer = std::make_shared<kax_analyzer_c>(file_name);
1639     auto ok       = analyzer
1640       ->set_parse_mode(kax_analyzer_c::parse_mode_fast)
1641       .set_open_mode(MODE_READ)
1642       .process();
1643 
1644     if (ok) {
1645       auto element      = analyzer->read_all(EBML_INFO(KaxInfo));
1646       auto segment_info = dynamic_cast<KaxInfo *>(element.get());
1647       auto segment_uid  = segment_info ? FindChild<KaxSegmentUID>(segment_info) : nullptr;
1648 
1649       if (segment_uid)
1650         return std::make_shared<mtx::bits::value_c>(*segment_uid);
1651     }
1652 
1653   } catch (mtx::mm_io::exception &ex) {
1654     throw mtx::kax_analyzer_x{fmt::format(Y("The file '{0}' could not be opened for reading: {1}."), file_name, ex)};
1655 
1656   } catch (mtx::kax_analyzer_x &ex) {
1657     throw mtx::kax_analyzer_x{fmt::format(Y("The file '{0}' could not be opened for reading: {1}."), file_name, ex)};
1658 
1659   } catch (...) {
1660     throw mtx::kax_analyzer_x{fmt::format(Y("The file '{0}' could not be opened or parsed."), file_name)};
1661 
1662   }
1663 
1664   throw mtx::kax_analyzer_x{fmt::format(Y("No segment UID could be found in the file '{0}'."), file_name)};
1665 }
1666 
1667 int
find(EbmlId const & id)1668 kax_analyzer_c::find(EbmlId const &id) {
1669   for (int idx = 0, end = m_data.size(); idx < end; idx++)
1670     if (id == m_data[idx]->m_id)
1671       return idx;
1672 
1673   return -1;
1674 }
1675 
1676 void
with_elements(const EbmlId & id,std::function<void (kax_analyzer_data_c const &)> worker) const1677 kax_analyzer_c::with_elements(const EbmlId &id,
1678                               std::function<void(kax_analyzer_data_c const &)> worker)
1679   const {
1680   for (auto const &data : m_data)
1681     if (data->m_id == id)
1682       worker(*data);
1683 }
1684 
1685 void
determine_webm()1686 kax_analyzer_c::determine_webm() {
1687   auto doc_type = FindChild<EDocType>(*m_ebml_head);
1688   m_is_webm     = doc_type && (doc_type->GetValue() == "webm");
1689 }
1690 
1691 
1692 // ------------------------------------------------------------
1693 
1694 bool m_show_progress;
1695 int m_previous_percentage;
1696 
console_kax_analyzer_c(std::string file_name)1697 console_kax_analyzer_c::console_kax_analyzer_c(std::string file_name)
1698   : kax_analyzer_c(file_name)
1699   , m_show_progress(false)
1700   , m_previous_percentage(-1)
1701 {
1702 }
1703 
1704 void
set_show_progress(bool show_progress)1705 console_kax_analyzer_c::set_show_progress(bool show_progress) {
1706   if (-1 == m_previous_percentage)
1707     m_show_progress = show_progress;
1708 }
1709 
1710 void
show_progress_start(int64_t)1711 console_kax_analyzer_c::show_progress_start(int64_t) {
1712   if (!m_show_progress)
1713     return;
1714 
1715   m_previous_percentage = -1;
1716   show_progress_running(0);
1717 }
1718 
1719 bool
show_progress_running(int percentage)1720 console_kax_analyzer_c::show_progress_running(int percentage) {
1721   if (!m_show_progress || (percentage == m_previous_percentage))
1722     return true;
1723 
1724   std::string full_bar(        percentage  * CONSOLE_PERCENTAGE_WIDTH / 100, '=');
1725   std::string empty_bar((100 - percentage) * CONSOLE_PERCENTAGE_WIDTH / 100, ' ');
1726 
1727   mxinfo(fmt::format(Y("Progress: [{0}{1}] {2}%"), full_bar, empty_bar, percentage));
1728   mxinfo("\r");
1729 
1730   m_previous_percentage = percentage;
1731 
1732   return true;
1733 }
1734 
1735 void
show_progress_done()1736 console_kax_analyzer_c::show_progress_done() {
1737   if (!m_show_progress)
1738     return;
1739 
1740   show_progress_running(100);
1741   mxinfo("\n");
1742 }
1743 
1744 void
debug_abort_process()1745 console_kax_analyzer_c::debug_abort_process() {
1746 }
1747