1 /*
2 mkvmerge -- utility for splicing together matroska files
3 from component media subtypes
4
5 Distributed under the GPL v2
6 see the file COPYING for details
7 or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
8
9 Matroska file analyzer and updater
10
11 Written by Moritz Bunkus <moritz@bunkus.org>.
12 */
13
14 #include "common/common_pch.h"
15
16 #include <algorithm>
17
18 #include <ebml/EbmlStream.h>
19 #include <ebml/EbmlSubHead.h>
20 #include <ebml/EbmlVoid.h>
21 #include <matroska/KaxCluster.h>
22 #include <matroska/KaxCues.h>
23 #include <matroska/KaxCuesData.h>
24 #include <matroska/KaxSeekHead.h>
25 #include <matroska/KaxSegment.h>
26 #include <matroska/KaxTags.h>
27
28 #include "common/bitvalue.h"
29 #include "common/construct.h"
30 #include "common/doc_type_version_handler.h"
31 #include "common/ebml.h"
32 #include "common/endian.h"
33 #include "common/error.h"
34 #include "common/list_utils.h"
35 #include "common/kax_analyzer.h"
36 #include "common/mm_io_x.h"
37 #include "common/mm_file_io.h"
38 #include "common/mm_proxy_io.h"
39 #include "common/mm_read_buffer_io.h"
40 #include "common/strings/editing.h"
41 #include "common/strings/formatting.h"
42
43 using namespace libebml;
44 using namespace libmatroska;
45
46 namespace {
47
48 constexpr auto CONSOLE_PERCENTAGE_WIDTH = 25;
49
50 template<typename Tmaster,
51 typename Telement>
52 kax_analyzer_c::update_element_result_e
update_uid_referrals(kax_analyzer_c & analyzer,std::unordered_map<uint64_t,uint64_t> const & changes)53 update_uid_referrals(kax_analyzer_c &analyzer,
54 std::unordered_map<uint64_t, uint64_t> const &changes) {
55 auto master = analyzer.read_all(Tmaster::ClassInfos);
56 if (!master)
57 return kax_analyzer_c::uer_success;
58
59 auto modified = change_values<Telement>(static_cast<EbmlMaster &>(*master), changes);
60
61 return modified ? analyzer.update_element(master) : kax_analyzer_c::uer_success;
62 }
63
64 }
65
66 bool
operator <(const kax_analyzer_data_cptr & d1,const kax_analyzer_data_cptr & d2)67 operator <(const kax_analyzer_data_cptr &d1,
68 const kax_analyzer_data_cptr &d2) {
69 return d1->m_pos < d2->m_pos;
70 }
71
72 std::string
to_string() const73 kax_analyzer_data_c::to_string() const {
74 const EbmlCallbacks *callbacks = find_ebml_callbacks(EBML_INFO(KaxSegment), m_id);
75
76 if (!callbacks && Is<EbmlVoid>(m_id))
77 callbacks = &EBML_CLASS_CALLBACK(EbmlVoid);
78
79 std::string name;
80 if (callbacks)
81 name = EBML_INFO_NAME(*callbacks);
82
83 else
84 name = fmt::format("0x{0:0{1}x}", EBML_ID_VALUE(m_id), EBML_ID_LENGTH(m_id) * 2);
85
86 return fmt::format("{0} size {1}{3} at {2}", name, m_size, m_pos, m_size_known ? "" : " (unknown)");
87 }
88
kax_analyzer_c(std::string file_name)89 kax_analyzer_c::kax_analyzer_c(std::string file_name)
90 {
91 m_file_name = file_name;
92 m_close_file = true;
93 }
94
kax_analyzer_c(mm_io_cptr const & file)95 kax_analyzer_c::kax_analyzer_c(mm_io_cptr const &file)
96 {
97 m_file_name = file->get_file_name();
98 m_file = file;
99 m_close_file = false;
100 }
101
~kax_analyzer_c()102 kax_analyzer_c::~kax_analyzer_c() {
103 close_file();
104 }
105
106 void
close_file()107 kax_analyzer_c::close_file() {
108 if (m_close_file) {
109 m_file.reset();
110 m_stream.reset();
111 }
112 }
113
114 void
reopen_file()115 kax_analyzer_c::reopen_file() {
116 if (m_file)
117 return;
118
119 try {
120 m_file = std::make_shared<mm_file_io_c>(m_file_name, m_open_mode);
121 if (MODE_READ == m_open_mode)
122 m_file = std::make_shared<mm_read_buffer_io_c>(m_file);
123
124 } catch (mtx::mm_io::exception &) {
125 m_file.reset();
126
127 throw MODE_READ == m_open_mode ? uer_error_opening_for_reading : uer_error_opening_for_writing;
128 }
129
130 m_stream = std::make_shared<EbmlStream>(*m_file);
131 }
132
133 void
reopen_file_for_writing()134 kax_analyzer_c::reopen_file_for_writing() {
135 if (m_file && (MODE_WRITE == m_open_mode))
136 return;
137
138 m_file.reset();
139 m_open_mode = MODE_WRITE;
140
141 reopen_file();
142 }
143
144 void
_log_debug_message(const std::string & message)145 kax_analyzer_c::_log_debug_message(const std::string &message) {
146 mxinfo(message);
147 }
148
149 bool
analyzer_debugging_requested(const std::string & section)150 kax_analyzer_c::analyzer_debugging_requested(const std::string §ion) {
151 return m_debug || debugging_c::requested("kax_analyzer_"s + section);
152 }
153
154 void
debug_dump_elements()155 kax_analyzer_c::debug_dump_elements() {
156 size_t i;
157 for (i = 0; i < m_data.size(); i++)
158 log_debug_message(fmt::format("{0}: {1}\n", i, m_data[i]->to_string()));
159 }
160
161 void
debug_dump_elements_maybe(const std::string & hook_name)162 kax_analyzer_c::debug_dump_elements_maybe(const std::string &hook_name) {
163 if (!analyzer_debugging_requested(hook_name))
164 return;
165
166 log_debug_message(fmt::format("kax_analyzer_{0} dumping elements:\n", hook_name));
167 debug_dump_elements();
168 }
169
170 void
validate_data_structures(const std::string & hook_name)171 kax_analyzer_c::validate_data_structures(const std::string &hook_name) {
172 if (m_data.empty())
173 return;
174
175 bool gap_debugging = analyzer_debugging_requested("gaps");
176 bool ok = true;
177 size_t i;
178
179 for (i = 0; m_data.size() -1 > i; i++) {
180 if ((m_data[i]->m_pos + m_data[i]->m_size) > m_data[i + 1]->m_pos) {
181 log_debug_message(fmt::format("kax_analyzer_{0}: Interal data structure corruption at pos {1} (size + position > next position); dumping elements\n", hook_name, i));
182 ok = false;
183 } else if (gap_debugging && ((m_data[i]->m_pos + m_data[i]->m_size) < m_data[i + 1]->m_pos)) {
184 log_debug_message(fmt::format("kax_analyzer_{0}: Gap found at pos {1} (size + position < next position); dumping elements\n", hook_name, i));
185 ok = false;
186 }
187 }
188
189 if (!ok) {
190 debug_dump_elements();
191
192 if (m_throw_on_error)
193 throw mtx::kax_analyzer_x{Y("The data in the file is corrupted and cannot be modified safely")};
194
195 debug_abort_process();
196 }
197 }
198
199 void
verify_data_structures_against_file(const std::string & hook_name)200 kax_analyzer_c::verify_data_structures_against_file(const std::string &hook_name) {
201 kax_analyzer_c actual_content(m_file);
202 actual_content.process();
203
204 unsigned int num_items = std::max(m_data.size(), actual_content.m_data.size());
205 bool ok = m_data.size() == actual_content.m_data.size();
206 size_t max_info_len = 0;
207 std::vector<std::string> info_this, info_actual, info_markings;
208 size_t i;
209
210 for (i = 0; num_items > i; ++i) {
211 info_this.push_back( m_data.size() > i ? m_data[i]->to_string() : std::string{});
212 info_actual.push_back(actual_content.m_data.size() > i ? actual_content.m_data[i]->to_string() : std::string{});
213
214 max_info_len = std::max(max_info_len, info_this.back().length());
215
216 bool row_is_identical = info_this.back() == info_actual.back();
217 ok &= row_is_identical;
218
219 info_markings.emplace_back(row_is_identical ? " " : "*");
220 }
221
222 if (ok)
223 return;
224
225 log_debug_message(fmt::format("verify_data_structures_against_file({0}) failed. Dumping this on the left, actual on the right.\n", hook_name));
226
227 for (i = 0; num_items > i; ++i)
228 log_debug_message(fmt::format("{0} {1:<{2}s} {3}\n", info_markings[i], info_this[i], max_info_len, info_actual[i]));
229
230 debug_abort_process();
231 }
232
233 bool
probe(std::string file_name)234 kax_analyzer_c::probe(std::string file_name) {
235 try {
236 unsigned char data[4];
237 mm_file_io_c in(file_name.c_str());
238
239 if (in.read(data, 4) != 4)
240 return false;
241
242 return ((0x1a == data[0]) && (0x45 == data[1]) && (0xdf == data[2]) && (0xa3 == data[3]));
243 } catch (...) {
244 return false;
245 }
246 }
247
248 kax_analyzer_c &
set_parse_mode(parse_mode_e parse_mode)249 kax_analyzer_c::set_parse_mode(parse_mode_e parse_mode) {
250 m_parse_mode = parse_mode;
251 return *this;
252 }
253
254 kax_analyzer_c &
set_open_mode(open_mode mode)255 kax_analyzer_c::set_open_mode(open_mode mode) {
256 m_open_mode = mode;
257 return *this;
258 }
259
260 kax_analyzer_c &
set_throw_on_error(bool throw_on_error)261 kax_analyzer_c::set_throw_on_error(bool throw_on_error) {
262 m_throw_on_error = throw_on_error;
263 return *this;
264 }
265
266 kax_analyzer_c &
set_parser_start_position(uint64_t position)267 kax_analyzer_c::set_parser_start_position(uint64_t position) {
268 m_parser_start_position = position;
269 return *this;
270 }
271
272 kax_analyzer_c &
set_doc_type_version_handler(mtx::doc_type_version_handler_c * handler)273 kax_analyzer_c::set_doc_type_version_handler(mtx::doc_type_version_handler_c *handler) {
274 m_doc_type_version_handler = handler;
275 return *this;
276 }
277
278 bool
process()279 kax_analyzer_c::process() {
280 try {
281 auto result = process_internal();
282 mxdebug_if(m_debug, fmt::format("kax_analyzer: parsing file '{0}' result {1}\n", m_file->get_file_name(), result));
283
284 return result;
285
286 } catch (...) {
287 mxdebug_if(m_debug, fmt::format("kax_analyzer: parsing file '{0}' failed with an exception\n", m_file->get_file_name()));
288
289 show_progress_done();
290
291 if (m_throw_on_error)
292 throw;
293 return false;
294 }
295 }
296
297 bool
process_internal()298 kax_analyzer_c::process_internal() {
299 bool parse_fully = parse_mode_full == m_parse_mode;
300
301 reopen_file();
302
303 int64_t file_size = m_file->get_size();
304 show_progress_start(file_size);
305
306 m_segment.reset();
307 m_data.clear();
308
309 m_file->setFilePointer(0);
310 m_stream = std::make_shared<EbmlStream>(*m_file);
311
312 // Find the EbmlHead element. Must be the first one.
313 m_ebml_head.reset(static_cast<EbmlHead *>(m_stream->FindNextID(EBML_INFO(EbmlHead), 0xFFFFFFFFL)));
314 if (!m_ebml_head || !Is<EbmlHead>(*m_ebml_head))
315 throw mtx::kax_analyzer_x(Y("Not a valid Matroska file (no EBML head found)"));
316
317 EbmlElement *l0{};
318 int upper_lvl_el{};
319
320 m_ebml_head->Read(*m_stream, EBML_CONTEXT(m_ebml_head.get()), upper_lvl_el, l0, true, SCOPE_ALL_DATA);
321 m_ebml_head->SkipData(*m_stream, EBML_CONTEXT(m_ebml_head.get()));
322
323 determine_webm();
324
325 if (l0) {
326 delete l0;
327 l0 = nullptr;
328 }
329
330 while (true) {
331 // Next element must be a segment
332 l0 = m_stream->FindNextID(EBML_INFO(KaxSegment), 0xFFFFFFFFFFFFFFFFLL);
333 if (!l0)
334 throw mtx::kax_analyzer_x(Y("Not a valid Matroska file (no segment/level 0 element found)"));
335
336 if (Is<KaxSegment>(l0))
337 break;
338
339 l0->SkipData(*m_stream, EBML_CONTEXT(l0));
340 delete l0;
341 }
342
343 m_segment = std::shared_ptr<KaxSegment>(static_cast<KaxSegment *>(l0));
344 bool aborted = false;
345 bool cluster_found = false;
346 bool meta_seek_found = false;
347 m_segment_end = m_segment->IsFiniteSize() ? m_segment->GetElementPosition() + m_segment->HeadSize() + m_segment->GetSize() : m_file->get_size();
348 EbmlElement *l1 = nullptr;
349 upper_lvl_el = 0;
350
351 // In certain situations the caller doesn't way to have to pay the
352 // price for full analysis. Then it can configure the parser to
353 // start parsing at a certain offset. EbmlStream::FindNextElement()
354 // should take care of re-syncing to a known level 1 element. But
355 // take care not to start before the segment's data start position.
356 if (m_parser_start_position)
357 m_file->setFilePointer(std::max<uint64_t>(*m_parser_start_position, m_segment->GetElementPosition() + m_segment->HeadSize()));
358
359 // We've got our segment, so let's find all level 1 elements.
360 while (m_file->getFilePointer() < m_segment_end) {
361 if (!l1)
362 l1 = m_stream->FindNextElement(EBML_CONTEXT(l0), upper_lvl_el, 0xFFFFFFFFL, true, 1);
363
364 if (!l1 || (0 < upper_lvl_el))
365 break;
366
367 m_data.push_back(kax_analyzer_data_c::create(EbmlId(*l1), l1->GetElementPosition(), l1->ElementSize(true), l1->IsFiniteSize()));
368
369 cluster_found |= Is<KaxCluster>(l1);
370 meta_seek_found |= Is<KaxSeekHead>(l1);
371
372 l1->SkipData(*m_stream, EBML_CONTEXT(l1));
373 delete l1;
374 l1 = nullptr;
375
376 aborted = !show_progress_running((int)(m_file->getFilePointer() * 100 / file_size));
377
378 auto in_parent = !m_segment->IsFiniteSize()
379 || (m_file->getFilePointer() < (m_segment->GetElementPosition() + m_segment->HeadSize() + m_segment->GetSize()));
380
381 if (!in_parent || aborted || (cluster_found && meta_seek_found && !parse_fully))
382 break;
383
384 } // while (l1)
385
386 if (l1)
387 delete l1;
388
389 if (!aborted && !parse_fully)
390 read_all_meta_seeks();
391
392 show_progress_done();
393
394 validate_data_structures("process_internal_end");
395
396 if (!aborted) {
397 if (parse_mode_full != m_parse_mode)
398 fix_element_sizes(file_size);
399
400 return true;
401 }
402
403 m_segment.reset();
404 m_data.clear();
405
406 return false;
407 }
408
409 ebml_element_cptr
read_element(kax_analyzer_data_cptr const & element_data)410 kax_analyzer_c::read_element(kax_analyzer_data_cptr const &element_data) {
411 return read_element(*element_data);
412 }
413
414 ebml_element_cptr
read_element(unsigned int pos)415 kax_analyzer_c::read_element(unsigned int pos) {
416 return read_element(*m_data[pos]);
417 }
418
419 ebml_element_cptr
read_element(kax_analyzer_data_c const & element_data)420 kax_analyzer_c::read_element(kax_analyzer_data_c const &element_data) {
421 reopen_file();
422
423 EbmlStream es(*m_file);
424 m_file->setFilePointer(element_data.m_pos);
425
426 int upper_lvl_el_found = 0;
427 ebml_element_cptr e = ebml_element_cptr(es.FindNextElement(EBML_CONTEXT(m_segment), upper_lvl_el_found, 0xFFFFFFFFL, true, 1));
428 const EbmlCallbacks *callbacks = find_ebml_callbacks(EBML_INFO(KaxSegment), element_data.m_id);
429
430 if (!e || !callbacks || (EbmlId(*e) != EBML_INFO_ID(*callbacks))) {
431 e.reset();
432 return e;
433 }
434
435 upper_lvl_el_found = 0;
436 EbmlElement *upper_lvl_el = nullptr;
437 e->Read(*m_stream, EBML_INFO_CONTEXT(*callbacks), upper_lvl_el_found, upper_lvl_el, true);
438
439 return e;
440 }
441
442 bool
validate_and_break(std::string const & hook_name)443 kax_analyzer_c::validate_and_break(std::string const &hook_name) {
444 debug_dump_elements_maybe(hook_name);
445 validate_data_structures(hook_name);
446
447 if (analyzer_debugging_requested("verify"))
448 verify_data_structures_against_file(hook_name);
449
450 return debugging_c::requested(fmt::format("kax_analyzer_{0}_break", hook_name));
451 }
452
453 kax_analyzer_c::update_element_result_e
update_element(ebml_element_cptr const & e,bool write_defaults,bool add_mandatory_elements_if_missing)454 kax_analyzer_c::update_element(ebml_element_cptr const &e,
455 bool write_defaults,
456 bool add_mandatory_elements_if_missing) {
457 return update_element(e.get(), write_defaults, add_mandatory_elements_if_missing);
458 }
459
460 kax_analyzer_c::update_element_result_e
update_element(EbmlElement * e,bool write_defaults,bool add_mandatory_elements_if_missing)461 kax_analyzer_c::update_element(EbmlElement *e,
462 bool write_defaults,
463 bool add_mandatory_elements_if_missing) {
464 try {
465 reopen_file_for_writing();
466
467 if (add_mandatory_elements_if_missing)
468 fix_mandatory_elements(e);
469 remove_voids_from_master(e);
470
471 placement_strategy_e strategy = get_placement_strategy_for(e);
472
473 if (validate_and_break("update_element_0"))
474 return uer_success;
475
476 fix_unknown_size_for_last_level1_element();
477 if (validate_and_break("update_element_1"))
478 return uer_success;
479
480 overwrite_all_instances(EbmlId(*e));
481 if (validate_and_break("update_element_2"))
482 return uer_success;
483
484 merge_void_elements();
485 if (validate_and_break("update_element_3"))
486 return uer_success;
487
488 write_element(e, write_defaults, strategy);
489 if (validate_and_break("update_element_4"))
490 return uer_success;
491
492 remove_from_meta_seeks(EbmlId(*e));
493 if (validate_and_break("update_element_5"))
494 return uer_success;
495
496 merge_void_elements();
497 if (validate_and_break("update_element_6"))
498 return uer_success;
499
500 add_to_meta_seek(e);
501 if (validate_and_break("update_element_7"))
502 return uer_success;
503
504 merge_void_elements();
505 if (validate_and_break("update_element_8"))
506 return uer_success;
507
508 } catch (kax_analyzer_c::update_element_result_e result) {
509 debug_dump_elements_maybe("update_element_exception");
510 return result;
511
512 } catch (mtx::mm_io::exception &ex) {
513 mxdebug_if(m_debug, fmt::format("I/O exception: {0}\n", ex.what()));
514 return uer_error_unknown;
515 }
516
517 return uer_success;
518 }
519
520 kax_analyzer_c::update_element_result_e
remove_elements(EbmlId const & id)521 kax_analyzer_c::remove_elements(EbmlId const &id) {
522 try {
523 reopen_file_for_writing();
524
525 if (validate_and_break("remove_elements_0"))
526 return uer_success;
527
528 fix_unknown_size_for_last_level1_element();
529 if (validate_and_break("remove_elements_1"))
530 return uer_success;
531
532 overwrite_all_instances(id);
533 if (validate_and_break("remove_elements_2"))
534 return uer_success;
535
536 merge_void_elements();
537 if (validate_and_break("remove_elements_3"))
538 return uer_success;
539
540 remove_from_meta_seeks(id);
541 if (validate_and_break("remove_elements_4"))
542 return uer_success;
543
544 merge_void_elements();
545 if (validate_and_break("remove_elements_5"))
546 return uer_success;
547
548 } catch (kax_analyzer_c::update_element_result_e result) {
549 debug_dump_elements_maybe("update_element_exception");
550 return result;
551 }
552
553 return uer_success;
554 }
555
556 kax_analyzer_c::update_element_result_e
update_uid_referrals(std::unordered_map<uint64_t,uint64_t> const & track_uid_changes)557 kax_analyzer_c::update_uid_referrals(std::unordered_map<uint64_t, uint64_t> const &track_uid_changes) {
558 mxdebug_if(m_debug, fmt::format("kax_analyzer: update_track_uid_referrals: number of changes: {0}\n", track_uid_changes.size()));
559
560 if (track_uid_changes.empty())
561 return uer_success;
562
563 mxdebug_if(m_debug, fmt::format("kax_analyzer: update_track_uid_referrals: number of changes: {0}\n", track_uid_changes.size()));
564
565 auto result = ::update_uid_referrals<KaxChapters, KaxChapterTrackNumber>(*this, track_uid_changes);
566 if (result != uer_success)
567 return result;
568
569 result = ::update_uid_referrals<KaxTags, KaxTagTrackUID>(*this, track_uid_changes);
570 if (result != uer_success)
571 return result;
572
573 return uer_success;
574 }
575
576 /** \brief Sets the m_segment size to the length of the file
577 */
578 void
adjust_segment_size()579 kax_analyzer_c::adjust_segment_size() {
580 // If the old segment's size is unknown then don't try to force a
581 // finite size as this will fail most of the time: an
582 // infinite/unknown size is coded by the value 0 which is often
583 // stored as a single byte (e.g. Haali's muxer does this).
584 if (!m_segment->IsFiniteSize())
585 return;
586
587 auto new_segment = std::make_shared<KaxSegment>();
588 m_file->setFilePointer(m_segment->GetElementPosition());
589 new_segment->WriteHead(*m_file, m_segment->HeadSize() - 4);
590
591 m_file->setFilePointer(0, seek_end);
592 if (!new_segment->ForceSize(m_file->getFilePointer() - m_segment->HeadSize() - m_segment->GetElementPosition())) {
593 m_segment->OverwriteHead(*m_file);
594 throw uer_error_segment_size_for_element;
595 }
596
597 new_segment->OverwriteHead(*m_file);
598 m_segment = new_segment;
599 }
600
601 /** \brief Create an EbmlVoid element at a specific location
602
603 This function fills a gap in the file with an EbmlVoid. If an
604 EbmlVoid element is located directly behind the gap then this
605 element is overwritten as well.
606
607 The function calculates the size of the new void element by taking
608 the next non-EbmlVoid's position and subtracting from it the end
609 position of the current element indicated by the \c data_idx
610 parameter.
611
612 If the space is not big enough to contain an EbmlVoid element then
613 the EBML head of the following element is moved one byte to the
614 front and its size field is extended by one byte. That way the
615 file stays compatible with all parsers, and only a small number of
616 bytes have to be moved around.
617
618 The \c m_data member structure is also updated to reflect the
619 changes made to the file.
620
621 The function relies on \c m_data[data_idx] to be up to date
622 regarding its size. If the size of \c m_data[data_idx] is zero
623 then it is assumed that the element shall be overwritten with an
624 EbmlVoid element, and \c m_data[data_idx] will be removed from the
625 \c m_data structure.
626
627 \param data_idx Index into the \c m_data structure pointing to the
628 current element after which the gap is located.
629
630 \return \c true if a new void element was created and \c false if
631 there was no need to create one or if there was not enough
632 space.
633 */
634 bool
handle_void_elements(size_t data_idx)635 kax_analyzer_c::handle_void_elements(size_t data_idx) {
636 static debugging_option_c s_debug_void{"kax_analyzer_handle_void_elements"};
637
638 // Is the element at the end of the file? If so truncate the file
639 // and remove the element from the data structure if that was
640 // requested. Then we're done.
641 if (m_data.size() == (data_idx + 1)) {
642 mxdebug_if(s_debug_void, fmt::format("handle_void_elements({0}): element is at end; truncating file\n", data_idx));
643 m_file->truncate(m_data[data_idx]->m_pos + m_data[data_idx]->m_size);
644 adjust_segment_size();
645 if (0 == m_data[data_idx]->m_size)
646 m_data.erase(m_data.begin() + data_idx);
647 return false;
648 }
649
650 // Are the following elements EbmlVoid elements?
651 size_t end_idx = data_idx + 1;
652 while ((m_data.size() > end_idx) && Is<EbmlVoid>(m_data[end_idx]->m_id))
653 ++end_idx;
654
655 if (end_idx > data_idx + 1) {
656 mxdebug_if(s_debug_void, fmt::format("handle_void_elements({0}): {1} void element(s) following; merging\n", data_idx, end_idx - data_idx - 1));
657
658 // Yes, there is at least one. Remove these elements from the list
659 // in order to create a new EbmlVoid element covering their space
660 // as well.
661 m_data.erase(m_data.begin() + data_idx + 1, m_data.begin() + end_idx);
662 }
663
664 // Calculate how much space we have to cover with a void
665 // element. This is the difference between the next element's
666 // position and the current element's end.
667 int64_t void_pos = m_data[data_idx]->m_pos + m_data[data_idx]->m_size;
668 int void_size = m_data[data_idx + 1]->m_pos - void_pos;
669
670 // If the difference is 0 then we have nothing to do.
671 if (0 == void_size) {
672 mxdebug_if(s_debug_void, fmt::format("handle_void_elements({0}): void_size == 0, nothing further to do\n", data_idx));
673 return false;
674 }
675
676 // See if we have enough space to fit an EbmlVoid element in. An
677 // EbmlVoid element needs at least two bytes (one for the ID, one
678 // for the size).
679 if (1 == void_size) {
680 mxdebug_if(s_debug_void, fmt::format("handle_void_elements({0}): void_size == 1; move next element's head down one byte & enlarge its size portion\n", data_idx));
681
682 // No. The most compatible way to deal with this situation is to
683 // move the element ID of the following element one byte to the
684 // front and extend the following element's size field by one
685 // byte.
686
687 // If the following element's coded size length is 8 bytes
688 // already, we can make the head smaller instead and move it one
689 // byte up. That leaves a gap of two bytes which we can fill with
690 // an empty new EBML Void element. The variable `move_up` reflects
691 // this; it is true the coded size length is less than 8 bytes and
692 // the element's head can be moved up and false if we have to
693 // shrink it and fill the space with a new EBML void.
694
695 auto e = read_element(m_data[data_idx + 1]);
696
697 if (!e) {
698 mxdebug_if(s_debug_void, fmt::format("handle_void_elements({0}): void_size == 1: could not read following element\n", data_idx));
699 return false;
700 }
701
702 auto move_up = e->GetSizeLength() < 8;
703 auto new_pos = m_data[data_idx + 1]->m_pos + (move_up ? -1 : 1);
704
705 binary head[4 + 8]; // Class D + 64 bits coded size
706 unsigned int head_size = EBML_ID_LENGTH(static_cast<const EbmlId &>(*e));
707 EbmlId(*e).Fill(head);
708
709 int coded_size = CodedSizeLength(e->GetSize(), move_up ? e->GetSizeLength() + 1 : 7, true);
710 CodedValueLength(e->GetSize(), coded_size, &head[head_size]);
711 head_size += coded_size;
712
713 mxdebug_if(s_debug_void,
714 fmt::format("handle_void_elements({0}): void_size == 1: writing {1} header bytes at position {2} (ID length {3} coded size length {4} previous size length {5} element size {6}); bytes: {7}\n",
715 data_idx, head_size, new_pos, head_size - coded_size, coded_size, e->GetSizeLength(), e->GetSize(), mtx::string::to_hex(head, head_size)));
716
717 m_file->setFilePointer(new_pos);
718 m_file->write(head, head_size);
719
720 auto original_position = m_data[data_idx + 1]->m_pos;
721 m_data[data_idx + 1]->m_pos = new_pos;
722 m_data[data_idx + 1]->m_size += move_up ? 1 : -1;
723
724 if (!move_up) {
725 // Need to add an empty new EBML void element.
726 mxdebug_if(s_debug_void, fmt::format("handle_void_elements({0}): void_size == 1: adding new two-byte EBML void element at position {1}\n", data_idx, new_pos - 2));
727
728 m_file->setFilePointer(new_pos - 2);
729
730 EbmlVoid evoid;
731 evoid.SetSize(0);
732 evoid.Render(*m_file);
733
734 m_data.insert(m_data.begin() + data_idx + 1, kax_analyzer_data_c::create(EBML_ID(EbmlVoid), new_pos - 2, 2));
735 ++data_idx;
736 }
737
738 // Update meta seek indices for m_data[data_idx]'s new position.
739 e = read_element(m_data[data_idx + 1]);
740
741 mxdebug_if(s_debug_void, fmt::format("handle_void_elements({0}): void_size == 1: element re-read; now removing from meta seeks, merging void elements etc.\n", data_idx));
742
743 remove_from_meta_seeks(EbmlId(*e));
744 merge_void_elements();
745 add_to_meta_seek(e.get());
746 merge_void_elements();
747
748 if (Is<KaxCluster>(*e)) {
749 mxdebug_if(s_debug_void,
750 fmt::format("handle_void_elements({0}): shifted element is cluster; adjusting cues; original relative position {1} new relative position {2}\n",
751 data_idx, m_segment->GetRelativePosition(original_position), m_segment->GetRelativePosition(e->GetElementPosition())));
752 adjust_cues_for_cluster(static_cast<KaxCluster &>(*e), m_segment->GetRelativePosition(original_position));
753 }
754
755 return false;
756 }
757
758 m_file->setFilePointer(void_pos);
759
760 // Yes. Write a new EbmlVoid element and update the internal records.
761
762 // Calculating the void element's content size. This is not straight
763 // forward because there are special values for the total size for
764 // which the header size must be forced to be one byte more than would
765 // be strictly necessary in order to occupy the space. Here's an
766 // example:
767
768 // A two-byte header field (one for the ID, one for the content
769 // size) can have a content size of at most 127 bytes, meaning a
770 // total size of 129 is easy to achieve: set the content size to 127
771 // bytes, libEBML will calculate the required length of the content
772 // size field to be 1 and the resulting element's total size will be
773 // the desired 129 bytes.
774
775 // A content size of 128 would mean that the content size field must be
776 // at least two bytes long. Taking the element's ID into account this
777 // would mean a total element size of 128 + 2 + 1 = 131 bytes. So
778 // getting libEBML to produce an element of a total size of 131 is
779 // easy, too: set the content size to 128 bytes, and libEBML will do
780 // the rest.
781
782 // The problematic total size is a 130 bytes. There simply is no
783 // content size for which adding the minimal length of the content
784 // size field and 1 for the ID would result in a total size of 130
785 // bytes.
786
787 // The solution is writing the length field with more bytes than
788 // necessary. In the case of a total size of 130 bytes we could use
789 // the maximum one-byte content size = 127 bytes, add one byte for the
790 // ID and force the content size field's length to be two instead of
791 // one byte (0x407f instead of 0xff).
792
793 // Similar corner cases exist for the transition between content
794 // size field being two/three bytes, three/four bytes long etc. In
795 // order to keep the code simple we always use an eight-bytes long
796 // content size field if the total size is at least nine bytes and a
797 // one-byte long content size field otherwise.
798
799 EbmlVoid evoid;
800 if (void_size < 9)
801 evoid.SetSize(void_size - 2);
802
803 else {
804 evoid.SetSize(void_size - 9);
805 evoid.SetSizeLength(8);
806 }
807
808 evoid.Render(*m_file);
809
810 m_data.insert(m_data.begin() + data_idx + 1, kax_analyzer_data_c::create(EBML_ID(EbmlVoid), void_pos, void_size));
811
812 // Now check if we should overwrite the current element with the
813 // EbmlVoid element. That is the case if the current element's size
814 // is 0. In that case simply remove the element from the data
815 // vector.
816 if (0 == m_data[data_idx]->m_size)
817 m_data.erase(m_data.begin() + data_idx);
818
819 return true;
820 }
821
822 void
adjust_cues_for_cluster(KaxCluster const & cluster,uint64_t original_relative_position)823 kax_analyzer_c::adjust_cues_for_cluster(KaxCluster const &cluster,
824 uint64_t original_relative_position) {
825 static debugging_option_c s_debug_cues{"kax_analyzer_adjust_cues_for_cluster"};
826
827 auto cues = read_all(EBML_INFO(KaxCues));
828 if (!cues) {
829 mxdebug_if(s_debug_cues, fmt::format("adjust_cues_for_cluster: no cues found\n"));
830 return;
831 }
832
833 mxdebug_if(s_debug_cues, fmt::format("adjust_cues_for_cluster: cues found; looking for relative position {0}, rewriting to new position {1}\n", original_relative_position, m_segment->GetRelativePosition(cluster)));
834
835 auto modified = false;
836
837 for (auto const &potential_cue_point : *cues) {
838 if (!dynamic_cast<KaxCuePoint *>(potential_cue_point))
839 continue;
840
841 for (auto const &potential_track_positions : static_cast<KaxCuePoint &>(*potential_cue_point)) {
842 if (!dynamic_cast<KaxCueTrackPositions *>(potential_track_positions))
843 continue;
844
845 auto cluster_position = FindChild<KaxCueClusterPosition>(static_cast<KaxCueTrackPositions &>(*potential_track_positions));
846 if (!cluster_position || (cluster_position->GetValue() != original_relative_position))
847 continue;
848
849 cluster_position->SetValue(m_segment->GetRelativePosition(cluster));
850 modified = true;
851 }
852 }
853
854 mxdebug_if(s_debug_cues, fmt::format("adjust_cues_for_cluster: modifed? {0}\n", modified));
855
856 if (modified)
857 update_element(cues);
858 }
859
860 /** \brief Removes all seek entries for a specific element
861
862 Iterates over the level 1 elements in the file and reads each seek
863 head it finds. All entries for the given \c id are removed from
864 the seek head. If the seek head has been changed then it is
865 rewritten to its original position. The space freed up is filled
866 with a new EbmlVoid element.
867
868 \param id The ID of the elements that should be removed.
869 */
870 void
remove_from_meta_seeks(EbmlId id)871 kax_analyzer_c::remove_from_meta_seeks(EbmlId id) {
872 size_t data_idx;
873
874 for (data_idx = 0; m_data.size() > data_idx; ++data_idx) {
875 // We only have to do work on SeekHead elements. Skip the others.
876 if (!Is<KaxSeekHead>(m_data[data_idx]->m_id))
877 continue;
878
879 // Read the element from the file. Remember its size so that a new
880 // EbmlVoid element can be constructed afterwards.
881 auto element = read_element(data_idx);
882 auto seek_head = dynamic_cast<KaxSeekHead *>(element.get());
883 if (!seek_head)
884 throw uer_error_unknown;
885
886 int64_t old_size = seek_head->ElementSize(true);
887
888 // Iterate over its children and delete the ones we're looking for.
889 bool modified = false;
890 size_t sh_idx = 0;
891 while (seek_head->ListSize() > sh_idx) {
892 if (!Is<KaxSeek>((*seek_head)[sh_idx])) {
893 ++sh_idx;
894 continue;
895 }
896
897 auto seek_entry = dynamic_cast<KaxSeek *>((*seek_head)[sh_idx]);
898
899 if (!seek_entry->IsEbmlId(id)) {
900 ++sh_idx;
901 continue;
902 }
903
904 delete (*seek_head)[sh_idx];
905 seek_head->Remove(sh_idx);
906
907 modified = true;
908 }
909
910 // Only rewrite the element to the file if it has been modified.
911 if (!modified)
912 continue;
913
914 // If the seek head is now empty then simply remove and overwrite
915 // it with a void element.
916 if (0 == seek_head->ListSize()) {
917 m_data[data_idx]->m_size = 0;
918 handle_void_elements(data_idx);
919
920 continue;
921 }
922
923 // First make sure the new element is smaller than the old one.
924 // The following code cannot deal with the other case.
925 seek_head->UpdateSize(true);
926 int64_t new_size = seek_head->ElementSize(true);
927 if (new_size > old_size)
928 throw uer_error_unknown;
929
930 // Overwrite the element itself and update its internal record.
931 m_file->setFilePointer(m_data[data_idx]->m_pos);
932 seek_head->Render(*m_file, true);
933 if (m_doc_type_version_handler)
934 m_doc_type_version_handler->account(*seek_head, true);
935
936 m_data[data_idx]->m_size = new_size;
937
938 // Create a void element to cover the freed space.
939 handle_void_elements(data_idx);
940 }
941 }
942
943 /** \brief Overwrites all instances of a specific level 1 element
944
945 Iterates over the level 1 elements in the file and overwrites
946 each instance of a specific level 1 element given by \c id.
947 It is replaced with a new EbmlVoid element.
948
949 \param id The ID of the elements that should be overwritten.
950 */
951 void
overwrite_all_instances(EbmlId id)952 kax_analyzer_c::overwrite_all_instances(EbmlId id) {
953 size_t data_idx;
954
955 for (data_idx = 0; m_data.size() > data_idx; ++data_idx) {
956 // We only have to do work on specific elements. Skip the others.
957 if (m_data[data_idx]->m_id != id)
958 continue;
959
960 // Overwrite with a void element.
961 m_data[data_idx]->m_size = 0;
962 handle_void_elements(data_idx);
963 }
964 }
965
966 /** \brief Merges consecutive EbmlVoid elements into a single one
967
968 Iterates over the level 1 elements in the file and merges
969 consecutive EbmlVoid elements into a single one which covers
970 the same space as the smaller ones combined.
971
972 Void elements at the end of the file are removed as well.
973 */
974 void
merge_void_elements()975 kax_analyzer_c::merge_void_elements() {
976 size_t start_idx = 0;
977
978 while (m_data.size() > start_idx) {
979 // We only have to do work on EbmlVoid elements. Skip the others.
980 if (!Is<EbmlVoid>(m_data[start_idx]->m_id)) {
981 ++start_idx;
982 continue;
983 }
984
985 // Found an EbmlVoid element. See how many consecutive EbmlVoid elements
986 // there are at this position and calculate the combined size.
987 size_t end_idx = start_idx + 1;
988 size_t new_size = m_data[start_idx]->m_size;
989 while ((m_data.size() > end_idx) && Is<EbmlVoid>(m_data[end_idx]->m_id)) {
990 new_size += m_data[end_idx]->m_size;
991 ++end_idx;
992 }
993
994 // Is this only a single EbmlVoid element? If yes continue.
995 if (end_idx == (start_idx + 1)) {
996 start_idx += 2;
997 continue;
998 }
999
1000 // Write the new EbmlVoid element to the file.
1001 m_file->setFilePointer(m_data[start_idx]->m_pos);
1002
1003 EbmlVoid evoid;
1004 evoid.SetSize(new_size);
1005 evoid.UpdateSize();
1006 evoid.SetSize(new_size - evoid.HeadSize());
1007 evoid.Render(*m_file);
1008
1009 // Update the internal records to reflect the changes.
1010 m_data[start_idx]->m_size = new_size;
1011 m_data.erase(m_data.begin() + start_idx + 1, m_data.begin() + end_idx);
1012
1013 start_idx += 2;
1014 }
1015
1016 // See how many void elements there are at the end of the file.
1017 start_idx = m_data.size();
1018
1019 while ((0 < start_idx) && Is<EbmlVoid>(m_data[start_idx - 1]->m_id))
1020 --start_idx;
1021
1022 // If there are none then we're done.
1023 if (m_data.size() <= start_idx)
1024 return;
1025
1026 // Truncate the file after the last non-void element and update the segment size.
1027 m_file->truncate(m_data[start_idx]->m_pos);
1028 adjust_segment_size();
1029 }
1030
1031 /** \brief Finds a suitable spot for an element and writes it to the file
1032
1033 First, a suitable spot for the element is determined by looking at
1034 EbmlVoid elements. If none is found in the middle of the file then
1035 the element will be appended at the end.
1036
1037 Second, the element is written at the location determined in the
1038 first step. If EbmlVoid elements are overwritten then a new,
1039 smaller one is created which covers the remainder of the
1040 overwritten one.
1041
1042 Third, the internal records are updated to reflect the changes.
1043
1044 \param e Pointer to the element to write.
1045 \param write_defaults Boolean that decides whether or not elements
1046 which contain their default value are written to the file.
1047 */
1048 void
write_element(EbmlElement * e,bool write_defaults,placement_strategy_e strategy)1049 kax_analyzer_c::write_element(EbmlElement *e,
1050 bool write_defaults,
1051 placement_strategy_e strategy) {
1052 e->UpdateSize(write_defaults, true);
1053 int64_t element_size = e->ElementSize(write_defaults);
1054
1055 size_t data_idx;
1056 for (data_idx = (ps_anywhere == strategy ? 0 : m_data.size() - 1); m_data.size() > data_idx; ++data_idx) {
1057 // We're only interested in EbmlVoid elements. Skip the others.
1058 if (!Is<EbmlVoid>(m_data[data_idx]->m_id))
1059 continue;
1060
1061 // Skip the element if it doesn't provide enough space.
1062 if (m_data[data_idx]->m_size < element_size)
1063 continue;
1064
1065 // We've found our element. Overwrite it.
1066 m_file->setFilePointer(m_data[data_idx]->m_pos);
1067 e->Render(*m_file, write_defaults, false, true);
1068 if (m_doc_type_version_handler)
1069 m_doc_type_version_handler->account(*e, write_defaults);
1070
1071 // Update the internal records.
1072 m_data[data_idx]->m_id = EbmlId(*e);
1073 m_data[data_idx]->m_size = e->ElementSize(write_defaults);
1074
1075 // Create a new void element after the element we've just written.
1076 handle_void_elements(data_idx);
1077
1078 // We're done.
1079 return;
1080 }
1081
1082 // We haven't found a suitable place. So store the element at the end of the file
1083 // and update the internal records.
1084 m_file->setFilePointer(0, seek_end);
1085 e->Render(*m_file, write_defaults, false, true);
1086 if (m_doc_type_version_handler)
1087 m_doc_type_version_handler->account(*e, write_defaults);
1088 m_data.push_back(kax_analyzer_data_c::create(EbmlId(*e), m_file->getFilePointer() - e->ElementSize(write_defaults), e->ElementSize(write_defaults)));
1089
1090 // Adjust the segment's size.
1091 adjust_segment_size();
1092 }
1093
1094 int
ensure_front_seek_head_links_to(unsigned int seek_head_idx)1095 kax_analyzer_c::ensure_front_seek_head_links_to(unsigned int seek_head_idx) {
1096 // It is possible that the seek head at the front has been removed
1097 // due to moving the seek head at the back one byte to the front
1098 // which calls remove_from_meta_seeks() with that second seek head.
1099
1100 // If this seek head is located at the front then we've got nothing
1101 // to do. At the same time look for a seek head at the start.
1102
1103 mxdebug_if(m_debug, fmt::format("ensure_front_seek_head_links_to start\n"));
1104
1105 std::optional<unsigned int> first_seek_head_idx;
1106
1107 for (int data_idx = 0, end = m_data.size(); end > data_idx; ++data_idx) {
1108 auto const &data = *m_data[data_idx];
1109
1110 if (Is<KaxSeekHead>(data.m_id)) {
1111 if (static_cast<unsigned int>(data_idx) == seek_head_idx)
1112 return seek_head_idx;
1113
1114 first_seek_head_idx = data_idx;
1115
1116 } else if (Is<KaxCluster>(data.m_id))
1117 break;
1118 }
1119
1120 // This seek head is not located at the start. If there is another
1121 // seek head present then the rest of the code has already taken
1122 // care of everything.
1123 if (first_seek_head_idx)
1124 return *first_seek_head_idx;
1125
1126 // Our seek head is at the end and there's no seek head at the
1127 // start.
1128 mxdebug_if(m_debug, fmt::format(" no seek head at start but one at the end\n"));
1129
1130 auto seek_head_position = m_segment->GetRelativePosition(m_data[seek_head_idx]->m_pos);
1131 auto seek_head_id = memory_c::alloc(4);
1132
1133 put_uint32_be(seek_head_id->get_buffer(), EBML_ID(KaxSeekHead).GetValue());
1134
1135 auto new_seek_head = ebml_master_cptr{
1136 mtx::construct::cons<KaxSeekHead>(mtx::construct::cons<KaxSeek>(new KaxSeekID, seek_head_id,
1137 new KaxSeekPosition, seek_head_position))
1138 };
1139
1140 new_seek_head->UpdateSize();
1141 auto needed_size = static_cast<int64_t>(new_seek_head->ElementSize(true));
1142 auto first_time = true;
1143
1144 while (first_time) {
1145 mxdebug_if(m_debug, fmt::format(" looking for place for the new seek head at the start…\n"));
1146 // Find a place at the front with enough space.
1147 for (int data_idx = 0, end = m_data.size(); data_idx < end; ++data_idx) {
1148 auto &data = *m_data[data_idx];
1149
1150 if (Is<KaxCluster>(data.m_id))
1151 break;
1152
1153 if (!Is<EbmlVoid>(data.m_id))
1154 continue;
1155
1156 // Avoid the case with the space being only one byte larger than
1157 // what we need. That would require moving the next element one
1158 // byte to the front triggering seek head adjustments.
1159 if ((data.m_size != needed_size) && (data.m_size < (needed_size + 2)))
1160 continue;
1161
1162 mxdebug_if(m_debug, fmt::format(" got one! writing at file position {0}\n", data.m_pos));
1163 // Got a place. Write the seek head, update the internal record &
1164 // write a new void element.
1165 m_file->setFilePointer(data.m_pos);
1166 new_seek_head->Render(*m_file, true);
1167 if (m_doc_type_version_handler)
1168 m_doc_type_version_handler->account(*new_seek_head, true);
1169
1170 data.m_size = needed_size;
1171 data.m_id = EBML_ID(KaxSeekHead);
1172
1173 handle_void_elements(data_idx);
1174
1175 return data_idx;
1176 }
1177
1178 mxdebug_if(m_debug, fmt::format(" no place, moving level 1 elements and trying again\n"));
1179 // We haven't found a spot. Move an existing level 1 element to the
1180 // end if we haven't done that yet and try again. Otherwise fail.
1181 if (first_time && !move_level1_element_before_cluster_to_end_of_file())
1182 break;
1183
1184 first_time = false;
1185 }
1186
1187 throw uer_error_unknown;
1188
1189 return -1;
1190 }
1191
1192 std::pair<bool, int>
try_adding_to_existing_meta_seek(EbmlElement * e)1193 kax_analyzer_c::try_adding_to_existing_meta_seek(EbmlElement *e) {
1194 auto first_seek_head_idx = -1;
1195
1196 for (auto data_idx = 0u; m_data.size() > data_idx; ++data_idx) {
1197 // We only have to do work on SeekHead elements. Skip the others.
1198 if (!Is<KaxSeekHead>(m_data[data_idx]->m_id))
1199 continue;
1200
1201 // Calculate how much free space there is behind the seek head.
1202 // merge_void_elemens() guarantees that there is no EbmlVoid element
1203 // at the end of the file and that all consecutive EbmlVoid elements
1204 // have been merged into a single element.
1205 size_t available_space = m_data[data_idx]->m_size;
1206 if (((data_idx + 1) < m_data.size()) && Is<EbmlVoid>(m_data[data_idx + 1]->m_id))
1207 available_space += m_data[data_idx + 1]->m_size;
1208
1209 // Read the seek head, index the element and see how much space it needs.
1210 auto element = read_element(data_idx);
1211 auto seek_head = dynamic_cast<KaxSeekHead *>(element.get());
1212 if (!seek_head)
1213 throw uer_error_unknown;
1214
1215 if (-1 == first_seek_head_idx)
1216 first_seek_head_idx = data_idx;
1217
1218 seek_head->IndexThis(*e, *m_segment.get());
1219 seek_head->UpdateSize(true);
1220
1221 // We can use this seek head if it is at the end of the file, or if there
1222 // is enough space behind it in form of void elements.
1223 if ((m_data.size() != (data_idx + 1)) && (seek_head->ElementSize(true) > available_space))
1224 continue;
1225
1226 // Write the seek head.
1227 m_file->setFilePointer(m_data[data_idx]->m_pos);
1228 seek_head->Render(*m_file, true);
1229 if (m_doc_type_version_handler)
1230 m_doc_type_version_handler->account(*seek_head, true);
1231
1232 // Update the internal record.
1233 m_data[data_idx]->m_size = seek_head->ElementSize(true);
1234
1235 // If this seek head is located at the end of the file then we have
1236 // to adjust the segment size.
1237 if (m_data.size() == (data_idx + 1))
1238 adjust_segment_size();
1239
1240 else
1241 // Otherwise create an EbmlVoid to fill the gap (if any).
1242 handle_void_elements(data_idx);
1243
1244 // Now ensure that the seek head is either at the front of the
1245 // file itself or that it is referenced from one at the front. It
1246 // is possible that the seek head at the front has been removed
1247 // due to moving the seek head at the back one byte to the front
1248 // which calls remove_from_meta_seeks() with that second seek head.
1249 first_seek_head_idx = ensure_front_seek_head_links_to(data_idx);
1250
1251 // We're done.
1252 return { true, first_seek_head_idx };
1253 }
1254
1255 return { false, first_seek_head_idx };
1256 }
1257
1258 void
move_seek_head_to_end_and_create_new_one_at_start(EbmlElement * e,int first_seek_head_idx)1259 kax_analyzer_c::move_seek_head_to_end_and_create_new_one_at_start(EbmlElement *e,
1260 int first_seek_head_idx) {
1261 // Read the first seek head…
1262 auto element = read_element(first_seek_head_idx);
1263 auto seek_head = dynamic_cast<KaxSeekHead *>(element.get());
1264 if (!seek_head)
1265 throw uer_error_unknown;
1266
1267 // …index our element…
1268 seek_head->IndexThis(*e, *m_segment.get());
1269 seek_head->UpdateSize(true);
1270
1271 // …write the seek head at the end of the file…
1272 m_file->setFilePointer(0, seek_end);
1273 seek_head->Render(*m_file, true);
1274 if (m_doc_type_version_handler)
1275 m_doc_type_version_handler->account(*seek_head, true);
1276
1277 // …and update the internal records.
1278 m_data.push_back(kax_analyzer_data_c::create(EBML_ID(KaxSeekHead), seek_head->GetElementPosition(), seek_head->ElementSize(true)));
1279
1280 // Update the segment size.
1281 adjust_segment_size();
1282
1283 // Create a new seek head and write it to the file.
1284 std::shared_ptr<KaxSeekHead> forward_seek_head(new KaxSeekHead);
1285 forward_seek_head->IndexThis(*seek_head, *m_segment.get());
1286 forward_seek_head->UpdateSize(true);
1287
1288 m_file->setFilePointer(m_data[first_seek_head_idx]->m_pos);
1289 forward_seek_head->Render(*m_file, true);
1290 if (m_doc_type_version_handler)
1291 m_doc_type_version_handler->account(*forward_seek_head, true);
1292
1293 // Update the internal record to reflect that there's a new seek head.
1294 m_data[first_seek_head_idx]->m_size = forward_seek_head->ElementSize(true);
1295
1296 // Create a void element behind the small new first seek head.
1297 handle_void_elements(first_seek_head_idx);
1298
1299 // We're done.
1300 }
1301
1302 bool
create_new_meta_seek_at_start(EbmlElement * e)1303 kax_analyzer_c::create_new_meta_seek_at_start(EbmlElement *e) {
1304 auto new_seek_head = std::make_shared<KaxSeekHead>();
1305 new_seek_head->IndexThis(*e, *m_segment.get());
1306 new_seek_head->UpdateSize(true);
1307
1308 for (auto data_idx = 0u; m_data.size() > data_idx; ++data_idx) {
1309 // We can only overwrite void elements. Skip the others.
1310 if (!Is<EbmlVoid>(m_data[data_idx]->m_id))
1311 continue;
1312
1313 // Skip the element if it doesn't offer enough space for the seek head.
1314 if (m_data[data_idx]->m_size < static_cast<int64_t>(new_seek_head->ElementSize(true)))
1315 continue;
1316
1317 // We've found a suitable spot. Write the seek head.
1318 m_file->setFilePointer(m_data[data_idx]->m_pos);
1319 new_seek_head->Render(*m_file, true);
1320 if (m_doc_type_version_handler)
1321 m_doc_type_version_handler->account(*new_seek_head, true);
1322
1323 // Adjust the internal records for the new seek head.
1324 m_data[data_idx]->m_size = new_seek_head->ElementSize(true);
1325 m_data[data_idx]->m_id = EBML_ID(KaxSeekHead);
1326
1327 // Write a void element after the newly written seek head in order to
1328 // cover the space previously occupied by the old void element.
1329 handle_void_elements(data_idx);
1330
1331 // We're done.
1332 return true;
1333 }
1334
1335 return false;
1336 }
1337
1338 bool
move_level1_element_before_cluster_to_end_of_file()1339 kax_analyzer_c::move_level1_element_before_cluster_to_end_of_file() {
1340 auto candidates_for_moving = std::vector<std::pair<int, unsigned int> >{};
1341
1342 for (auto data_idx = 0u; m_data.size() > data_idx; ++data_idx) {
1343 auto const &id = m_data[data_idx]->m_id;
1344
1345 if (Is<KaxCluster>(id))
1346 break;
1347
1348 if (mtx::included_in(id, EBML_ID(KaxAttachments)))
1349 candidates_for_moving.emplace_back(10, data_idx);
1350
1351 else if (id == EBML_ID(KaxTracks))
1352 candidates_for_moving.emplace_back(20, data_idx);
1353
1354 else if (id == EBML_ID(KaxInfo))
1355 candidates_for_moving.emplace_back(30, data_idx);
1356 }
1357
1358 // Have we found at least one suitable element before the first
1359 // cluster? If not bail out.
1360 if (candidates_for_moving.empty())
1361 return false;
1362
1363 // Sort by their importance first and position second. Lower
1364 // importance means the element is more likely to be moved.
1365 std::sort(candidates_for_moving.begin(), candidates_for_moving.end());
1366
1367 auto const to_move_idx = candidates_for_moving.front().second;
1368 auto const &to_move = *m_data[to_move_idx];
1369
1370 mxdebug_if(m_debug, fmt::format("Moving level 1 at index {0} to the end ({1})\n", to_move_idx, to_move.to_string()));
1371
1372 // We read the element and write it again at the end of the file.
1373 m_file->setFilePointer(to_move.m_pos);
1374 auto buf = m_file->read(to_move.m_size);
1375
1376 m_file->setFilePointer(0, seek_end);
1377 auto position = m_file->getFilePointer();
1378
1379 m_file->write(buf);
1380
1381 // Update the internal records.
1382 m_data.push_back(kax_analyzer_data_c::create(to_move.m_id, position, to_move.m_size));
1383
1384 // Overwrite with a void element.
1385 m_data[to_move_idx]->m_size = 0;
1386 handle_void_elements(to_move_idx);
1387
1388 debug_dump_elements_maybe("move_level1_element_before_cluster_to_end_of_file");
1389
1390 verify_data_structures_against_file("move_level1_element_before_cluster_to_end_of_file");
1391
1392 // And add it to a meta seek element.
1393 auto e = read_element(m_data.size() - 1);
1394 if (!e)
1395 throw uer_error_unknown;
1396
1397 add_to_meta_seek(e.get());
1398
1399 return true;
1400 }
1401
1402 /** \brief Adds an element to one of the meta seek entries
1403
1404 This function iterates over all meta seek elements and looks
1405 for one that has enough space (via following EbmlVoid elements or
1406 because it is located at the end of the file) for indexing
1407 the element \c e.
1408
1409 If no such element is found then a new meta seek element is
1410 created at an appropriate place, and that element is indexed.
1411
1412 \param e Pointer to the element to index.
1413 */
1414 void
add_to_meta_seek(EbmlElement * e)1415 kax_analyzer_c::add_to_meta_seek(EbmlElement *e) {
1416 auto result = try_adding_to_existing_meta_seek(e);
1417
1418 if (result.first)
1419 return;
1420
1421 // No suitable meta seek head found -- we have to write a new one.
1422
1423 // If we have found a prior seek head then we copy that one to the end
1424 // of the file including the newly indexed element and write a one-element
1425 // seek head at the first meta seek's position pointing to the one at the
1426 // end.
1427
1428 if (-1 != result.second) {
1429 move_seek_head_to_end_and_create_new_one_at_start(e, result.second);
1430 return;
1431 }
1432
1433 // We don't have a seek head to copy. Create one before the first chapter if possible.
1434 if (create_new_meta_seek_at_start(e))
1435 return;
1436
1437 // We haven't found a place for the new seek head before the first
1438 // cluster. Therefore we must try to move an existing level 1
1439 // element to the end of the file first.
1440 if (move_level1_element_before_cluster_to_end_of_file()) {
1441 add_to_meta_seek(e);
1442 return;
1443 }
1444
1445 // This is not supported at the moment.
1446 throw uer_error_not_indexable;
1447 }
1448
1449 ebml_master_cptr
read_all(const EbmlCallbacks & callbacks)1450 kax_analyzer_c::read_all(const EbmlCallbacks &callbacks) {
1451 reopen_file();
1452
1453 ebml_master_cptr master;
1454 EbmlStream es(*m_file);
1455 size_t i;
1456
1457 for (i = 0; m_data.size() > i; ++i) {
1458 kax_analyzer_data_c &data = *m_data[i].get();
1459 if (EBML_INFO_ID(callbacks) != data.m_id)
1460 continue;
1461
1462 m_file->setFilePointer(data.m_pos);
1463 int upper_lvl_el = 0;
1464 EbmlElement *element = es.FindNextElement(EBML_CLASS_CONTEXT(KaxSegment), upper_lvl_el, 0xFFFFFFFFL, true);
1465 if (!element)
1466 continue;
1467
1468 if (EbmlId(*element) != EBML_INFO_ID(callbacks)) {
1469 delete element;
1470 continue;
1471 }
1472
1473 EbmlElement *l2 = nullptr;
1474 element->Read(*m_stream, EBML_INFO_CONTEXT(callbacks), upper_lvl_el, l2, true);
1475
1476 if (!master)
1477 master = ebml_master_cptr(static_cast<EbmlMaster *>(element));
1478 else {
1479 auto src = static_cast<EbmlMaster *>(element);
1480 while (src->ListSize() > 0) {
1481 master->PushElement(*(*src)[0]);
1482 src->Remove(0);
1483 }
1484 delete element;
1485 }
1486 }
1487
1488 if (master && (master->ListSize() == 0))
1489 master.reset();
1490
1491 return master;
1492 }
1493
1494 void
read_all_meta_seeks()1495 kax_analyzer_c::read_all_meta_seeks() {
1496 m_meta_seeks_by_position.clear();
1497
1498 unsigned int i, num_entries = m_data.size();
1499 std::map<int64_t, bool> positions_found;
1500
1501 for (i = 0; i < num_entries; i++)
1502 positions_found[m_data[i]->m_pos] = true;
1503
1504 for (i = 0; i < num_entries; i++)
1505 if (Is<KaxSeekHead>(m_data[i]->m_id))
1506 read_meta_seek(m_data[i]->m_pos, positions_found);
1507
1508 std::sort(m_data.begin(), m_data.end());
1509 }
1510
1511 void
read_meta_seek(uint64_t pos,std::map<int64_t,bool> & positions_found)1512 kax_analyzer_c::read_meta_seek(uint64_t pos,
1513 std::map<int64_t, bool> &positions_found) {
1514 if (m_meta_seeks_by_position[pos])
1515 return;
1516
1517 m_meta_seeks_by_position[pos] = true;
1518
1519 m_file->setFilePointer(pos);
1520
1521 int upper_lvl_el = 0;
1522 EbmlElement *l1 = m_stream->FindNextElement(EBML_CONTEXT(m_segment), upper_lvl_el, 0xFFFFFFFFL, true, 1);
1523
1524 if (!l1)
1525 return;
1526
1527 if (!Is<KaxSeekHead>(l1)) {
1528 delete l1;
1529 return;
1530 }
1531
1532 EbmlElement *l2 = nullptr;
1533 auto master = static_cast<EbmlMaster *>(l1);
1534 master->Read(*m_stream, EBML_CONTEXT(l1), upper_lvl_el, l2, true);
1535
1536 unsigned int i;
1537 for (i = 0; master->ListSize() > i; i++) {
1538 if (!Is<KaxSeek>((*master)[i]))
1539 continue;
1540
1541 auto seek = static_cast<KaxSeek *>((*master)[i]);
1542 auto seek_id = FindChild<KaxSeekID>(seek);
1543 int64_t seek_pos = seek->Location() + m_segment->GetElementPosition() + m_segment->HeadSize();
1544
1545 if ((0 == pos) || !seek_id)
1546 continue;
1547
1548 if (positions_found[seek_pos])
1549 continue;
1550
1551 EbmlId the_id(seek_id->GetBuffer(), seek_id->GetSize());
1552 m_data.push_back(kax_analyzer_data_c::create(the_id, seek_pos, -1));
1553 positions_found[seek_pos] = true;
1554
1555 if (Is<KaxSeekHead>(the_id))
1556 read_meta_seek(seek_pos, positions_found);
1557 }
1558
1559 delete l1;
1560 }
1561
1562 void
fix_element_sizes(uint64_t file_size)1563 kax_analyzer_c::fix_element_sizes(uint64_t file_size) {
1564 unsigned int i;
1565 for (i = 0; m_data.size() > i; ++i)
1566 if (-1 == m_data[i]->m_size)
1567 m_data[i]->m_size = ((i + 1) < m_data.size() ? m_data[i + 1]->m_pos : file_size) - m_data[i]->m_pos;
1568 }
1569
1570 void
fix_unknown_size_for_last_level1_element()1571 kax_analyzer_c::fix_unknown_size_for_last_level1_element() {
1572 if (!m_data.size())
1573 return;
1574
1575 auto &data = *m_data.back();
1576 if (data.m_size_known)
1577 return;
1578
1579 mxinfo(fmt::format("chunky bacon! data {0} seg end {1}\n", data.to_string(), m_segment_end));
1580
1581 auto elt = read_element(m_data.size() - 1);
1582 if (!elt)
1583 throw uer_error_fixing_last_element_unknown_size_failed;
1584
1585 auto head_size = static_cast<unsigned int>(elt->HeadSize());
1586 auto actual_size = m_segment_end - (elt->GetElementPosition() + head_size);
1587 auto required_bytes = CodedSizeLength(actual_size, 0);
1588 auto available_bytes = elt->GetSizeLength();
1589
1590 if ((available_bytes < required_bytes) || !elt->ForceSize(actual_size))
1591 throw uer_error_fixing_last_element_unknown_size_failed;
1592
1593 elt->OverwriteHead(*m_stream, true);
1594
1595 data.m_size = actual_size + head_size;
1596 data.m_size_known = true;
1597
1598 log_debug_message(fmt::format("fix_unknown_size_for_last_level1_element: element fixed to new payload size {0} head size {1} segment end {2}\n", actual_size, head_size, m_segment_end));
1599 }
1600
1601 kax_analyzer_c::placement_strategy_e
get_placement_strategy_for(EbmlElement * e)1602 kax_analyzer_c::get_placement_strategy_for(EbmlElement *e) {
1603 return Is<KaxTags>(e) ? ps_end : ps_anywhere;
1604 }
1605
1606 EbmlHead &
get_ebml_head()1607 kax_analyzer_c::get_ebml_head() {
1608 return *m_ebml_head;
1609 }
1610
1611 bool
is_webm() const1612 kax_analyzer_c::is_webm()
1613 const {
1614 return m_is_webm;
1615 }
1616
1617 uint64_t
get_segment_pos() const1618 kax_analyzer_c::get_segment_pos()
1619 const {
1620 if (!m_segment)
1621 return 0;
1622
1623 return m_segment->GetElementPosition();
1624 }
1625
1626 uint64_t
get_segment_data_start_pos() const1627 kax_analyzer_c::get_segment_data_start_pos()
1628 const {
1629 if (!m_segment)
1630 return 0;
1631
1632 return m_segment->GetElementPosition() + m_segment->HeadSize();
1633 }
1634
1635 mtx::bits::value_cptr
read_segment_uid_from(std::string const & file_name)1636 kax_analyzer_c::read_segment_uid_from(std::string const &file_name) {
1637 try {
1638 auto analyzer = std::make_shared<kax_analyzer_c>(file_name);
1639 auto ok = analyzer
1640 ->set_parse_mode(kax_analyzer_c::parse_mode_fast)
1641 .set_open_mode(MODE_READ)
1642 .process();
1643
1644 if (ok) {
1645 auto element = analyzer->read_all(EBML_INFO(KaxInfo));
1646 auto segment_info = dynamic_cast<KaxInfo *>(element.get());
1647 auto segment_uid = segment_info ? FindChild<KaxSegmentUID>(segment_info) : nullptr;
1648
1649 if (segment_uid)
1650 return std::make_shared<mtx::bits::value_c>(*segment_uid);
1651 }
1652
1653 } catch (mtx::mm_io::exception &ex) {
1654 throw mtx::kax_analyzer_x{fmt::format(Y("The file '{0}' could not be opened for reading: {1}."), file_name, ex)};
1655
1656 } catch (mtx::kax_analyzer_x &ex) {
1657 throw mtx::kax_analyzer_x{fmt::format(Y("The file '{0}' could not be opened for reading: {1}."), file_name, ex)};
1658
1659 } catch (...) {
1660 throw mtx::kax_analyzer_x{fmt::format(Y("The file '{0}' could not be opened or parsed."), file_name)};
1661
1662 }
1663
1664 throw mtx::kax_analyzer_x{fmt::format(Y("No segment UID could be found in the file '{0}'."), file_name)};
1665 }
1666
1667 int
find(EbmlId const & id)1668 kax_analyzer_c::find(EbmlId const &id) {
1669 for (int idx = 0, end = m_data.size(); idx < end; idx++)
1670 if (id == m_data[idx]->m_id)
1671 return idx;
1672
1673 return -1;
1674 }
1675
1676 void
with_elements(const EbmlId & id,std::function<void (kax_analyzer_data_c const &)> worker) const1677 kax_analyzer_c::with_elements(const EbmlId &id,
1678 std::function<void(kax_analyzer_data_c const &)> worker)
1679 const {
1680 for (auto const &data : m_data)
1681 if (data->m_id == id)
1682 worker(*data);
1683 }
1684
1685 void
determine_webm()1686 kax_analyzer_c::determine_webm() {
1687 auto doc_type = FindChild<EDocType>(*m_ebml_head);
1688 m_is_webm = doc_type && (doc_type->GetValue() == "webm");
1689 }
1690
1691
1692 // ------------------------------------------------------------
1693
1694 bool m_show_progress;
1695 int m_previous_percentage;
1696
console_kax_analyzer_c(std::string file_name)1697 console_kax_analyzer_c::console_kax_analyzer_c(std::string file_name)
1698 : kax_analyzer_c(file_name)
1699 , m_show_progress(false)
1700 , m_previous_percentage(-1)
1701 {
1702 }
1703
1704 void
set_show_progress(bool show_progress)1705 console_kax_analyzer_c::set_show_progress(bool show_progress) {
1706 if (-1 == m_previous_percentage)
1707 m_show_progress = show_progress;
1708 }
1709
1710 void
show_progress_start(int64_t)1711 console_kax_analyzer_c::show_progress_start(int64_t) {
1712 if (!m_show_progress)
1713 return;
1714
1715 m_previous_percentage = -1;
1716 show_progress_running(0);
1717 }
1718
1719 bool
show_progress_running(int percentage)1720 console_kax_analyzer_c::show_progress_running(int percentage) {
1721 if (!m_show_progress || (percentage == m_previous_percentage))
1722 return true;
1723
1724 std::string full_bar( percentage * CONSOLE_PERCENTAGE_WIDTH / 100, '=');
1725 std::string empty_bar((100 - percentage) * CONSOLE_PERCENTAGE_WIDTH / 100, ' ');
1726
1727 mxinfo(fmt::format(Y("Progress: [{0}{1}] {2}%"), full_bar, empty_bar, percentage));
1728 mxinfo("\r");
1729
1730 m_previous_percentage = percentage;
1731
1732 return true;
1733 }
1734
1735 void
show_progress_done()1736 console_kax_analyzer_c::show_progress_done() {
1737 if (!m_show_progress)
1738 return;
1739
1740 show_progress_running(100);
1741 mxinfo("\n");
1742 }
1743
1744 void
debug_abort_process()1745 console_kax_analyzer_c::debug_abort_process() {
1746 }
1747