1 /*
2    mkvmerge -- utility for splicing together matroska files
3    from component media subtypes
4 
5    Distributed under the GPL v2
6    see the file COPYING for details
7    or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
8 
9    quick Matroska file parsing
10 
11    Written by Moritz Bunkus <moritz@bunkus.org>.
12 */
13 
14 #pragma once
15 
16 #include "common/common_pch.h"
17 
18 #include <ebml/EbmlHead.h>
19 #include <matroska/KaxSegment.h>
20 
21 #include "common/ebml.h"
22 #include "common/mm_io.h"
23 
// Forward declarations. This header only uses the types below through
// pointers, references or smart-pointer aliases, so incomplete types suffice.
namespace libmatroska {
class KaxCluster;
}

namespace mtx {
class doc_type_version_handler_c;
}

namespace mtx::bits {
class value_c;
using value_cptr = std::shared_ptr<value_c>;
}

// Shared-ownership handle for kax_analyzer_data_c; declared ahead of the class
// so that the class itself can refer to it.
class kax_analyzer_data_c;
using kax_analyzer_data_cptr = std::shared_ptr<kax_analyzer_data_c>;
38 
39 class kax_analyzer_data_c {
40 public:
41   libebml::EbmlId m_id;
42   uint64_t m_pos;
43   int64_t m_size;
44   bool m_size_known;
45 
46 public:                         // Static functions
47   static kax_analyzer_data_cptr create(const libebml::EbmlId id, uint64_t pos, int64_t size, bool size_known = true) {
48     return std::make_shared<kax_analyzer_data_c>(id, pos, size, size_known);
49   }
50 
51 public:
kax_analyzer_data_c(const libebml::EbmlId id,uint64_t pos,int64_t size,bool size_known)52   kax_analyzer_data_c(const libebml::EbmlId id, uint64_t pos, int64_t size, bool size_known)
53     : m_id{id}
54     , m_pos{pos}
55     , m_size{size}
56     , m_size_known{size_known}
57   {
58   }
59 
60   std::string to_string() const;
61 };
62 
63 bool operator <(const kax_analyzer_data_cptr &d1, const kax_analyzer_data_cptr &d2);
64 
65 namespace mtx {
66   class kax_analyzer_x: public exception {
67   protected:
68     std::string m_message;
69   public:
kax_analyzer_x(const std::string & message)70     kax_analyzer_x(const std::string &message) : m_message{message} { }
~kax_analyzer_x()71     virtual ~kax_analyzer_x() throw() { }
72 
what()73     virtual const char *what() const throw() {
74       return m_message.c_str();
75     }
76   };
77 }
78 
79 class kax_analyzer_c {
80 public:
81   enum update_element_result_e {
82     uer_success,
83     uer_error_segment_size_for_element,
84     uer_error_segment_size_for_meta_seek,
85     uer_error_meta_seek,
86     uer_error_not_indexable,
87     uer_error_opening_for_reading,
88     uer_error_opening_for_writing,
89     uer_error_fixing_last_element_unknown_size_failed,
90     uer_error_unknown,
91   };
92 
93   enum parse_mode_e {
94     parse_mode_fast,
95     parse_mode_full,
96   };
97 
98   enum placement_strategy_e {
99     ps_anywhere,
100     ps_end,
101   };
102 
103 private:
104   std::vector<kax_analyzer_data_cptr> m_data;
105   std::string m_file_name;
106   mm_io_cptr m_file;
107   bool m_close_file{true};
108   std::shared_ptr<libmatroska::KaxSegment> m_segment;
109   std::shared_ptr<libebml::EbmlHead> m_ebml_head;
110   uint64_t m_segment_end{};
111   std::map<int64_t, bool> m_meta_seeks_by_position;
112   std::shared_ptr<libebml::EbmlStream> m_stream;
113   debugging_option_c m_debug{"kax_analyzer"};
114   parse_mode_e m_parse_mode{parse_mode_full};
115   open_mode m_open_mode{MODE_WRITE};
116   bool m_throw_on_error{};
117   std::optional<uint64_t> m_parser_start_position;
118   bool m_is_webm{};
119   mtx::doc_type_version_handler_c *m_doc_type_version_handler{};
120 
121 public:                         // Static functions
122   static bool probe(std::string file_name);
123 
124 public:
125   kax_analyzer_c(std::string file_name);
126   kax_analyzer_c(mm_io_cptr const &file);
127   virtual ~kax_analyzer_c();
128 
129   virtual update_element_result_e update_element(libebml::EbmlElement *e, bool write_defaults = false, bool add_mandatory_elements_if_missing = true);
130   virtual update_element_result_e update_element(ebml_element_cptr const &e, bool write_defaults = false, bool add_mandatory_elements_if_missing = true);
131 
132   virtual update_element_result_e remove_elements(libebml::EbmlId const &id);
133 
134   virtual update_element_result_e update_uid_referrals(std::unordered_map<uint64_t, uint64_t> const &track_uid_changes);
135 
136   virtual ebml_master_cptr read_all(const libebml::EbmlCallbacks &callbacks);
137   virtual ebml_element_cptr read_element(kax_analyzer_data_c const &element_data);
138   virtual ebml_element_cptr read_element(kax_analyzer_data_cptr const &element_data);
139   virtual ebml_element_cptr read_element(unsigned int pos);
140 
141   virtual void with_elements(const libebml::EbmlId &id, std::function<void(kax_analyzer_data_c const &)> worker) const;
142   virtual int find(libebml::EbmlId const &id);
143 
144   virtual libebml::EbmlHead &get_ebml_head();
145   virtual bool is_webm() const;
146 
147   virtual uint64_t get_segment_pos() const;
148   virtual uint64_t get_segment_data_start_pos() const;
149 
150   virtual kax_analyzer_c &set_parse_mode(parse_mode_e parse_mode);
151   virtual kax_analyzer_c &set_open_mode(open_mode mode);
152   virtual kax_analyzer_c &set_throw_on_error(bool throw_on_error);
153   virtual kax_analyzer_c &set_parser_start_position(uint64_t position);
154   virtual kax_analyzer_c &set_doc_type_version_handler(mtx::doc_type_version_handler_c *handler);
155 
156   virtual bool process();
157 
show_progress_start(int64_t)158   virtual void show_progress_start(int64_t /* size */) {
159   }
show_progress_running(int)160   virtual bool show_progress_running(int /* percentage */) {
161     return true;
162   }
show_progress_done()163   virtual void show_progress_done() {
164   }
165 
log_debug_message(const std::string & message)166   virtual void log_debug_message(const std::string &message) {
167     _log_debug_message(message);
168   }
debug_abort_process()169   virtual void debug_abort_process() {
170     mxexit(1);
171   }
172 
173   virtual void close_file();
174   virtual void reopen_file();
175   virtual void reopen_file_for_writing();
get_file()176   virtual mm_io_c &get_file() {
177     return *m_file;
178   }
179 
180   static placement_strategy_e get_placement_strategy_for(libebml::EbmlElement *e);
get_placement_strategy_for(ebml_element_cptr e)181   static placement_strategy_e get_placement_strategy_for(ebml_element_cptr e) {
182     return get_placement_strategy_for(e.get());
183   }
184 
185   static mtx::bits::value_cptr read_segment_uid_from(std::string const &file_name);
186 
187 protected:
188   virtual void _log_debug_message(const std::string &message);
189 
190   virtual void remove_from_meta_seeks(libebml::EbmlId id);
191   virtual void overwrite_all_instances(libebml::EbmlId id);
192   virtual void merge_void_elements();
193   virtual void write_element(libebml::EbmlElement *e, bool write_defaults, placement_strategy_e strategy);
194   virtual void add_to_meta_seek(libebml::EbmlElement *e);
195   virtual std::pair<bool, int> try_adding_to_existing_meta_seek(libebml::EbmlElement *e);
196   virtual void move_seek_head_to_end_and_create_new_one_at_start(libebml::EbmlElement *e, int first_seek_head_idx);
197   virtual bool create_new_meta_seek_at_start(libebml::EbmlElement *e);
198   virtual bool move_level1_element_before_cluster_to_end_of_file();
199   virtual int ensure_front_seek_head_links_to(unsigned int seek_head_idx);
200 
201   virtual void adjust_segment_size();
202   virtual bool handle_void_elements(size_t data_idx);
203 
204   virtual bool analyzer_debugging_requested(const std::string &section);
205   virtual void debug_dump_elements();
206   virtual void debug_dump_elements_maybe(const std::string &hook_name);
207   virtual bool validate_and_break(std::string const &hook_name);
208   virtual void validate_data_structures(const std::string &hook_name);
209   virtual void verify_data_structures_against_file(const std::string &hook_name);
210 
211   virtual void read_all_meta_seeks();
212   virtual void read_meta_seek(uint64_t pos, std::map<int64_t, bool> &positions_found);
213   virtual void fix_element_sizes(uint64_t file_size);
214   virtual void fix_unknown_size_for_last_level1_element();
215   virtual void adjust_cues_for_cluster(libmatroska::KaxCluster const &cluster, uint64_t original_relative_position);
216 
217   virtual void determine_webm();
218 
219 protected:
220   virtual bool process_internal();
221 };
222 using kax_analyzer_cptr = std::shared_ptr<kax_analyzer_c>;
223 
224 class console_kax_analyzer_c: public kax_analyzer_c {
225 private:
226   bool m_show_progress;
227   int m_previous_percentage;
228 
229 public:
230   console_kax_analyzer_c(std::string file_name);
231   virtual ~console_kax_analyzer_c() = default;
232 
233   virtual void set_show_progress(bool show_progress);
234 
235   virtual void show_progress_start(int64_t size);
236   virtual bool show_progress_running(int percentage);
237   virtual void show_progress_done();
238 
239   virtual void debug_abort_process();
240 };
241 using console_kax_analyzer_cptr = std::shared_ptr<console_kax_analyzer_c>;
242