1 /*
2 mkvmerge -- utility for splicing together matroska files
3 from component media subtypes
4
5 Distributed under the GPL v2
6 see the file COPYING for details
7 or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
8
9 EBML/XML converter specialization for chapters
10
11 Written by Moritz Bunkus <moritz@bunkus.org>.
12 */
13
14 #include "common/common_pch.h"
15
16 #include <sstream>
17
18 #include "common/iso639.h"
19 #include "common/iso3166.h"
20 #include "common/mm_io_x.h"
21 #include "common/mm_file_io.h"
22 #include "common/mm_proxy_io.h"
23 #include "common/mm_text_io.h"
24 #include "common/strings/formatting.h"
25 #include "common/unique_numbers.h"
26 #include "common/xml/ebml_chapters_converter.h"
27
28 using namespace libmatroska;
29
30 namespace mtx::xml {
31
ebml_chapters_converter_c()32 ebml_chapters_converter_c::ebml_chapters_converter_c()
33 {
34 setup_maps();
35 }
36
~ebml_chapters_converter_c()37 ebml_chapters_converter_c::~ebml_chapters_converter_c() {
38 }
39
40 void
setup_maps()41 ebml_chapters_converter_c::setup_maps() {
42 m_formatter_map["ChapterTimeStart"] = format_timestamp;
43 m_formatter_map["ChapterTimeEnd"] = format_timestamp;
44
45 m_parser_map["ChapterTimeStart"] = parse_timestamp;
46 m_parser_map["ChapterTimeEnd"] = parse_timestamp;
47
48 m_limits["EditionUID"] = limits_t{ true, false, 1, 0 };
49 m_limits["EditionFlagHidden"] = limits_t{ true, true, 0, 1 };
50 m_limits["EditionFlagDefault"] = limits_t{ true, true, 0, 1 };
51 m_limits["EditionFlagOrdered"] = limits_t{ true, true, 0, 1 };
52 m_limits["ChapterFlagHidden"] = limits_t{ true, true, 0, 1 };
53 m_limits["ChapterFlagEnabled"] = limits_t{ true, true, 0, 1 };
54 m_limits["ChapterUID"] = limits_t{ true, false, 1, 0 };
55 m_limits["ChapterSegmentUID"] = limits_t{ true, false, 1, 0 };
56 m_limits["ChapterSegmentEditionUID"] = limits_t{ true, false, 1, 0 };
57 m_limits["ChapterTrackNumber"] = limits_t{ true, false, 1, 0 };
58
59 reverse_debug_to_tag_name_map();
60
61 if (debugging_c::requested("ebml_converter_semantics"))
62 dump_semantics("Chapters");
63 }
64
65 void
fix_xml(document_cptr & doc) const66 ebml_chapters_converter_c::fix_xml(document_cptr &doc)
67 const {
68 auto result = doc->select_nodes("//ChapterAtom[not(ChapterTimeStart)]");
69 for (auto &atom : result)
70 atom.node().append_child("ChapterTimeStart").append_child(pugi::node_pcdata).set_value(mtx::string::format_timestamp(0).c_str());
71
72 result = doc->select_nodes("//ChapterDisplay[not(ChapterString)]");
73 for (auto &atom : result)
74 atom.node().append_child("ChapterString");
75 }
76
77 void
fix_ebml(EbmlMaster & chapters) const78 ebml_chapters_converter_c::fix_ebml(EbmlMaster &chapters)
79 const {
80 for (auto element : chapters)
81 if (dynamic_cast<KaxEditionEntry *>(element))
82 fix_edition_entry(static_cast<KaxEditionEntry &>(*element));
83 }
84
85 void
fix_edition_entry(KaxEditionEntry & eentry) const86 ebml_chapters_converter_c::fix_edition_entry(KaxEditionEntry &eentry)
87 const {
88 bool atom_found = false;
89
90 KaxEditionUID *euid = nullptr;
91 for (auto element : eentry)
92 if (dynamic_cast<KaxEditionUID *>(element)) {
93 euid = static_cast<KaxEditionUID *>(element);
94 if (!is_unique_number(euid->GetValue(), UNIQUE_EDITION_IDS)) {
95 mxwarn(fmt::format(Y("Chapter parser: The EditionUID {0} is not unique and could not be reused. A new one will be created.\n"), euid->GetValue()));
96 euid->SetValue(create_unique_number(UNIQUE_EDITION_IDS));
97 }
98
99 } else if (dynamic_cast<KaxChapterAtom *>(element)) {
100 atom_found = true;
101 fix_atom(static_cast<KaxChapterAtom &>(*element));
102 }
103
104 if (!atom_found)
105 throw conversion_x{Y("At least one <ChapterAtom> element is needed.")};
106
107 if (!euid)
108 eentry.PushElement((new KaxEditionUID)->SetValue(create_unique_number(UNIQUE_EDITION_IDS)));
109 }
110
111 void
fix_atom(KaxChapterAtom & atom) const112 ebml_chapters_converter_c::fix_atom(KaxChapterAtom &atom)
113 const {
114 for (auto element : atom)
115 if (dynamic_cast<KaxChapterAtom *>(element))
116 fix_atom(*static_cast<KaxChapterAtom *>(element));
117
118 if (!FindChild<KaxChapterTimeStart>(atom))
119 throw conversion_x{Y("<ChapterAtom> is missing the <ChapterTimeStart> child.")};
120
121 if (!FindChild<KaxChapterUID>(atom))
122 atom.PushElement((new KaxChapterUID)->SetValue(create_unique_number(UNIQUE_CHAPTER_IDS)));
123
124 auto ctrack = FindChild<KaxChapterTrack>(atom);
125 if (ctrack && !FindChild<KaxChapterTrackNumber>(ctrack))
126 throw conversion_x{Y("<ChapterTrack> is missing the <ChapterTrackNumber> child.")};
127
128 auto cdisplay = FindChild<KaxChapterDisplay>(atom);
129 if (cdisplay)
130 fix_display(*cdisplay);
131 }
132
133 void
fix_display_languages_and_countries(libmatroska::KaxChapterDisplay & display) const134 ebml_chapters_converter_c::fix_display_languages_and_countries(libmatroska::KaxChapterDisplay &display)
135 const {
136 for (auto const &child : display)
137 if (auto kax_ietf_language = dynamic_cast<libmatroska::KaxChapLanguageIETF *>(child); kax_ietf_language) {
138 auto parsed_language = mtx::bcp47::language_c::parse(kax_ietf_language->GetValue());
139
140 if (!parsed_language.is_valid())
141 throw conversion_x{fmt::format(Y("'{0}' is not a valid IETF BCP 47/RFC 5646 language tag. Additional information from the parser: {1}"), kax_ietf_language->GetValue(), parsed_language.get_error())};
142
143 } else if (auto kax_legacy_language = dynamic_cast<libmatroska::KaxChapterLanguage *>(child); kax_legacy_language) {
144 auto code = kax_legacy_language->GetValue();
145 auto language_opt = mtx::iso639::look_up(code);
146
147 if (!language_opt || !language_opt->is_part_of_iso639_2)
148 throw conversion_x{fmt::format(Y("'{0}' is not a valid ISO 639-2 language code."), code)};
149
150 } else if (auto kax_country = dynamic_cast<libmatroska::KaxChapterCountry *>(child); kax_country) {
151 auto country = kax_country->GetValue();
152 auto country_opt = mtx::iso3166::look_up_cctld(country);
153 if (!country_opt)
154 throw conversion_x{fmt::format(Y("'{0}' is not a valid ccTLD country code."), country)};
155
156 auto cctld = mtx::string::to_lower_ascii(country_opt->alpha_2_code);
157
158 if (country != cctld)
159 kax_country->SetValue(cctld);
160 }
161
162 mtx::chapters::unify_legacy_and_bcp47_languages_and_countries(display);
163 }
164
165 void
fix_display(libmatroska::KaxChapterDisplay & display) const166 ebml_chapters_converter_c::fix_display(libmatroska::KaxChapterDisplay &display)
167 const {
168 if (!FindChild<KaxChapterString>(display))
169 throw conversion_x{Y("<ChapterDisplay> is missing the <ChapterString> child.")};
170
171 fix_display_languages_and_countries(display);
172 }
173
174 void
write_xml(KaxChapters & chapters,mm_io_c & out)175 ebml_chapters_converter_c::write_xml(KaxChapters &chapters,
176 mm_io_c &out) {
177 document_cptr doc(new pugi::xml_document);
178
179 doc->append_child(pugi::node_comment).set_value(" <!DOCTYPE Chapters SYSTEM \"matroskachapters.dtd\"> ");
180
181 ebml_chapters_converter_c converter;
182 converter.to_xml(chapters, doc);
183
184 out.write_bom("UTF-8");
185
186 std::stringstream out_stream;
187 doc->save(out_stream, " ");
188 out.puts(out_stream.str());
189 }
190
191 bool
probe_file(std::string const & file_name)192 ebml_chapters_converter_c::probe_file(std::string const &file_name) {
193 try {
194 mm_text_io_c in(std::make_shared<mm_file_io_c>(file_name, MODE_READ));
195 std::string line;
196
197 while (in.getline2(line)) {
198 // I assume that if it looks like XML then it is an XML chapter file :)
199 mtx::string::strip(line);
200 if (balg::istarts_with(line, "<?xml"))
201 return true;
202 else if (!line.empty())
203 return false;
204 }
205
206 } catch (...) {
207 }
208
209 return false;
210 }
211
212 mtx::chapters::kax_cptr
parse_file(std::string const & file_name,bool throw_on_error)213 ebml_chapters_converter_c::parse_file(std::string const &file_name,
214 bool throw_on_error) {
215 auto parse = [&file_name]() -> auto {
216 auto master = ebml_chapters_converter_c{}.to_ebml(file_name, "Chapters");
217 sort_ebml_master(master.get());
218 fix_mandatory_elements(static_cast<KaxChapters *>(master.get()));
219 return std::dynamic_pointer_cast<KaxChapters>(master);
220 };
221
222 if (throw_on_error)
223 return parse();
224
225 try {
226 return parse();
227
228 } catch (mtx::mm_io::exception &ex) {
229 mxerror(fmt::format(Y("The XML chapter file '{0}' could not be read.\n"), file_name));
230
231 } catch (mtx::xml::xml_parser_x &ex) {
232 mxerror(fmt::format(Y("The XML chapter file '{0}' contains an error at position {2}: {1}\n"), file_name, ex.result().description(), ex.result().offset));
233
234 } catch (mtx::xml::exception &ex) {
235 mxerror(fmt::format(Y("The XML chapter file '{0}' contains an error: {1}\n"), file_name, ex.what()));
236 }
237
238 return mtx::chapters::kax_cptr{};
239 }
240
241 }
242