1 /*
2   mkvmerge -- utility for splicing together matroska files
3   from component media subtypes
4 
5   Distributed under the GPL v2
6   see the file COPYING for details
7   or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
8 
9   EBML/XML converter specialization for chapters
10 
11   Written by Moritz Bunkus <moritz@bunkus.org>.
12 */
13 
14 #include "common/common_pch.h"
15 
16 #include <sstream>
17 
18 #include "common/iso639.h"
19 #include "common/iso3166.h"
20 #include "common/mm_io_x.h"
21 #include "common/mm_file_io.h"
22 #include "common/mm_proxy_io.h"
23 #include "common/mm_text_io.h"
24 #include "common/strings/formatting.h"
25 #include "common/unique_numbers.h"
26 #include "common/xml/ebml_chapters_converter.h"
27 
28 using namespace libmatroska;
29 
30 namespace mtx::xml {
31 
ebml_chapters_converter_c()32 ebml_chapters_converter_c::ebml_chapters_converter_c()
33 {
34   setup_maps();
35 }
36 
~ebml_chapters_converter_c()37 ebml_chapters_converter_c::~ebml_chapters_converter_c() {
38 }
39 
40 void
setup_maps()41 ebml_chapters_converter_c::setup_maps() {
42   m_formatter_map["ChapterTimeStart"]  = format_timestamp;
43   m_formatter_map["ChapterTimeEnd"]    = format_timestamp;
44 
45   m_parser_map["ChapterTimeStart"]     = parse_timestamp;
46   m_parser_map["ChapterTimeEnd"]       = parse_timestamp;
47 
48   m_limits["EditionUID"]               = limits_t{ true, false, 1, 0 };
49   m_limits["EditionFlagHidden"]        = limits_t{ true, true,  0, 1 };
50   m_limits["EditionFlagDefault"]       = limits_t{ true, true,  0, 1 };
51   m_limits["EditionFlagOrdered"]       = limits_t{ true, true,  0, 1 };
52   m_limits["ChapterFlagHidden"]        = limits_t{ true, true,  0, 1 };
53   m_limits["ChapterFlagEnabled"]       = limits_t{ true, true,  0, 1 };
54   m_limits["ChapterUID"]               = limits_t{ true, false, 1, 0 };
55   m_limits["ChapterSegmentUID"]        = limits_t{ true, false, 1, 0 };
56   m_limits["ChapterSegmentEditionUID"] = limits_t{ true, false, 1, 0 };
57   m_limits["ChapterTrackNumber"]       = limits_t{ true, false, 1, 0 };
58 
59   reverse_debug_to_tag_name_map();
60 
61   if (debugging_c::requested("ebml_converter_semantics"))
62     dump_semantics("Chapters");
63 }
64 
65 void
fix_xml(document_cptr & doc) const66 ebml_chapters_converter_c::fix_xml(document_cptr &doc)
67   const {
68   auto result = doc->select_nodes("//ChapterAtom[not(ChapterTimeStart)]");
69   for (auto &atom : result)
70     atom.node().append_child("ChapterTimeStart").append_child(pugi::node_pcdata).set_value(mtx::string::format_timestamp(0).c_str());
71 
72   result = doc->select_nodes("//ChapterDisplay[not(ChapterString)]");
73   for (auto &atom : result)
74     atom.node().append_child("ChapterString");
75 }
76 
77 void
fix_ebml(EbmlMaster & chapters) const78 ebml_chapters_converter_c::fix_ebml(EbmlMaster &chapters)
79   const {
80   for (auto element : chapters)
81     if (dynamic_cast<KaxEditionEntry *>(element))
82       fix_edition_entry(static_cast<KaxEditionEntry &>(*element));
83 }
84 
85 void
fix_edition_entry(KaxEditionEntry & eentry) const86 ebml_chapters_converter_c::fix_edition_entry(KaxEditionEntry &eentry)
87   const {
88   bool atom_found = false;
89 
90   KaxEditionUID *euid = nullptr;
91   for (auto element : eentry)
92     if (dynamic_cast<KaxEditionUID *>(element)) {
93       euid = static_cast<KaxEditionUID *>(element);
94       if (!is_unique_number(euid->GetValue(), UNIQUE_EDITION_IDS)) {
95         mxwarn(fmt::format(Y("Chapter parser: The EditionUID {0} is not unique and could not be reused. A new one will be created.\n"), euid->GetValue()));
96         euid->SetValue(create_unique_number(UNIQUE_EDITION_IDS));
97       }
98 
99     } else if (dynamic_cast<KaxChapterAtom *>(element)) {
100       atom_found = true;
101       fix_atom(static_cast<KaxChapterAtom &>(*element));
102     }
103 
104   if (!atom_found)
105     throw conversion_x{Y("At least one <ChapterAtom> element is needed.")};
106 
107   if (!euid)
108     eentry.PushElement((new KaxEditionUID)->SetValue(create_unique_number(UNIQUE_EDITION_IDS)));
109 }
110 
111 void
fix_atom(KaxChapterAtom & atom) const112 ebml_chapters_converter_c::fix_atom(KaxChapterAtom &atom)
113   const {
114   for (auto element : atom)
115     if (dynamic_cast<KaxChapterAtom *>(element))
116       fix_atom(*static_cast<KaxChapterAtom *>(element));
117 
118   if (!FindChild<KaxChapterTimeStart>(atom))
119     throw conversion_x{Y("<ChapterAtom> is missing the <ChapterTimeStart> child.")};
120 
121   if (!FindChild<KaxChapterUID>(atom))
122     atom.PushElement((new KaxChapterUID)->SetValue(create_unique_number(UNIQUE_CHAPTER_IDS)));
123 
124   auto ctrack = FindChild<KaxChapterTrack>(atom);
125   if (ctrack && !FindChild<KaxChapterTrackNumber>(ctrack))
126     throw conversion_x{Y("<ChapterTrack> is missing the <ChapterTrackNumber> child.")};
127 
128   auto cdisplay = FindChild<KaxChapterDisplay>(atom);
129   if (cdisplay)
130     fix_display(*cdisplay);
131 }
132 
133 void
fix_display_languages_and_countries(libmatroska::KaxChapterDisplay & display) const134 ebml_chapters_converter_c::fix_display_languages_and_countries(libmatroska::KaxChapterDisplay &display)
135   const {
136   for (auto const &child : display)
137     if (auto kax_ietf_language = dynamic_cast<libmatroska::KaxChapLanguageIETF *>(child); kax_ietf_language) {
138       auto parsed_language = mtx::bcp47::language_c::parse(kax_ietf_language->GetValue());
139 
140       if (!parsed_language.is_valid())
141         throw conversion_x{fmt::format(Y("'{0}' is not a valid IETF BCP 47/RFC 5646 language tag. Additional information from the parser: {1}"), kax_ietf_language->GetValue(), parsed_language.get_error())};
142 
143     } else if (auto kax_legacy_language = dynamic_cast<libmatroska::KaxChapterLanguage *>(child); kax_legacy_language) {
144       auto code         = kax_legacy_language->GetValue();
145       auto language_opt = mtx::iso639::look_up(code);
146 
147       if (!language_opt || !language_opt->is_part_of_iso639_2)
148         throw conversion_x{fmt::format(Y("'{0}' is not a valid ISO 639-2 language code."), code)};
149 
150     } else if (auto kax_country = dynamic_cast<libmatroska::KaxChapterCountry *>(child); kax_country) {
151       auto country     = kax_country->GetValue();
152       auto country_opt = mtx::iso3166::look_up_cctld(country);
153       if (!country_opt)
154         throw conversion_x{fmt::format(Y("'{0}' is not a valid ccTLD country code."), country)};
155 
156       auto cctld = mtx::string::to_lower_ascii(country_opt->alpha_2_code);
157 
158       if (country != cctld)
159         kax_country->SetValue(cctld);
160     }
161 
162   mtx::chapters::unify_legacy_and_bcp47_languages_and_countries(display);
163 }
164 
165 void
fix_display(libmatroska::KaxChapterDisplay & display) const166 ebml_chapters_converter_c::fix_display(libmatroska::KaxChapterDisplay &display)
167   const {
168   if (!FindChild<KaxChapterString>(display))
169     throw conversion_x{Y("<ChapterDisplay> is missing the <ChapterString> child.")};
170 
171   fix_display_languages_and_countries(display);
172 }
173 
174 void
write_xml(KaxChapters & chapters,mm_io_c & out)175 ebml_chapters_converter_c::write_xml(KaxChapters &chapters,
176                                      mm_io_c &out) {
177   document_cptr doc(new pugi::xml_document);
178 
179   doc->append_child(pugi::node_comment).set_value(" <!DOCTYPE Chapters SYSTEM \"matroskachapters.dtd\"> ");
180 
181   ebml_chapters_converter_c converter;
182   converter.to_xml(chapters, doc);
183 
184   out.write_bom("UTF-8");
185 
186   std::stringstream out_stream;
187   doc->save(out_stream, "  ");
188   out.puts(out_stream.str());
189 }
190 
191 bool
probe_file(std::string const & file_name)192 ebml_chapters_converter_c::probe_file(std::string const &file_name) {
193   try {
194     mm_text_io_c in(std::make_shared<mm_file_io_c>(file_name, MODE_READ));
195     std::string line;
196 
197     while (in.getline2(line)) {
198       // I assume that if it looks like XML then it is an XML chapter file :)
199       mtx::string::strip(line);
200       if (balg::istarts_with(line, "<?xml"))
201         return true;
202       else if (!line.empty())
203         return false;
204     }
205 
206   } catch (...) {
207   }
208 
209   return false;
210 }
211 
212 mtx::chapters::kax_cptr
parse_file(std::string const & file_name,bool throw_on_error)213 ebml_chapters_converter_c::parse_file(std::string const &file_name,
214                                       bool throw_on_error) {
215   auto parse = [&file_name]() -> auto {
216     auto master = ebml_chapters_converter_c{}.to_ebml(file_name, "Chapters");
217     sort_ebml_master(master.get());
218     fix_mandatory_elements(static_cast<KaxChapters *>(master.get()));
219     return std::dynamic_pointer_cast<KaxChapters>(master);
220   };
221 
222   if (throw_on_error)
223     return parse();
224 
225   try {
226     return parse();
227 
228   } catch (mtx::mm_io::exception &ex) {
229     mxerror(fmt::format(Y("The XML chapter file '{0}' could not be read.\n"), file_name));
230 
231   } catch (mtx::xml::xml_parser_x &ex) {
232     mxerror(fmt::format(Y("The XML chapter file '{0}' contains an error at position {2}: {1}\n"), file_name, ex.result().description(), ex.result().offset));
233 
234   } catch (mtx::xml::exception &ex) {
235     mxerror(fmt::format(Y("The XML chapter file '{0}' contains an error: {1}\n"), file_name, ex.what()));
236   }
237 
238   return mtx::chapters::kax_cptr{};
239 }
240 
241 }
242