1 /** \brief chapter parser and helper functions
2 
3    mkvmerge -- utility for splicing together matroska files
4    from component media subtypes
5 
6    Distributed under the GPL v2
7    see the file COPYING for details
8    or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
9 
10    \file
11 
12    \author Written by Moritz Bunkus <moritz@bunkus.org>.
13 */
14 
15 #include "common/common_pch.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 
20 #include <QRegularExpression>
21 
22 #include <matroska/KaxChapters.h>
23 
24 #include "common/bcp47.h"
25 #include "common/chapters/chapters.h"
26 #include "common/chapters/dvd.h"
27 #include "common/construct.h"
28 #include "common/container.h"
29 #include "common/debugging.h"
30 #include "common/ebml.h"
31 #include "common/error.h"
32 #include "common/iso3166.h"
33 #include "common/locale.h"
34 #include "common/mm_io_x.h"
35 #include "common/mm_file_io.h"
36 #include "common/mm_proxy_io.h"
37 #include "common/mm_text_io.h"
38 #include "common/path.h"
39 #include "common/qt.h"
40 #include "common/strings/editing.h"
41 #include "common/strings/formatting.h"
42 #include "common/strings/parsing.h"
43 #include "common/unique_numbers.h"
44 #include "common/xml/ebml_chapters_converter.h"
45 
46 using namespace libmatroska;
47 
48 namespace mtx::chapters {
49 
50 namespace {
51 debugging_option_c s_debug{"chapters|chapter_parser"};
52 }
53 
54 /** The default language for all chapter entries that don't have their own. */
55 mtx::bcp47::language_c g_default_language;
56 /** The default country for all chapter entries that don't have their own. */
57 std::string g_default_country;
58 
59 translatable_string_c g_chapter_generation_name_template{YT("Chapter <NUM:2>")};
60 
// Matches "CHAPTERxx = hh:mm:ss.fraction" and captures the hours, minutes,
// seconds and the one to nine digits of the fractional part.
constexpr auto SIMCHAP_RE_TIMESTAMP_LINE = R"re(^\s*CHAPTER\d+\s*=\s*(\d+)\s*:\s*(\d+)\s*:\s*(\d+)\s*[\.,]\s*(\d{1,9}))re";
// Matches any "CHAPTERxx=..." line and captures everything after the "=".
constexpr auto SIMCHAP_RE_TIMESTAMP      = R"re(^\s*CHAPTER\d+\s*=(.*))re";
// Matches "CHAPTERxxNAME=..." and captures everything after the "=".
constexpr auto SIMCHAP_RE_NAME_LINE      = R"re(^\s*CHAPTER\d+NAME\s*=(.*))re";
64 
65 void
unify_legacy_and_bcp47_languages_and_countries(EbmlElement & elt)66 unify_legacy_and_bcp47_languages_and_countries(EbmlElement &elt) {
67   auto master = dynamic_cast<libebml::EbmlMaster *>(&elt);
68   if (!master)
69     return;
70 
71   auto display = dynamic_cast<KaxChapterDisplay *>(&elt);
72   if (!display) {
73     for (auto const child : *master)
74       unify_legacy_and_bcp47_languages_and_countries(*child);
75     return;
76   }
77 
78   std::vector<std::string> legacy_languages, legacy_countries;
79   std::vector<mtx::bcp47::language_c> bcp47_languages;
80   auto child_idx = 0u;
81 
82   while (child_idx < display->ListSize()) {
83     auto remove_child = true;
84     auto *child       = (*display)[child_idx];
85 
86     if (dynamic_cast<KaxChapterLanguage *>(child)) {
87       auto legacy_language = static_cast<KaxChapterLanguage &>(*child).GetValue();
88       if (!legacy_language.empty() && !mtx::includes(legacy_languages, legacy_language))
89         legacy_languages.emplace_back(legacy_language);
90 
91     } else if (dynamic_cast<KaxChapterCountry *>(child)) {
92       auto legacy_country = static_cast<KaxChapterCountry &>(*child).GetValue();
93       if (!legacy_country.empty() && !mtx::includes(legacy_countries, legacy_country))
94         legacy_countries.emplace_back(legacy_country);
95 
96     } else if (dynamic_cast<KaxChapLanguageIETF *>(child)) {
97       auto bcp47_language = mtx::bcp47::language_c::parse(static_cast<KaxChapLanguageIETF &>(*child).GetValue());
98       if (bcp47_language.is_valid() && !mtx::includes(bcp47_languages, bcp47_language))
99         bcp47_languages.emplace_back(bcp47_language);
100 
101     } else
102       remove_child = false;
103 
104     if (remove_child) {
105       display->Remove(child_idx);
106       delete child;
107 
108     } else
109       ++child_idx;
110   }
111 
112   if (legacy_languages.empty() && bcp47_languages.empty())
113     legacy_languages.emplace_back("eng"s);
114 
115   if (bcp47_languages.empty()) {
116     auto add_maybe = [&bcp47_languages](std::string const &new_bcp47_language_str) {
117       auto new_bcp47_language = mtx::bcp47::language_c::parse(new_bcp47_language_str);
118       if (new_bcp47_language.is_valid() && !mtx::includes(bcp47_languages, new_bcp47_language))
119         bcp47_languages.emplace_back(new_bcp47_language);
120     };
121 
122     for (auto const &legacy_language : legacy_languages) {
123       if (legacy_countries.empty())
124         add_maybe(legacy_language);
125 
126       else
127         for (auto const &legacy_country : legacy_countries) {
128           auto language_and_region = fmt::format("{0}-{1}", legacy_language, mtx::string::to_lower_ascii(legacy_country) == "uk" ? "gb"s : legacy_country);
129           add_maybe(language_and_region);
130         }
131     }
132   }
133 
134   legacy_languages.clear();
135   legacy_countries.clear();
136 
137   for (auto const &bcp47_language : bcp47_languages) {
138     auto legacy_language = bcp47_language.get_iso639_2_alpha_3_code_or("und");
139 
140     if (!mtx::includes(legacy_languages, legacy_language))
141       legacy_languages.emplace_back(legacy_language);
142 
143     auto legacy_country = bcp47_language.get_top_level_domain_country_code();
144 
145     if (!legacy_country.empty() && !mtx::includes(legacy_countries, legacy_country))
146       legacy_countries.emplace_back(legacy_country);
147   }
148 
149   std::sort(legacy_languages.begin(), legacy_languages.end());
150   std::sort(legacy_countries.begin(), legacy_countries.end());
151   std::sort(bcp47_languages.begin(),  bcp47_languages.end());
152 
153   for (auto const &legacy_language : legacy_languages)
154     AddEmptyChild<KaxChapterLanguage>(display).SetValue(legacy_language);
155 
156   for (auto const &legacy_country : legacy_countries)
157     AddEmptyChild<KaxChapterCountry>(display).SetValue(legacy_country);
158 
159   if (mtx::bcp47::language_c::is_disabled())
160     return;
161 
162   for (auto const &bcp47_language : bcp47_languages)
163     AddEmptyChild<KaxChapLanguageIETF>(display).SetValue(bcp47_language.format());
164 }
165 
166 /** \brief Throw a special chapter parser exception.
167 
168    \param error The error message.
169 */
170 inline void
chapter_error(const std::string & error)171 chapter_error(const std::string &error) {
172   throw parser_x(fmt::format(Y("Simple chapter parser: {0}\n"), error));
173 }
174 
175 /** \brief Reads the start of a file and checks for OGM style comments.
176 
177    The first lines are read. OGM style comments are recognized if the first
178    non-empty line contains <tt>CHAPTER01=...</tt> and the first non-empty
179    line afterwards contains <tt>CHAPTER01NAME=...</tt>.
180 
181    The parameters are checked for validity.
182 
183    \param in The file to read from.
184 
185    \return \c true if the file contains OGM style comments and \c false
186      otherwise.
187 */
188 bool
probe_simple(mm_text_io_c * in)189 probe_simple(mm_text_io_c *in) {
190   QRegularExpression timestamp_line_re{SIMCHAP_RE_TIMESTAMP_LINE};
191   QRegularExpression name_line_re{     SIMCHAP_RE_NAME_LINE};
192 
193   std::string line;
194 
195   assert(in);
196 
197   in->setFilePointer(0);
198   while (in->getline2(line)) {
199     mtx::string::strip(line);
200     if (line.empty())
201       continue;
202 
203     if (!Q(line).contains(timestamp_line_re))
204       return false;
205 
206     while (in->getline2(line)) {
207       mtx::string::strip(line);
208       if (line.empty())
209         continue;
210 
211       return Q(line).contains(name_line_re);
212     }
213 
214     return false;
215   }
216 
217   return false;
218 }
219 
220 //           1         2
221 // 012345678901234567890123
222 //
223 // CHAPTER01=00:00:00.000
224 // CHAPTER01NAME=Hallo Welt
225 
226 /** \brief Parse simple OGM style comments
227 
228    The file \a in is read. The content is assumed to be OGM style comments.
229 
230    The parameters are checked for validity.
231 
232    \param in The text file to read from.
233    \param min_ts An optional timestamp. If both \a min_ts and \a max_ts are
234      given then only those chapters that lie in the timerange
235      <tt>[min_ts..max_ts]</tt> are kept.
236    \param max_ts An optional timestamp. If both \a min_ts and \a max_ts are
237      given then only those chapters that lie in the timerange
238      <tt>[min_ts..max_ts]</tt> are kept.
239    \param offset An optional offset that is subtracted from all start and
240      end timestamps after the timerange check has been made.
241    \param language This language is added as the \c KaxChapterLanguage
242      for all entries.
243    \param charset The charset the chapters are supposed to be it. The entries
244      will be converted to UTF-8 if necessary.
245    \param exception_on_error If set to \c true then an exception is thrown
246      if an error occurs. Otherwise \c nullptr will be returned.
247 
248    \return The chapters parsed from the file or \c nullptr if an error occurred.
249 */
250 kax_cptr
parse_simple(mm_text_io_c * in,int64_t min_ts,int64_t max_ts,int64_t offset,mtx::bcp47::language_c const & language,std::string const & charset)251 parse_simple(mm_text_io_c *in,
252              int64_t min_ts,
253              int64_t max_ts,
254              int64_t offset,
255              mtx::bcp47::language_c const &language,
256              std::string const &charset) {
257   assert(in);
258 
259   in->setFilePointer(0);
260 
261   kax_cptr chaps{new KaxChapters};
262   KaxChapterAtom *atom     = nullptr;
263   KaxEditionEntry *edition = nullptr;
264   int mode                 = 0;
265   int num                  = 0;
266   int64_t start            = 0;
267   charset_converter_cptr cc_utf8;
268 
269   bool do_convert = in->get_byte_order_mark() == byte_order_mark_e::none;
270   if (do_convert)
271     cc_utf8 = charset_converter_c::init(charset);
272 
273   auto use_language = language.is_valid()           ? language
274                     : g_default_language.is_valid() ? g_default_language
275                     :                                 mtx::bcp47::language_c::parse("eng"s);
276 
277   QRegularExpression timestamp_line_re{SIMCHAP_RE_TIMESTAMP_LINE};
278   QRegularExpression timestamp_re{     SIMCHAP_RE_TIMESTAMP};
279   QRegularExpression name_line_re{     SIMCHAP_RE_NAME_LINE};
280   QRegularExpressionMatch matches;
281 
282   std::string line;
283 
284   while (in->getline2(line)) {
285     if (do_convert)
286       line = cc_utf8->utf8(line);
287 
288     mtx::string::strip(line);
289     if (line.empty())
290       continue;
291 
292     if (0 == mode) {
293       matches = timestamp_line_re.match(Q(line));
294       if (!matches.hasMatch())
295         chapter_error(fmt::format(Y("'{0}' is not a CHAPTERxx=... line."), line));
296 
297       int64_t hour = 0, minute = 0, second = 0, nsecs = 0;
298       mtx::string::parse_number(to_utf8(matches.captured(1)), hour);
299       mtx::string::parse_number(to_utf8(matches.captured(2)), minute);
300       mtx::string::parse_number(to_utf8(matches.captured(3)), second);
301       mtx::string::parse_number(to_utf8(matches.captured(4)), nsecs);
302 
303       if (59 < minute)
304         chapter_error(fmt::format(Y("Invalid minute: {0}"), minute));
305       if (59 < second)
306         chapter_error(fmt::format(Y("Invalid second: {0}"), second));
307 
308       for (int idx = matches.capturedLength(4); idx < 9; ++idx)
309         nsecs *= 10;
310 
311       start = nsecs + (second + minute * 60 + hour * 60 * 60) * 1'000'000'000;
312       mode  = 1;
313 
314       if (matches = timestamp_re.match(Q(line)); !matches.hasMatch())
315         chapter_error(fmt::format(Y("'{0}' is not a CHAPTERxx=... line."), line));
316 
317     } else {
318       if (matches = name_line_re.match(Q(line)); !matches.hasMatch())
319         chapter_error(fmt::format(Y("'{0}' is not a CHAPTERxxNAME=... line."), line));
320 
321       auto name = to_utf8(matches.captured(1));
322       if (name.empty())
323         name = format_name_template(g_chapter_generation_name_template.get_translated(), num + 1, timestamp_c::ns(start));
324 
325       mode = 0;
326 
327       if ((start >= min_ts) && ((start <= max_ts) || (max_ts == -1))) {
328         if (!edition)
329           edition = &GetChild<KaxEditionEntry>(*chaps);
330 
331         atom = &GetFirstOrNextChild<KaxChapterAtom>(*edition, atom);
332         GetChild<KaxChapterUID>(*atom).SetValue(create_unique_number(UNIQUE_CHAPTER_IDS));
333         GetChild<KaxChapterTimeStart>(*atom).SetValue(start - offset);
334 
335         auto &display = GetChild<KaxChapterDisplay>(*atom);
336 
337         GetChild<KaxChapterString>(display).SetValueUTF8(name);
338         if (use_language.is_valid()) {
339           GetChild<KaxChapterLanguage>(display).SetValue(use_language.get_iso639_2_alpha_3_code_or("und"));
340           if (!mtx::bcp47::language_c::is_disabled())
341             GetChild<KaxChapLanguageIETF>(display).SetValue(use_language.format());
342           else
343             DeleteChildren<KaxChapLanguageIETF>(display);
344         }
345 
346         if (!g_default_country.empty())
347           GetChild<KaxChapterCountry>(display).SetValue(g_default_country);
348 
349         ++num;
350       }
351     }
352   }
353 
354   return 0 == num ? kax_cptr{} : chaps;
355 }
356 
357 /** \brief Probe a file for different chapter formats and parse the file.
358 
359    The file \a file_name is opened and checked for supported chapter formats.
360    These include simple OGM style chapters, cue sheets and mkvtoolnix' own
361    XML chapter format.
362 
363    Its parameters don't have to be checked for validity.
364 
365    \param file_name The name of the text file to read from.
366    \param min_ts An optional timestamp. If both \a min_ts and \a max_ts are
367      given then only those chapters that lie in the timerange
368      <tt>[min_ts..max_ts]</tt> are kept.
369    \param max_ts An optional timestamp. If both \a min_ts and \a max_ts are
370      given then only those chapters that lie in the timerange
371      <tt>[min_ts..max_ts]</tt> are kept.
372    \param offset An optional offset that is subtracted from all start and
373      end timestamps after the timerange check has been made.
374    \param language This language is added as the \c KaxChapterLanguage
375      for entries that don't specifiy it.
376    \param charset The charset the chapters are supposed to be it. The entries
377      will be converted to UTF-8 if necessary. This parameter is ignored for XML
378      chapter files.
379    \param exception_on_error If set to \c true then an exception is thrown
380      if an error occurs. Otherwise \c nullptr will be returned.
381    \param format If given, this parameter will be set to the recognized chapter
382      format. May be \c nullptr if the caller is not interested in the result.
383    \param tags When parsing a cue sheet tags will be created along with the
384      chapter entries. These tags will be stored in this parameter.
385 
386    \return The chapters parsed from the file or \c nullptr if an error occurred.
387 
388    \see ::parse_chapters(mm_text_io_c *in,int64_t min_ts,int64_t max_ts, int64_t offset,const mtx::bcp47::language_c &language,const std::string &charset,bool exception_on_error,format_e *format,KaxTags **tags)
389 */
390 kax_cptr
parse(const std::string & file_name,int64_t min_ts,int64_t max_ts,int64_t offset,const mtx::bcp47::language_c & language,const std::string & charset,bool exception_on_error,format_e * format,std::unique_ptr<KaxTags> * tags)391 parse(const std::string &file_name,
392       int64_t min_ts,
393       int64_t max_ts,
394       int64_t offset,
395       const mtx::bcp47::language_c &language,
396       const std::string &charset,
397       bool exception_on_error,
398       format_e *format,
399       std::unique_ptr<KaxTags> *tags) {
400   try {
401 #if defined(HAVE_DVDREAD)
402     auto parsed_dvd_chapters = maybe_parse_dvd(file_name, language);
403     if (parsed_dvd_chapters) {
404       unify_legacy_and_bcp47_languages_and_countries(*parsed_dvd_chapters);
405       return parsed_dvd_chapters;
406     }
407 #endif
408 
409     mm_text_io_c in(std::make_shared<mm_file_io_c>(file_name));
410     auto parsed_chapters = parse(&in, min_ts, max_ts, offset, language, charset, exception_on_error, format, tags);
411 
412     if (parsed_chapters)
413       unify_legacy_and_bcp47_languages_and_countries(*parsed_chapters);
414 
415     return parsed_chapters;
416 
417   } catch (parser_x &e) {
418     if (exception_on_error)
419       throw;
420     mxerror(fmt::format(Y("Could not parse the chapters in '{0}': {1}\n"), file_name, e.error()));
421 
422   } catch (...) {
423     if (exception_on_error)
424       throw parser_x(fmt::format(Y("Could not open '{0}' for reading.\n"), file_name));
425     else
426       mxerror(fmt::format(Y("Could not open '{0}' for reading.\n"), file_name));
427   }
428 
429   return {};
430 }
431 
432 /** \brief Probe a file for different chapter formats and parse the file.
433 
434    The file \a in is checked for supported chapter formats. These include
435    simple OGM style chapters, cue sheets and mkvtoolnix' own XML chapter
436    format.
437 
438    The parameters are checked for validity.
439 
440    \param in The text file to read from.
441    \param min_ts An optional timestamp. If both \a min_ts and \a max_ts are
442      given then only those chapters that lie in the timerange
443      <tt>[min_ts..max_ts]</tt> are kept.
444    \param max_ts An optional timestamp. If both \a min_ts and \a max_ts are
445      given then only those chapters that lie in the timerange
446      <tt>[min_ts..max_ts]</tt> are kept.
447    \param offset An optional offset that is subtracted from all start and
448      end timestamps after the timerange check has been made.
449    \param language This language is added as the \c KaxChapterLanguage
450      for entries that don't specifiy it.
451    \param charset The charset the chapters are supposed to be it. The entries
452      will be converted to UTF-8 if necessary. This parameter is ignored for XML
453      chapter files.
454    \param exception_on_error If set to \c true then an exception is thrown
455      if an error occurs. Otherwise \c nullptr will be returned.
456    \param format If given, this parameter will be set to the recognized chapter
457      format. May be \c nullptr if the caller is not interested in the result.
458    \param tags When parsing a cue sheet tags will be created along with the
459      chapter entries. These tags will be stored in this parameter.
460 
461    \return The chapters parsed from the file or \c nullptr if an error occurred.
462 
463    \see ::parse_chapters(const std::string &file_name,int64_t min_ts,int64_t max_ts, int64_t offset,const mtx::bcp47::language_c &language,const std::string &charset,bool exception_on_error,format_e *format,std::unique_ptr<KaxTags> *tags)
464 */
465 kax_cptr
parse(mm_text_io_c * in,int64_t min_ts,int64_t max_ts,int64_t offset,const mtx::bcp47::language_c & language,const std::string & charset,bool exception_on_error,format_e * format,std::unique_ptr<KaxTags> * tags)466 parse(mm_text_io_c *in,
467       int64_t min_ts,
468       int64_t max_ts,
469       int64_t offset,
470       const mtx::bcp47::language_c &language,
471       const std::string &charset,
472       bool exception_on_error,
473       format_e *format,
474       std::unique_ptr<KaxTags> *tags) {
475   assert(in);
476 
477   std::string error;
478 
479   try {
480     if (probe_simple(in)) {
481       if (format)
482         *format = format_e::ogg;
483       return parse_simple(in, min_ts, max_ts, offset, language, charset);
484 
485     } else if (probe_cue(in)) {
486       if (format)
487         *format = format_e::cue;
488       return parse_cue(in, min_ts, max_ts, offset, language, charset, tags);
489 
490     } else if (format)
491       *format = format_e::xml;
492 
493     if (mtx::xml::ebml_chapters_converter_c::probe_file(in->get_file_name())) {
494       auto chapters = mtx::xml::ebml_chapters_converter_c::parse_file(in->get_file_name(), true);
495       return select_in_timeframe(chapters.get(), min_ts, max_ts, offset) ? chapters : nullptr;
496     }
497 
498     error = fmt::format(Y("Unknown chapter file format in '{0}'. It does not contain a supported chapter format.\n"), in->get_file_name());
499   } catch (mtx::chapters::parser_x &e) {
500     error = e.error();
501   } catch (mtx::mm_io::exception &ex) {
502     error = fmt::format(Y("The XML chapter file '{0}' could not be read.\n"), in->get_file_name());
503   } catch (mtx::xml::xml_parser_x &ex) {
504     error = fmt::format(Y("The XML chapter file '{0}' contains an error at position {2}: {1}\n"), in->get_file_name(), ex.result().description(), ex.result().offset);
505   } catch (mtx::xml::exception &ex) {
506     error = fmt::format(Y("The XML chapter file '{0}' contains an error: {1}\n"), in->get_file_name(), ex.what());
507   }
508 
509   if (!error.empty()) {
510     if (exception_on_error)
511       throw mtx::chapters::parser_x(error);
512     mxerror(error);
513   }
514 
515   return {};
516 }
517 
518 /** \brief Get the start timestamp for a chapter atom.
519 
520    Its parameters don't have to be checked for validity.
521 
522    \param atom The atom for which the start timestamp should be returned.
523    \param value_if_not_found The value to return if no start timestamp child
524      element was found. Defaults to -1.
525 
526    \return The start timestamp or \c value_if_not_found if the atom doesn't
527      contain such a child element.
528 */
529 int64_t
get_start(KaxChapterAtom & atom,int64_t value_if_not_found)530 get_start(KaxChapterAtom &atom,
531           int64_t value_if_not_found) {
532   auto start = FindChild<KaxChapterTimeStart>(&atom);
533 
534   return !start ? value_if_not_found : static_cast<int64_t>(start->GetValue());
535 }
536 
537 /** \brief Get the end timestamp for a chapter atom.
538 
539    Its parameters don't have to be checked for validity.
540 
541    \param atom The atom for which the end timestamp should be returned.
542    \param value_if_not_found The value to return if no end timestamp child
543      element was found. Defaults to -1.
544 
545    \return The start timestamp or \c value_if_not_found if the atom doesn't
546      contain such a child element.
547 */
548 int64_t
get_end(KaxChapterAtom & atom,int64_t value_if_not_found)549 get_end(KaxChapterAtom &atom,
550         int64_t value_if_not_found) {
551   auto end = FindChild<KaxChapterTimeEnd>(&atom);
552 
553   return !end ? value_if_not_found : static_cast<int64_t>(end->GetValue());
554 }
555 
556 /** \brief Get the name for a chapter atom.
557 
558    Its parameters don't have to be checked for validity.
559 
560    \param atom The atom for which the name should be returned.
561 
562    \return The atom's name UTF-8 coded or \c "" if the atom doesn't contain
563      such a child element.
564 */
565 std::string
get_name(KaxChapterAtom & atom)566 get_name(KaxChapterAtom &atom) {
567   auto display = FindChild<KaxChapterDisplay>(&atom);
568   if (!display)
569     return "";
570 
571   auto name = FindChild<KaxChapterString>(display);
572   if (!name)
573     return "";
574 
575   return name->GetValueUTF8();
576 }
577 
578 /** \brief Get the unique ID for a chapter atom.
579 
580    Its parameters don't have to be checked for validity.
581 
582    \param atom The atom for which the unique ID should be returned.
583 
584    \return The ID or \c -1 if the atom doesn't contain such a
585      child element.
586 */
587 int64_t
get_uid(KaxChapterAtom & atom)588 get_uid(KaxChapterAtom &atom) {
589   auto uid = FindChild<KaxChapterUID>(&atom);
590 
591   return !uid ? -1 : static_cast<int64_t>(uid->GetValue());
592 }
593 
594 void
remove_elements_unsupported_by_webm(EbmlMaster & master)595 remove_elements_unsupported_by_webm(EbmlMaster &master) {
596   static std::unordered_map<uint32_t, bool> s_supported_elements, s_readd_with_defaults;
597 
598   if (s_supported_elements.empty()) {
599     s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapters))            ] = true;
600     s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxEditionEntry))        ] = true;
601     s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterAtom))         ] = true;
602     s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterUID))          ] = true;
603     s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterStringUID))    ] = true;
604     s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterTimeStart))    ] = true;
605     s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterTimeEnd))      ] = true;
606     s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterDisplay))      ] = true;
607     s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterString))       ] = true;
608     s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterLanguage))     ] = true;
609     s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterCountry))      ] = true;
610 
611     s_readd_with_defaults[ EBML_ID_VALUE(EBML_ID(KaxEditionFlagDefault)) ] = true;
612     s_readd_with_defaults[ EBML_ID_VALUE(EBML_ID(KaxEditionFlagHidden))  ] = true;
613     s_readd_with_defaults[ EBML_ID_VALUE(EBML_ID(KaxChapterFlagEnabled)) ] = true;
614     s_readd_with_defaults[ EBML_ID_VALUE(EBML_ID(KaxChapterFlagHidden))  ] = true;
615   }
616 
617   auto idx = 0u;
618 
619   while (idx < master.ListSize()) {
620     auto e = master[idx];
621 
622     if (e && s_supported_elements[ EBML_ID_VALUE(EbmlId(*e)) ]) {
623       auto sub_master = dynamic_cast<EbmlMaster *>(e);
624       if (sub_master)
625         remove_elements_unsupported_by_webm(*sub_master);
626 
627       ++idx;
628 
629       continue;
630     }
631 
632     if (e && s_readd_with_defaults[ EBML_ID_VALUE(EbmlId(*e)) ]) {
633       auto new_with_defaults = &(e->CreateElement());
634       delete e;
635       master.GetElementList()[idx] = new_with_defaults;
636 
637       ++idx;
638 
639       continue;
640     }
641 
642     delete e;
643     master.Remove(idx);
644   }
645 }
646 
647 /** \brief Remove all chapter atoms that are outside of a time range
648 
649    All chapter atoms that lie completely outside the timestamp range
650    given with <tt>[min_ts..max_ts]</tt> are deleted. This is the workhorse
651    for ::select_chapters_in_timeframe
652 
653    Chapters which start before the window but end inside or after the window
654    are kept as well, and their start timestamp is adjusted.
655 
656    Its parameters don't have to be checked for validity.
657 
658    \param min_ts The minimum timestamp to accept.
659    \param max_ts The maximum timestamp to accept.
660    \param offset This value is subtracted from both the start and end timestamp
661      for each chapter after the decision whether or not to keep it has been
662      made.
663    \param m The master containing the elements to check.
664 */
665 static void
remove_entries(int64_t min_ts,int64_t max_ts,int64_t offset,EbmlMaster & m)666 remove_entries(int64_t min_ts,
667                int64_t max_ts,
668                int64_t offset,
669                EbmlMaster &m) {
670   if (0 == m.ListSize())
671     return;
672 
673   struct chapter_entry_t {
674     bool remove{}, spans{}, is_atom{};
675     int64_t start{}, end{-1};
676   };
677   std::vector<chapter_entry_t> entries;
678   entries.resize(m.ListSize());
679 
680   unsigned int last_atom_at = 0;
681   bool last_atom_found      = false;
682 
683   // Determine whether or not an entry has to be removed. Also retrieve
684   // the start and end timestamps.
685   size_t i;
686   for (i = 0; m.ListSize() > i; ++i) {
687     auto atom = dynamic_cast<KaxChapterAtom *>(m[i]);
688     if (!atom)
689       continue;
690 
691     last_atom_at       = i;
692     last_atom_found    = true;
693     entries[i].is_atom = true;
694 
695     auto cts = static_cast<KaxChapterTimeStart *>(atom->FindFirstElt(EBML_INFO(KaxChapterTimeStart), false));
696 
697     if (cts)
698       entries[i].start = cts->GetValue();
699 
700     auto cte = static_cast<KaxChapterTimeEnd *>(atom->FindFirstElt(EBML_INFO(KaxChapterTimeEnd), false));
701 
702     if (cte)
703       entries[i].end = cte->GetValue();
704   }
705 
706   // We can return if we don't have a single atom to work with.
707   if (!last_atom_found)
708     return;
709 
710   for (i = 0; m.ListSize() > i; ++i) {
711     auto atom = dynamic_cast<KaxChapterAtom *>(m[i]);
712     if (!atom)
713       continue;
714 
715     // Calculate the end timestamps and determine whether or not an entry spans
716     // several segments.
717     if (-1 == entries[i].end) {
718       if (i == last_atom_at)
719         entries[i].end = 1LL << 62;
720 
721       else {
722         int next_atom = i + 1;
723 
724         while (!entries[next_atom].is_atom)
725           ++next_atom;
726 
727         entries[i].end = entries[next_atom].start;
728       }
729     }
730 
731     if (   (entries[i].start < min_ts)
732         || ((max_ts >= 0) && (entries[i].start > max_ts)))
733       entries[i].remove = true;
734 
735     if (entries[i].remove && (entries[i].start < min_ts) && (entries[i].end > min_ts))
736       entries[i].spans = true;
737 
738     mxdebug_if(s_debug, fmt::format("remove_chapters: entries[{0}]: remove {1} spans {2} start {3} end {4}\n", i, entries[i].remove, entries[i].spans, entries[i].start, entries[i].end));
739 
740     // Spanning entries must be kept, and their start timestamp must be
741     // adjusted. Entries that are to be deleted will be deleted later and
742     // have to be skipped for now.
743     if (entries[i].remove && !entries[i].spans)
744       continue;
745 
746     auto cts = static_cast<KaxChapterTimeStart *>(atom->FindFirstElt(EBML_INFO(KaxChapterTimeStart), false));
747     auto cte = static_cast<KaxChapterTimeEnd *>(atom->FindFirstElt(EBML_INFO(KaxChapterTimeEnd), false));
748 
749     if (entries[i].spans)
750       cts->SetValue(min_ts);
751 
752     cts->SetValue(cts->GetValue() - offset);
753 
754     if (cte) {
755       int64_t end_ts = cte->GetValue();
756 
757       if ((max_ts >= 0) && (end_ts > max_ts))
758         end_ts = max_ts;
759       end_ts -= offset;
760 
761       cte->SetValue(end_ts);
762     }
763 
764     auto m2 = dynamic_cast<EbmlMaster *>(m[i]);
765     if (m2)
766       remove_entries(min_ts, max_ts, offset, *m2);
767   }
768 
769   // Now really delete those entries.
770   i = m.ListSize();
771   while (0 < i) {
772     --i;
773     if (entries[i].remove && !entries[i].spans) {
774       delete m[i];
775       m.Remove(i);
776     }
777   }
778 }
779 
780 /** \brief Merge all chapter atoms sharing the same UID
781 
782    If two or more chapters with the same UID are encountered on the same
783    level then those are merged into a single chapter. The start timestamp
784    is the minimum start timestamp of all the chapters, and the end timestamp
785    is the maximum end timestamp of all the chapters.
786 
787    The parameters do not have to be checked for validity.
788 
789    \param master The master containing the elements to check.
790 */
791 void
merge_entries(EbmlMaster & master)792 merge_entries(EbmlMaster &master) {
793   size_t master_idx;
794 
  // Iterate over all children of the master.
796   for (master_idx = 0; master.ListSize() > master_idx; ++master_idx) {
    // Not every child is a chapter atom. Skip those.
798     auto atom = dynamic_cast<KaxChapterAtom *>(master[master_idx]);
799     if (!atom)
800       continue;
801 
802     int64_t uid = get_uid(*atom);
803     if (-1 == uid)
804       continue;
805 
806     // First get the start and end time, if present.
807     int64_t start_ts = get_start(*atom, 0);
808     int64_t end_ts   = get_end(*atom);
809 
810     mxdebug_if(s_debug, fmt::format("chapters: merge_entries: looking for {0} with {1}, {2}\n", uid, start_ts, end_ts));
811 
812     // Now iterate over all remaining atoms and find those with the same
813     // UID.
814     size_t merge_idx = master_idx + 1;
815     while (true) {
816       KaxChapterAtom *merge_this = nullptr;
817       for (; master.ListSize() > merge_idx; ++merge_idx) {
818         auto cmp_atom = dynamic_cast<KaxChapterAtom *>(master[merge_idx]);
819         if (!cmp_atom)
820           continue;
821 
822         if (get_uid(*cmp_atom) == uid) {
823           merge_this = cmp_atom;
824           break;
825         }
826       }
827 
828       // If we haven't found an atom with the same UID then we're done here.
829       if (!merge_this)
830         break;
831 
832       // Do the merger! First get the start and end timestamps if present.
833       int64_t merge_start_ts = get_start(*merge_this, 0);
834       int64_t merge_end_ts   = get_end(*merge_this);
835 
836       // Then compare them to the ones we have for the soon-to-be merged
837       // chapter and assign accordingly.
838       if (merge_start_ts < start_ts)
839         start_ts = merge_start_ts;
840 
841       if ((-1 == end_ts) || (merge_end_ts > end_ts))
842         end_ts = merge_end_ts;
843 
844       // Move all chapter atoms from the merged entry into the target
845       // entry so that they will be merged recursively as well.
846       auto merge_child_idx = 0u;
847       auto num_children    = merge_this->ListSize();
848 
849       while (merge_child_idx < num_children) {
850         if (Is<KaxChapterAtom>((*merge_this)[merge_child_idx])) {
851           atom->PushElement(*(*merge_this)[merge_child_idx]);
852           merge_this->Remove(merge_child_idx);
853           --num_children;
854 
855         } else
856           ++merge_child_idx;
857       }
858 
859       mxdebug_if(s_debug, fmt::format("chapters: merge_entries:   found one at {0} with {1}, {2}; merged to {3}, {4}\n", merge_idx, merge_start_ts, merge_end_ts, start_ts, end_ts));
860 
861       // Finally remove the entry itself.
862       delete master[merge_idx];
863       master.Remove(merge_idx);
864     }
865 
866     // Assign the start and end timestamp to the chapter. Only assign an
867     // end timestamp if one was present in at least one of the merged
868     // chapter atoms.
869     GetChild<KaxChapterTimeStart>(*atom).SetValue(start_ts);
870     if (-1 != end_ts)
871       GetChild<KaxChapterTimeEnd>(*atom).SetValue(end_ts);
872   }
873 
874   // Recusively merge atoms.
875   for (master_idx = 0; master.ListSize() > master_idx; ++master_idx) {
876     auto merge_master = dynamic_cast<EbmlMaster *>(master[master_idx]);
877     if (merge_master)
878       merge_entries(*merge_master);
879   }
880 }
881 
882 /** \brief Remove all chapter atoms that are outside of a time range
883 
884    All chapter atoms that lie completely outside the timestamp range
885    given with <tt>[min_ts..max_ts]</tt> are deleted.
886 
887    Chapters which start before the window but end inside or after the window
888    are kept as well, and their start timestamp is adjusted.
889 
890    If two or more chapters with the same UID are encountered on the same
891    level then those are merged into a single chapter. The start timestamp
892    is the minimum start timestamp of all the chapters, and the end timestamp
893    is the maximum end timestamp of all the chapters.
894 
895    The parameters are checked for validity.
896 
897    \param chapters The chapters to check.
898    \param min_ts The minimum timestamp to accept.
899    \param max_ts The maximum timestamp to accept.
900    \param offset This value is subtracted from both the start and end timestamp
901      for each chapter after the decision whether or not to keep it has been
902      made.
903 
904    \return \c false if all chapters were discarded, \c true otherwise
905 */
906 bool
select_in_timeframe(KaxChapters * chapters,int64_t min_ts,int64_t max_ts,int64_t offset)907 select_in_timeframe(KaxChapters *chapters,
908                     int64_t min_ts,
909                     int64_t max_ts,
910                     int64_t offset) {
911   // Check the parameters.
912   if (!chapters)
913     return false;
914 
915   // Remove the atoms that are outside of the requested range.
916   size_t master_idx;
917   for (master_idx = 0; chapters->ListSize() > master_idx; master_idx++) {
918     EbmlMaster *work_master = dynamic_cast<KaxEditionEntry *>((*chapters)[master_idx]);
919     if (work_master)
920       remove_entries(min_ts, max_ts, offset, *work_master);
921   }
922 
923   // Count the number of atoms in each edition. Delete editions without
924   // any atom in them.
925   master_idx = 0;
926   while (chapters->ListSize() > master_idx) {
927     auto eentry = dynamic_cast<KaxEditionEntry *>((*chapters)[master_idx]);
928     if (!eentry) {
929       master_idx++;
930       continue;
931     }
932 
933     size_t num_atoms = 0, eentry_idx;
934     for (eentry_idx = 0; eentry->ListSize() > eentry_idx; eentry_idx++)
935       if (dynamic_cast<KaxChapterAtom *>((*eentry)[eentry_idx]))
936         num_atoms++;
937 
938     if (0 == num_atoms) {
939       chapters->Remove(master_idx);
940       delete eentry;
941 
942     } else
943       master_idx++;
944   }
945 
946   return chapters->ListSize() > 0;
947 }
948 
949 /** \brief Find an edition with a specific UID.
950 
951    Its parameters don't have to be checked for validity.
952 
953    \param chapters The chapters in which to look for the edition.
954    \param uid The requested unique edition ID. The special value \c 0
955      results in the first edition being returned.
956 
957    \return A pointer to the edition or \c nullptr if none has been found.
958 */
959 KaxEditionEntry *
find_edition_with_uid(KaxChapters & chapters,uint64_t uid)960 find_edition_with_uid(KaxChapters &chapters,
961                       uint64_t uid) {
962   if (0 == uid)
963     return FindChild<KaxEditionEntry>(&chapters);
964 
965   size_t eentry_idx;
966   for (eentry_idx = 0; chapters.ListSize() > eentry_idx; eentry_idx++) {
967     auto eentry = dynamic_cast<KaxEditionEntry *>(chapters[eentry_idx]);
968     if (!eentry)
969       continue;
970 
971     auto euid = FindChild<KaxEditionUID>(eentry);
972     if (euid && (euid->GetValue() == uid))
973       return eentry;
974   }
975 
976   return nullptr;
977 }
978 
979 /** \brief Find a chapter atom with a specific UID.
980 
981    Its parameters don't have to be checked for validity.
982 
983    \param chapters The chapters in which to look for the atom.
984    \param uid The requested unique atom ID. The special value \c 0 results in
985      the first atom in the first edition being returned.
986 
987    \return A pointer to the atom or \c nullptr if none has been found.
988 */
989 KaxChapterAtom *
find_chapter_with_uid(KaxChapters & chapters,uint64_t uid)990 find_chapter_with_uid(KaxChapters &chapters,
991                       uint64_t uid) {
992   if (0 == uid) {
993     auto eentry = FindChild<KaxEditionEntry>(&chapters);
994     if (!eentry)
995       return nullptr;
996     return FindChild<KaxChapterAtom>(eentry);
997   }
998 
999   size_t eentry_idx;
1000   for (eentry_idx = 0; chapters.ListSize() > eentry_idx; eentry_idx++) {
1001     auto eentry = dynamic_cast<KaxEditionEntry *>(chapters[eentry_idx]);
1002     if (!eentry)
1003       continue;
1004 
1005     size_t atom_idx;
1006     for (atom_idx = 0; eentry->ListSize() > atom_idx; atom_idx++) {
1007       auto atom = dynamic_cast<KaxChapterAtom *>((*eentry)[atom_idx]);
1008       if (!atom)
1009         continue;
1010 
1011       auto cuid = FindChild<KaxChapterUID>(atom);
1012       if (cuid && (cuid->GetValue() == uid))
1013         return atom;
1014     }
1015   }
1016 
1017   return nullptr;
1018 }
1019 
1020 /** \brief Move all chapter atoms to another container keeping editions intact
1021 
1022    This function moves all chapter atoms from \a src to \a dst.
1023    If there's already an edition in \a dst with the same UID as the current
1024    one in \a src, then all atoms will be put into that edition. Otherwise
1025    the complete edition will simply be moved over.
1026 
1027    After processing \a src will be empty.
1028 
1029    Its parameters don't have to be checked for validity.
1030 
1031    \param dst The container the atoms and editions will be put into.
1032    \param src The container the atoms and editions will be taken from.
1033 */
1034 void
move_by_edition(KaxChapters & dst,KaxChapters & src)1035 move_by_edition(KaxChapters &dst,
1036                 KaxChapters &src) {
1037   size_t src_idx;
1038   for (src_idx = 0; src.ListSize() > src_idx; src_idx++) {
1039     auto m = dynamic_cast<EbmlMaster *>(src[src_idx]);
1040     if (!m)
1041       continue;
1042 
1043     // Find an edition to which these atoms will be added.
1044     KaxEditionEntry *ee_dst = nullptr;
1045     auto euid_src = FindChild<KaxEditionUID>(m);
1046     if (euid_src)
1047       ee_dst = find_edition_with_uid(dst, euid_src->GetValue());
1048 
1049     // No edition with the same UID found as the one we want to handle?
1050     // Then simply move the complete edition over.
1051     if (!ee_dst)
1052       dst.PushElement(*m);
1053     else {
1054       // Move all atoms from the old edition to the new one.
1055       size_t master_idx;
1056       for (master_idx = 0; m->ListSize() > master_idx; master_idx++)
1057         if (Is<KaxChapterAtom>((*m)[master_idx]))
1058           ee_dst->PushElement(*(*m)[master_idx]);
1059         else
1060           delete (*m)[master_idx];
1061 
1062       m->RemoveAll();
1063       delete m;
1064     }
1065   }
1066 
1067   src.RemoveAll();
1068 }
1069 
1070 /** \brief Adjust all start and end timestamps by an offset
1071 
1072    All start and end timestamps are adjusted by an offset. This is done
1073    recursively.
1074 
1075    Its parameters don't have to be checked for validity.
1076 
1077    \param master A master containint the elements to adjust. This can be
1078      a KaxChapters, KaxEditionEntry or KaxChapterAtom object.
1079    \param offset The offset to add to each timestamp. Can be negative. If
1080      the resulting timestamp would be smaller than zero then it will be set
1081      to zero.
1082 */
1083 void
adjust_timestamps(EbmlMaster & master,int64_t offset,mtx_mp_rational_t const & factor)1084 adjust_timestamps(EbmlMaster &master,
1085                   int64_t offset,
1086                   mtx_mp_rational_t const &factor) {
1087   size_t master_idx;
1088   for (master_idx = 0; master.ListSize() > master_idx; master_idx++) {
1089     if (!Is<KaxChapterAtom>(master[master_idx]))
1090       continue;
1091 
1092     auto atom  = static_cast<KaxChapterAtom *>(master[master_idx]);
1093     auto start = FindChild<KaxChapterTimeStart>(atom);
1094     auto end   = FindChild<KaxChapterTimeEnd>(atom);
1095 
1096     if (start)
1097       start->SetValue(std::max<int64_t>(mtx::to_int(factor * mtx_mp_rational_t{start->GetValue()}) + offset, 0));
1098 
1099     if (end)
1100       end->SetValue(std::max<int64_t>(mtx::to_int(factor * mtx_mp_rational_t{end->GetValue()}) + offset, 0));
1101   }
1102 
1103   for (master_idx = 0; master.ListSize() > master_idx; master_idx++) {
1104     auto work_master = dynamic_cast<EbmlMaster *>(master[master_idx]);
1105     if (work_master)
1106       adjust_timestamps(*work_master, offset, factor);
1107   }
1108 }
1109 
1110 static int
count_atoms_recursively(EbmlMaster & master,int count)1111 count_atoms_recursively(EbmlMaster &master,
1112                         int count) {
1113   size_t master_idx;
1114 
1115   for (master_idx = 0; master.ListSize() > master_idx; ++master_idx)
1116     if (Is<KaxChapterAtom>(master[master_idx]))
1117       ++count;
1118 
1119     else if (dynamic_cast<EbmlMaster *>(master[master_idx]))
1120       count = count_atoms_recursively(*static_cast<EbmlMaster *>(master[master_idx]), count);
1121 
1122   return count;
1123 }
1124 
1125 int
count_atoms(EbmlMaster & master)1126 count_atoms(EbmlMaster &master) {
1127   return count_atoms_recursively(master, 0);
1128 }
1129 
1130 /** \brief Change the chapter edition UIDs to a single value
1131 
1132    This function changes the UIDs of all editions for which the
1133    function is called to a single value. This is intended for chapters
1134    read from source files which do not provide their own edition UIDs
1135    (e.g. MP4 or OGM files) so that their chapters can be appended and
1136    don't end up in separate editions.
1137 
1138    \c chapters may be nullptr in which case nothing is done.
1139 
1140    \param dst chapters The chapter structure for which all edition
1141       UIDs will be changed.
1142 */
1143 void
align_uids(KaxChapters * chapters)1144 align_uids(KaxChapters *chapters) {
1145   if (!chapters)
1146     return;
1147 
1148   static uint64_t s_shared_edition_uid = 0;
1149 
1150   if (0 == s_shared_edition_uid)
1151     s_shared_edition_uid = create_unique_number(UNIQUE_CHAPTER_IDS);
1152 
1153   size_t idx;
1154   for (idx = 0; chapters->ListSize() > idx; ++idx) {
1155     auto edition_entry = dynamic_cast<KaxEditionEntry *>((*chapters)[idx]);
1156     if (!edition_entry)
1157       continue;
1158 
1159     GetChild<KaxEditionUID>(*edition_entry).SetValue(s_shared_edition_uid);
1160   }
1161 }
1162 
1163 void
align_uids(KaxChapters & reference,KaxChapters & modify)1164 align_uids(KaxChapters &reference,
1165            KaxChapters &modify) {
1166   size_t reference_idx = 0, modify_idx = 0;
1167 
1168   while (true) {
1169     KaxEditionEntry *ee_reference = nullptr;;
1170     while ((reference.ListSize() > reference_idx) && !(ee_reference = dynamic_cast<KaxEditionEntry *>(reference[reference_idx])))
1171       ++reference_idx;
1172 
1173     if (!ee_reference)
1174       return;
1175 
1176     KaxEditionEntry *ee_modify = nullptr;;
1177     while ((modify.ListSize() > modify_idx) && !(ee_modify = dynamic_cast<KaxEditionEntry *>(modify[modify_idx])))
1178       ++modify_idx;
1179 
1180     if (!ee_modify)
1181       return;
1182 
1183     GetChild<KaxEditionUID>(*ee_modify).SetValue(GetChild<KaxEditionUID>(*ee_reference).GetValue());
1184     ++reference_idx;
1185     ++modify_idx;
1186   }
1187 }
1188 
1189 static void
regenerate_uids_worker(EbmlMaster & master,std::unordered_map<uint64_t,uint64_t> & new_chapter_uids)1190 regenerate_uids_worker(EbmlMaster &master,
1191                        std::unordered_map<uint64_t, uint64_t> &new_chapter_uids) {
1192   for (int idx = 0, end = master.ListSize(); end > idx; ++idx) {
1193     auto element     = master[idx];
1194     auto edition_uid = dynamic_cast<KaxEditionUID *>(element);
1195 
1196     if (edition_uid) {
1197       edition_uid->SetValue(create_unique_number(UNIQUE_EDITION_IDS));
1198       continue;
1199     }
1200 
1201     auto chapter_uid = dynamic_cast<KaxChapterUID *>(element);
1202 
1203     if (chapter_uid) {
1204       new_chapter_uids[chapter_uid->GetValue()] = create_unique_number(UNIQUE_CHAPTER_IDS);
1205       chapter_uid->SetValue(new_chapter_uids[chapter_uid->GetValue()]);
1206       continue;
1207     }
1208 
1209     auto sub_master = dynamic_cast<EbmlMaster *>(master[idx]);
1210     if (sub_master)
1211       regenerate_uids_worker(*sub_master, new_chapter_uids);
1212   }
1213 }
1214 
1215 void
regenerate_uids(EbmlMaster & master,EbmlMaster * tags)1216 regenerate_uids(EbmlMaster &master,
1217                 EbmlMaster *tags) {
1218   std::unordered_map<uint64_t, uint64_t> new_chapter_uids;
1219 
1220   regenerate_uids_worker(master, new_chapter_uids);
1221 
1222   if (tags)
1223     change_values<KaxTagChapterUID>(*tags, new_chapter_uids);
1224 }
1225 
1226 std::string
format_name_template(std::string const & name_template,int chapter_number,timestamp_c const & start_timestamp,std::string const & appended_file_name)1227 format_name_template(std::string const &name_template,
1228                      int chapter_number,
1229                      timestamp_c const &start_timestamp,
1230                      std::string const &appended_file_name) {
1231   auto name                 = name_template;
1232   auto number_re            = QRegularExpression{"<NUM(?::(\\d+))?>"};
1233   auto timestamp_re         = QRegularExpression{"<START(?::([^>]+))?>"};
1234   auto file_name_re         = QRegularExpression{"<FILE_NAME>"};
1235   auto file_name_ext_re     = QRegularExpression{"<FILE_NAME_WITH_EXT>"};
1236   auto appended_file_name_p = mtx::fs::to_path(appended_file_name);
1237 
1238   name = mtx::string::replace(name, number_re, [=](auto const &match) {
1239     auto number_str    = fmt::format("{0}", chapter_number);
1240     auto wanted_length = 1u;
1241 
1242     if (match.capturedLength(1) && !mtx::string::parse_number(to_utf8(match.captured(1)), wanted_length))
1243       wanted_length = 1;
1244 
1245     if (number_str.length() < wanted_length)
1246       number_str = std::string(wanted_length - number_str.length(), '0') + number_str;
1247 
1248     return Q(number_str);
1249   });
1250 
1251   name = mtx::string::replace(name, timestamp_re, [=](auto const &match) {
1252     auto format = match.capturedLength(1) ? to_utf8(match.captured(1)) : "%H:%M:%S"s;
1253     return Q(mtx::string::format_timestamp(start_timestamp.to_ns(), format));
1254   });
1255 
1256   return to_utf8(Q(name)
1257                  .replace(file_name_re,     Q(appended_file_name_p.stem()))
1258                  .replace(file_name_ext_re, Q(appended_file_name_p.filename())));
1259 }
1260 
1261 void
fix_country_codes(EbmlMaster & chapters)1262 fix_country_codes(EbmlMaster &chapters) {
1263   for (auto const &child : chapters) {
1264     auto sub_master = dynamic_cast<EbmlMaster *>(child);
1265     if (sub_master) {
1266       fix_country_codes(*sub_master);
1267       continue;
1268     }
1269 
1270     auto ccountry = dynamic_cast<KaxChapterCountry *>(child);
1271     if (!ccountry)
1272       continue;
1273 
1274     auto country_opt = mtx::iso3166::look_up_cctld(ccountry->GetValue());
1275     if (country_opt)
1276       ccountry->SetValue(mtx::string::to_lower_ascii(country_opt->alpha_2_code));
1277   }
1278 }
1279 
1280 std::shared_ptr<libmatroska::KaxChapters>
create_editions_and_chapters(std::vector<std::vector<timestamp_c>> const & editions_timestamps,mtx::bcp47::language_c const & language,std::string const & name_template)1281 create_editions_and_chapters(std::vector<std::vector<timestamp_c>> const &editions_timestamps,
1282                              mtx::bcp47::language_c const &language,
1283                              std::string const &name_template) {
1284   auto chapters          = std::make_shared<libmatroska::KaxChapters>();
1285   auto use_name_template = !name_template.empty()        ? name_template
1286                          :                                 g_chapter_generation_name_template.get_translated();
1287   auto use_language      = language.is_valid()           ? language
1288                          : g_default_language.is_valid() ? g_default_language
1289                          :                                 mtx::bcp47::language_c::parse("eng");
1290 
1291   for (auto const &timestamps : editions_timestamps) {
1292     auto edition        = new libmatroska::KaxEditionEntry;
1293     auto chapter_number = 0u;
1294 
1295     chapters->PushElement(*edition);
1296 
1297     GetChild<libmatroska::KaxEditionUID>(edition).SetValue(create_unique_number(UNIQUE_EDITION_IDS));
1298 
1299     for (auto const &timestamp : timestamps) {
1300       ++chapter_number;
1301 
1302       auto name = format_name_template(use_name_template, chapter_number, timestamp);
1303       auto atom = mtx::construct::cons<libmatroska::KaxChapterAtom>(new libmatroska::KaxChapterUID,       create_unique_number(UNIQUE_CHAPTER_IDS),
1304                                                                     new libmatroska::KaxChapterTimeStart, timestamp.to_ns());
1305 
1306       if (!name.empty())
1307         atom->PushElement(*mtx::construct::cons<libmatroska::KaxChapterDisplay>(new libmatroska::KaxChapterString,    name,
1308                                                                                 new libmatroska::KaxChapterLanguage,  use_language.get_iso639_2_alpha_3_code_or("und"),
1309                                                                                 new libmatroska::KaxChapLanguageIETF, use_language.format()));
1310 
1311       edition->PushElement(*atom);
1312     }
1313   }
1314 
1315   return chapters;
1316 }
1317 
1318 void
set_languages_in_display(libmatroska::KaxChapterDisplay & display,std::vector<mtx::bcp47::language_c> const & parsed_languages)1319 set_languages_in_display(libmatroska::KaxChapterDisplay &display,
1320                          std::vector<mtx::bcp47::language_c> const &parsed_languages) {
1321   DeleteChildren<libmatroska::KaxChapLanguageIETF>(display);
1322   DeleteChildren<libmatroska::KaxChapterLanguage>(display);
1323   DeleteChildren<libmatroska::KaxChapterCountry>(display);
1324 
1325   for (auto const &parsed_language : parsed_languages)
1326     if (parsed_language.is_valid())
1327       AddEmptyChild<libmatroska::KaxChapLanguageIETF>(display).SetValue(parsed_language.format());
1328 
1329   unify_legacy_and_bcp47_languages_and_countries(display);
1330 }
1331 
1332 void
set_languages_in_display(libmatroska::KaxChapterDisplay & display,mtx::bcp47::language_c const & parsed_language)1333 set_languages_in_display(libmatroska::KaxChapterDisplay &display,
1334                          mtx::bcp47::language_c const &parsed_language) {
1335   if (parsed_language.is_valid())
1336     set_languages_in_display(display, std::vector<mtx::bcp47::language_c>{ parsed_language });
1337 }
1338 
1339 void
set_languages_in_display(libmatroska::KaxChapterDisplay & display,std::string const & language)1340 set_languages_in_display(libmatroska::KaxChapterDisplay &display,
1341                          std::string const &language) {
1342   set_languages_in_display(display, std::vector<mtx::bcp47::language_c>{ mtx::bcp47::language_c::parse(language) });
1343 }
1344 
1345 mtx::bcp47::language_c
get_language_from_display(libmatroska::KaxChapterDisplay & display,std::string const & default_if_missing)1346 get_language_from_display(libmatroska::KaxChapterDisplay &display,
1347                           std::string const &default_if_missing) {
1348   auto language = FindChildValue<libmatroska::KaxChapLanguageIETF>(display);
1349   if (language.empty())
1350     language = FindChildValue<libmatroska::KaxChapterLanguage>(display);
1351 
1352   return mtx::bcp47::language_c::parse(!language.empty() ? language : default_if_missing);
1353 }
1354 
1355 }
1356