1 /** \brief chapter parser and helper functions
2
3 mkvmerge -- utility for splicing together matroska files
4 from component media subtypes
5
6 Distributed under the GPL v2
7 see the file COPYING for details
8 or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
9
10 \file
11
12 \author Written by Moritz Bunkus <moritz@bunkus.org>.
13 */
14
15 #include "common/common_pch.h"
16
17 #include <algorithm>
18 #include <cassert>
19
20 #include <QRegularExpression>
21
22 #include <matroska/KaxChapters.h>
23
24 #include "common/bcp47.h"
25 #include "common/chapters/chapters.h"
26 #include "common/chapters/dvd.h"
27 #include "common/construct.h"
28 #include "common/container.h"
29 #include "common/debugging.h"
30 #include "common/ebml.h"
31 #include "common/error.h"
32 #include "common/iso3166.h"
33 #include "common/locale.h"
34 #include "common/mm_io_x.h"
35 #include "common/mm_file_io.h"
36 #include "common/mm_proxy_io.h"
37 #include "common/mm_text_io.h"
38 #include "common/path.h"
39 #include "common/qt.h"
40 #include "common/strings/editing.h"
41 #include "common/strings/formatting.h"
42 #include "common/strings/parsing.h"
43 #include "common/unique_numbers.h"
44 #include "common/xml/ebml_chapters_converter.h"
45
46 using namespace libmatroska;
47
48 namespace mtx::chapters {
49
50 namespace {
51 debugging_option_c s_debug{"chapters|chapter_parser"};
52 }
53
54 /** The default language for all chapter entries that don't have their own. */
55 mtx::bcp47::language_c g_default_language;
56 /** The default country for all chapter entries that don't have their own. */
57 std::string g_default_country;
58
59 translatable_string_c g_chapter_generation_name_template{YT("Chapter <NUM:2>")};
60
61 constexpr auto SIMCHAP_RE_TIMESTAMP_LINE = "^\\s*CHAPTER\\d+\\s*=\\s*(\\d+)\\s*:\\s*(\\d+)\\s*:\\s*(\\d+)\\s*[\\.,]\\s*(\\d{1,9})";
62 constexpr auto SIMCHAP_RE_TIMESTAMP = "^\\s*CHAPTER\\d+\\s*=(.*)";
63 constexpr auto SIMCHAP_RE_NAME_LINE = "^\\s*CHAPTER\\d+NAME\\s*=(.*)";
64
65 void
unify_legacy_and_bcp47_languages_and_countries(EbmlElement & elt)66 unify_legacy_and_bcp47_languages_and_countries(EbmlElement &elt) {
67 auto master = dynamic_cast<libebml::EbmlMaster *>(&elt);
68 if (!master)
69 return;
70
71 auto display = dynamic_cast<KaxChapterDisplay *>(&elt);
72 if (!display) {
73 for (auto const child : *master)
74 unify_legacy_and_bcp47_languages_and_countries(*child);
75 return;
76 }
77
78 std::vector<std::string> legacy_languages, legacy_countries;
79 std::vector<mtx::bcp47::language_c> bcp47_languages;
80 auto child_idx = 0u;
81
82 while (child_idx < display->ListSize()) {
83 auto remove_child = true;
84 auto *child = (*display)[child_idx];
85
86 if (dynamic_cast<KaxChapterLanguage *>(child)) {
87 auto legacy_language = static_cast<KaxChapterLanguage &>(*child).GetValue();
88 if (!legacy_language.empty() && !mtx::includes(legacy_languages, legacy_language))
89 legacy_languages.emplace_back(legacy_language);
90
91 } else if (dynamic_cast<KaxChapterCountry *>(child)) {
92 auto legacy_country = static_cast<KaxChapterCountry &>(*child).GetValue();
93 if (!legacy_country.empty() && !mtx::includes(legacy_countries, legacy_country))
94 legacy_countries.emplace_back(legacy_country);
95
96 } else if (dynamic_cast<KaxChapLanguageIETF *>(child)) {
97 auto bcp47_language = mtx::bcp47::language_c::parse(static_cast<KaxChapLanguageIETF &>(*child).GetValue());
98 if (bcp47_language.is_valid() && !mtx::includes(bcp47_languages, bcp47_language))
99 bcp47_languages.emplace_back(bcp47_language);
100
101 } else
102 remove_child = false;
103
104 if (remove_child) {
105 display->Remove(child_idx);
106 delete child;
107
108 } else
109 ++child_idx;
110 }
111
112 if (legacy_languages.empty() && bcp47_languages.empty())
113 legacy_languages.emplace_back("eng"s);
114
115 if (bcp47_languages.empty()) {
116 auto add_maybe = [&bcp47_languages](std::string const &new_bcp47_language_str) {
117 auto new_bcp47_language = mtx::bcp47::language_c::parse(new_bcp47_language_str);
118 if (new_bcp47_language.is_valid() && !mtx::includes(bcp47_languages, new_bcp47_language))
119 bcp47_languages.emplace_back(new_bcp47_language);
120 };
121
122 for (auto const &legacy_language : legacy_languages) {
123 if (legacy_countries.empty())
124 add_maybe(legacy_language);
125
126 else
127 for (auto const &legacy_country : legacy_countries) {
128 auto language_and_region = fmt::format("{0}-{1}", legacy_language, mtx::string::to_lower_ascii(legacy_country) == "uk" ? "gb"s : legacy_country);
129 add_maybe(language_and_region);
130 }
131 }
132 }
133
134 legacy_languages.clear();
135 legacy_countries.clear();
136
137 for (auto const &bcp47_language : bcp47_languages) {
138 auto legacy_language = bcp47_language.get_iso639_2_alpha_3_code_or("und");
139
140 if (!mtx::includes(legacy_languages, legacy_language))
141 legacy_languages.emplace_back(legacy_language);
142
143 auto legacy_country = bcp47_language.get_top_level_domain_country_code();
144
145 if (!legacy_country.empty() && !mtx::includes(legacy_countries, legacy_country))
146 legacy_countries.emplace_back(legacy_country);
147 }
148
149 std::sort(legacy_languages.begin(), legacy_languages.end());
150 std::sort(legacy_countries.begin(), legacy_countries.end());
151 std::sort(bcp47_languages.begin(), bcp47_languages.end());
152
153 for (auto const &legacy_language : legacy_languages)
154 AddEmptyChild<KaxChapterLanguage>(display).SetValue(legacy_language);
155
156 for (auto const &legacy_country : legacy_countries)
157 AddEmptyChild<KaxChapterCountry>(display).SetValue(legacy_country);
158
159 if (mtx::bcp47::language_c::is_disabled())
160 return;
161
162 for (auto const &bcp47_language : bcp47_languages)
163 AddEmptyChild<KaxChapLanguageIETF>(display).SetValue(bcp47_language.format());
164 }
165
166 /** \brief Throw a special chapter parser exception.
167
168 \param error The error message.
169 */
170 inline void
chapter_error(const std::string & error)171 chapter_error(const std::string &error) {
172 throw parser_x(fmt::format(Y("Simple chapter parser: {0}\n"), error));
173 }
174
175 /** \brief Reads the start of a file and checks for OGM style comments.
176
177 The first lines are read. OGM style comments are recognized if the first
178 non-empty line contains <tt>CHAPTER01=...</tt> and the first non-empty
179 line afterwards contains <tt>CHAPTER01NAME=...</tt>.
180
181 The parameters are checked for validity.
182
183 \param in The file to read from.
184
185 \return \c true if the file contains OGM style comments and \c false
186 otherwise.
187 */
188 bool
probe_simple(mm_text_io_c * in)189 probe_simple(mm_text_io_c *in) {
190 QRegularExpression timestamp_line_re{SIMCHAP_RE_TIMESTAMP_LINE};
191 QRegularExpression name_line_re{ SIMCHAP_RE_NAME_LINE};
192
193 std::string line;
194
195 assert(in);
196
197 in->setFilePointer(0);
198 while (in->getline2(line)) {
199 mtx::string::strip(line);
200 if (line.empty())
201 continue;
202
203 if (!Q(line).contains(timestamp_line_re))
204 return false;
205
206 while (in->getline2(line)) {
207 mtx::string::strip(line);
208 if (line.empty())
209 continue;
210
211 return Q(line).contains(name_line_re);
212 }
213
214 return false;
215 }
216
217 return false;
218 }
219
220 // 1 2
221 // 012345678901234567890123
222 //
223 // CHAPTER01=00:00:00.000
224 // CHAPTER01NAME=Hallo Welt
225
226 /** \brief Parse simple OGM style comments
227
228 The file \a in is read. The content is assumed to be OGM style comments.
229
230 The parameters are checked for validity.
231
232 \param in The text file to read from.
233 \param min_ts An optional timestamp. If both \a min_ts and \a max_ts are
234 given then only those chapters that lie in the timerange
235 <tt>[min_ts..max_ts]</tt> are kept.
236 \param max_ts An optional timestamp. If both \a min_ts and \a max_ts are
237 given then only those chapters that lie in the timerange
238 <tt>[min_ts..max_ts]</tt> are kept.
239 \param offset An optional offset that is subtracted from all start and
240 end timestamps after the timerange check has been made.
241 \param language This language is added as the \c KaxChapterLanguage
242 for all entries.
243 \param charset The charset the chapters are supposed to be it. The entries
244 will be converted to UTF-8 if necessary.
245 \param exception_on_error If set to \c true then an exception is thrown
246 if an error occurs. Otherwise \c nullptr will be returned.
247
248 \return The chapters parsed from the file or \c nullptr if an error occurred.
249 */
250 kax_cptr
parse_simple(mm_text_io_c * in,int64_t min_ts,int64_t max_ts,int64_t offset,mtx::bcp47::language_c const & language,std::string const & charset)251 parse_simple(mm_text_io_c *in,
252 int64_t min_ts,
253 int64_t max_ts,
254 int64_t offset,
255 mtx::bcp47::language_c const &language,
256 std::string const &charset) {
257 assert(in);
258
259 in->setFilePointer(0);
260
261 kax_cptr chaps{new KaxChapters};
262 KaxChapterAtom *atom = nullptr;
263 KaxEditionEntry *edition = nullptr;
264 int mode = 0;
265 int num = 0;
266 int64_t start = 0;
267 charset_converter_cptr cc_utf8;
268
269 bool do_convert = in->get_byte_order_mark() == byte_order_mark_e::none;
270 if (do_convert)
271 cc_utf8 = charset_converter_c::init(charset);
272
273 auto use_language = language.is_valid() ? language
274 : g_default_language.is_valid() ? g_default_language
275 : mtx::bcp47::language_c::parse("eng"s);
276
277 QRegularExpression timestamp_line_re{SIMCHAP_RE_TIMESTAMP_LINE};
278 QRegularExpression timestamp_re{ SIMCHAP_RE_TIMESTAMP};
279 QRegularExpression name_line_re{ SIMCHAP_RE_NAME_LINE};
280 QRegularExpressionMatch matches;
281
282 std::string line;
283
284 while (in->getline2(line)) {
285 if (do_convert)
286 line = cc_utf8->utf8(line);
287
288 mtx::string::strip(line);
289 if (line.empty())
290 continue;
291
292 if (0 == mode) {
293 matches = timestamp_line_re.match(Q(line));
294 if (!matches.hasMatch())
295 chapter_error(fmt::format(Y("'{0}' is not a CHAPTERxx=... line."), line));
296
297 int64_t hour = 0, minute = 0, second = 0, nsecs = 0;
298 mtx::string::parse_number(to_utf8(matches.captured(1)), hour);
299 mtx::string::parse_number(to_utf8(matches.captured(2)), minute);
300 mtx::string::parse_number(to_utf8(matches.captured(3)), second);
301 mtx::string::parse_number(to_utf8(matches.captured(4)), nsecs);
302
303 if (59 < minute)
304 chapter_error(fmt::format(Y("Invalid minute: {0}"), minute));
305 if (59 < second)
306 chapter_error(fmt::format(Y("Invalid second: {0}"), second));
307
308 for (int idx = matches.capturedLength(4); idx < 9; ++idx)
309 nsecs *= 10;
310
311 start = nsecs + (second + minute * 60 + hour * 60 * 60) * 1'000'000'000;
312 mode = 1;
313
314 if (matches = timestamp_re.match(Q(line)); !matches.hasMatch())
315 chapter_error(fmt::format(Y("'{0}' is not a CHAPTERxx=... line."), line));
316
317 } else {
318 if (matches = name_line_re.match(Q(line)); !matches.hasMatch())
319 chapter_error(fmt::format(Y("'{0}' is not a CHAPTERxxNAME=... line."), line));
320
321 auto name = to_utf8(matches.captured(1));
322 if (name.empty())
323 name = format_name_template(g_chapter_generation_name_template.get_translated(), num + 1, timestamp_c::ns(start));
324
325 mode = 0;
326
327 if ((start >= min_ts) && ((start <= max_ts) || (max_ts == -1))) {
328 if (!edition)
329 edition = &GetChild<KaxEditionEntry>(*chaps);
330
331 atom = &GetFirstOrNextChild<KaxChapterAtom>(*edition, atom);
332 GetChild<KaxChapterUID>(*atom).SetValue(create_unique_number(UNIQUE_CHAPTER_IDS));
333 GetChild<KaxChapterTimeStart>(*atom).SetValue(start - offset);
334
335 auto &display = GetChild<KaxChapterDisplay>(*atom);
336
337 GetChild<KaxChapterString>(display).SetValueUTF8(name);
338 if (use_language.is_valid()) {
339 GetChild<KaxChapterLanguage>(display).SetValue(use_language.get_iso639_2_alpha_3_code_or("und"));
340 if (!mtx::bcp47::language_c::is_disabled())
341 GetChild<KaxChapLanguageIETF>(display).SetValue(use_language.format());
342 else
343 DeleteChildren<KaxChapLanguageIETF>(display);
344 }
345
346 if (!g_default_country.empty())
347 GetChild<KaxChapterCountry>(display).SetValue(g_default_country);
348
349 ++num;
350 }
351 }
352 }
353
354 return 0 == num ? kax_cptr{} : chaps;
355 }
356
357 /** \brief Probe a file for different chapter formats and parse the file.
358
359 The file \a file_name is opened and checked for supported chapter formats.
360 These include simple OGM style chapters, cue sheets and mkvtoolnix' own
361 XML chapter format.
362
363 Its parameters don't have to be checked for validity.
364
365 \param file_name The name of the text file to read from.
366 \param min_ts An optional timestamp. If both \a min_ts and \a max_ts are
367 given then only those chapters that lie in the timerange
368 <tt>[min_ts..max_ts]</tt> are kept.
369 \param max_ts An optional timestamp. If both \a min_ts and \a max_ts are
370 given then only those chapters that lie in the timerange
371 <tt>[min_ts..max_ts]</tt> are kept.
372 \param offset An optional offset that is subtracted from all start and
373 end timestamps after the timerange check has been made.
374 \param language This language is added as the \c KaxChapterLanguage
375 for entries that don't specifiy it.
376 \param charset The charset the chapters are supposed to be it. The entries
377 will be converted to UTF-8 if necessary. This parameter is ignored for XML
378 chapter files.
379 \param exception_on_error If set to \c true then an exception is thrown
380 if an error occurs. Otherwise \c nullptr will be returned.
381 \param format If given, this parameter will be set to the recognized chapter
382 format. May be \c nullptr if the caller is not interested in the result.
383 \param tags When parsing a cue sheet tags will be created along with the
384 chapter entries. These tags will be stored in this parameter.
385
386 \return The chapters parsed from the file or \c nullptr if an error occurred.
387
388 \see ::parse_chapters(mm_text_io_c *in,int64_t min_ts,int64_t max_ts, int64_t offset,const mtx::bcp47::language_c &language,const std::string &charset,bool exception_on_error,format_e *format,KaxTags **tags)
389 */
390 kax_cptr
parse(const std::string & file_name,int64_t min_ts,int64_t max_ts,int64_t offset,const mtx::bcp47::language_c & language,const std::string & charset,bool exception_on_error,format_e * format,std::unique_ptr<KaxTags> * tags)391 parse(const std::string &file_name,
392 int64_t min_ts,
393 int64_t max_ts,
394 int64_t offset,
395 const mtx::bcp47::language_c &language,
396 const std::string &charset,
397 bool exception_on_error,
398 format_e *format,
399 std::unique_ptr<KaxTags> *tags) {
400 try {
401 #if defined(HAVE_DVDREAD)
402 auto parsed_dvd_chapters = maybe_parse_dvd(file_name, language);
403 if (parsed_dvd_chapters) {
404 unify_legacy_and_bcp47_languages_and_countries(*parsed_dvd_chapters);
405 return parsed_dvd_chapters;
406 }
407 #endif
408
409 mm_text_io_c in(std::make_shared<mm_file_io_c>(file_name));
410 auto parsed_chapters = parse(&in, min_ts, max_ts, offset, language, charset, exception_on_error, format, tags);
411
412 if (parsed_chapters)
413 unify_legacy_and_bcp47_languages_and_countries(*parsed_chapters);
414
415 return parsed_chapters;
416
417 } catch (parser_x &e) {
418 if (exception_on_error)
419 throw;
420 mxerror(fmt::format(Y("Could not parse the chapters in '{0}': {1}\n"), file_name, e.error()));
421
422 } catch (...) {
423 if (exception_on_error)
424 throw parser_x(fmt::format(Y("Could not open '{0}' for reading.\n"), file_name));
425 else
426 mxerror(fmt::format(Y("Could not open '{0}' for reading.\n"), file_name));
427 }
428
429 return {};
430 }
431
432 /** \brief Probe a file for different chapter formats and parse the file.
433
434 The file \a in is checked for supported chapter formats. These include
435 simple OGM style chapters, cue sheets and mkvtoolnix' own XML chapter
436 format.
437
438 The parameters are checked for validity.
439
440 \param in The text file to read from.
441 \param min_ts An optional timestamp. If both \a min_ts and \a max_ts are
442 given then only those chapters that lie in the timerange
443 <tt>[min_ts..max_ts]</tt> are kept.
444 \param max_ts An optional timestamp. If both \a min_ts and \a max_ts are
445 given then only those chapters that lie in the timerange
446 <tt>[min_ts..max_ts]</tt> are kept.
447 \param offset An optional offset that is subtracted from all start and
448 end timestamps after the timerange check has been made.
449 \param language This language is added as the \c KaxChapterLanguage
450 for entries that don't specifiy it.
451 \param charset The charset the chapters are supposed to be it. The entries
452 will be converted to UTF-8 if necessary. This parameter is ignored for XML
453 chapter files.
454 \param exception_on_error If set to \c true then an exception is thrown
455 if an error occurs. Otherwise \c nullptr will be returned.
456 \param format If given, this parameter will be set to the recognized chapter
457 format. May be \c nullptr if the caller is not interested in the result.
458 \param tags When parsing a cue sheet tags will be created along with the
459 chapter entries. These tags will be stored in this parameter.
460
461 \return The chapters parsed from the file or \c nullptr if an error occurred.
462
463 \see ::parse_chapters(const std::string &file_name,int64_t min_ts,int64_t max_ts, int64_t offset,const mtx::bcp47::language_c &language,const std::string &charset,bool exception_on_error,format_e *format,std::unique_ptr<KaxTags> *tags)
464 */
465 kax_cptr
parse(mm_text_io_c * in,int64_t min_ts,int64_t max_ts,int64_t offset,const mtx::bcp47::language_c & language,const std::string & charset,bool exception_on_error,format_e * format,std::unique_ptr<KaxTags> * tags)466 parse(mm_text_io_c *in,
467 int64_t min_ts,
468 int64_t max_ts,
469 int64_t offset,
470 const mtx::bcp47::language_c &language,
471 const std::string &charset,
472 bool exception_on_error,
473 format_e *format,
474 std::unique_ptr<KaxTags> *tags) {
475 assert(in);
476
477 std::string error;
478
479 try {
480 if (probe_simple(in)) {
481 if (format)
482 *format = format_e::ogg;
483 return parse_simple(in, min_ts, max_ts, offset, language, charset);
484
485 } else if (probe_cue(in)) {
486 if (format)
487 *format = format_e::cue;
488 return parse_cue(in, min_ts, max_ts, offset, language, charset, tags);
489
490 } else if (format)
491 *format = format_e::xml;
492
493 if (mtx::xml::ebml_chapters_converter_c::probe_file(in->get_file_name())) {
494 auto chapters = mtx::xml::ebml_chapters_converter_c::parse_file(in->get_file_name(), true);
495 return select_in_timeframe(chapters.get(), min_ts, max_ts, offset) ? chapters : nullptr;
496 }
497
498 error = fmt::format(Y("Unknown chapter file format in '{0}'. It does not contain a supported chapter format.\n"), in->get_file_name());
499 } catch (mtx::chapters::parser_x &e) {
500 error = e.error();
501 } catch (mtx::mm_io::exception &ex) {
502 error = fmt::format(Y("The XML chapter file '{0}' could not be read.\n"), in->get_file_name());
503 } catch (mtx::xml::xml_parser_x &ex) {
504 error = fmt::format(Y("The XML chapter file '{0}' contains an error at position {2}: {1}\n"), in->get_file_name(), ex.result().description(), ex.result().offset);
505 } catch (mtx::xml::exception &ex) {
506 error = fmt::format(Y("The XML chapter file '{0}' contains an error: {1}\n"), in->get_file_name(), ex.what());
507 }
508
509 if (!error.empty()) {
510 if (exception_on_error)
511 throw mtx::chapters::parser_x(error);
512 mxerror(error);
513 }
514
515 return {};
516 }
517
518 /** \brief Get the start timestamp for a chapter atom.
519
520 Its parameters don't have to be checked for validity.
521
522 \param atom The atom for which the start timestamp should be returned.
523 \param value_if_not_found The value to return if no start timestamp child
524 element was found. Defaults to -1.
525
526 \return The start timestamp or \c value_if_not_found if the atom doesn't
527 contain such a child element.
528 */
529 int64_t
get_start(KaxChapterAtom & atom,int64_t value_if_not_found)530 get_start(KaxChapterAtom &atom,
531 int64_t value_if_not_found) {
532 auto start = FindChild<KaxChapterTimeStart>(&atom);
533
534 return !start ? value_if_not_found : static_cast<int64_t>(start->GetValue());
535 }
536
537 /** \brief Get the end timestamp for a chapter atom.
538
539 Its parameters don't have to be checked for validity.
540
541 \param atom The atom for which the end timestamp should be returned.
542 \param value_if_not_found The value to return if no end timestamp child
543 element was found. Defaults to -1.
544
545 \return The start timestamp or \c value_if_not_found if the atom doesn't
546 contain such a child element.
547 */
548 int64_t
get_end(KaxChapterAtom & atom,int64_t value_if_not_found)549 get_end(KaxChapterAtom &atom,
550 int64_t value_if_not_found) {
551 auto end = FindChild<KaxChapterTimeEnd>(&atom);
552
553 return !end ? value_if_not_found : static_cast<int64_t>(end->GetValue());
554 }
555
556 /** \brief Get the name for a chapter atom.
557
558 Its parameters don't have to be checked for validity.
559
560 \param atom The atom for which the name should be returned.
561
562 \return The atom's name UTF-8 coded or \c "" if the atom doesn't contain
563 such a child element.
564 */
565 std::string
get_name(KaxChapterAtom & atom)566 get_name(KaxChapterAtom &atom) {
567 auto display = FindChild<KaxChapterDisplay>(&atom);
568 if (!display)
569 return "";
570
571 auto name = FindChild<KaxChapterString>(display);
572 if (!name)
573 return "";
574
575 return name->GetValueUTF8();
576 }
577
578 /** \brief Get the unique ID for a chapter atom.
579
580 Its parameters don't have to be checked for validity.
581
582 \param atom The atom for which the unique ID should be returned.
583
584 \return The ID or \c -1 if the atom doesn't contain such a
585 child element.
586 */
587 int64_t
get_uid(KaxChapterAtom & atom)588 get_uid(KaxChapterAtom &atom) {
589 auto uid = FindChild<KaxChapterUID>(&atom);
590
591 return !uid ? -1 : static_cast<int64_t>(uid->GetValue());
592 }
593
594 void
remove_elements_unsupported_by_webm(EbmlMaster & master)595 remove_elements_unsupported_by_webm(EbmlMaster &master) {
596 static std::unordered_map<uint32_t, bool> s_supported_elements, s_readd_with_defaults;
597
598 if (s_supported_elements.empty()) {
599 s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapters)) ] = true;
600 s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxEditionEntry)) ] = true;
601 s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterAtom)) ] = true;
602 s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterUID)) ] = true;
603 s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterStringUID)) ] = true;
604 s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterTimeStart)) ] = true;
605 s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterTimeEnd)) ] = true;
606 s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterDisplay)) ] = true;
607 s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterString)) ] = true;
608 s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterLanguage)) ] = true;
609 s_supported_elements[ EBML_ID_VALUE(EBML_ID(KaxChapterCountry)) ] = true;
610
611 s_readd_with_defaults[ EBML_ID_VALUE(EBML_ID(KaxEditionFlagDefault)) ] = true;
612 s_readd_with_defaults[ EBML_ID_VALUE(EBML_ID(KaxEditionFlagHidden)) ] = true;
613 s_readd_with_defaults[ EBML_ID_VALUE(EBML_ID(KaxChapterFlagEnabled)) ] = true;
614 s_readd_with_defaults[ EBML_ID_VALUE(EBML_ID(KaxChapterFlagHidden)) ] = true;
615 }
616
617 auto idx = 0u;
618
619 while (idx < master.ListSize()) {
620 auto e = master[idx];
621
622 if (e && s_supported_elements[ EBML_ID_VALUE(EbmlId(*e)) ]) {
623 auto sub_master = dynamic_cast<EbmlMaster *>(e);
624 if (sub_master)
625 remove_elements_unsupported_by_webm(*sub_master);
626
627 ++idx;
628
629 continue;
630 }
631
632 if (e && s_readd_with_defaults[ EBML_ID_VALUE(EbmlId(*e)) ]) {
633 auto new_with_defaults = &(e->CreateElement());
634 delete e;
635 master.GetElementList()[idx] = new_with_defaults;
636
637 ++idx;
638
639 continue;
640 }
641
642 delete e;
643 master.Remove(idx);
644 }
645 }
646
647 /** \brief Remove all chapter atoms that are outside of a time range
648
649 All chapter atoms that lie completely outside the timestamp range
650 given with <tt>[min_ts..max_ts]</tt> are deleted. This is the workhorse
651 for ::select_chapters_in_timeframe
652
653 Chapters which start before the window but end inside or after the window
654 are kept as well, and their start timestamp is adjusted.
655
656 Its parameters don't have to be checked for validity.
657
658 \param min_ts The minimum timestamp to accept.
659 \param max_ts The maximum timestamp to accept.
660 \param offset This value is subtracted from both the start and end timestamp
661 for each chapter after the decision whether or not to keep it has been
662 made.
663 \param m The master containing the elements to check.
664 */
665 static void
remove_entries(int64_t min_ts,int64_t max_ts,int64_t offset,EbmlMaster & m)666 remove_entries(int64_t min_ts,
667 int64_t max_ts,
668 int64_t offset,
669 EbmlMaster &m) {
670 if (0 == m.ListSize())
671 return;
672
673 struct chapter_entry_t {
674 bool remove{}, spans{}, is_atom{};
675 int64_t start{}, end{-1};
676 };
677 std::vector<chapter_entry_t> entries;
678 entries.resize(m.ListSize());
679
680 unsigned int last_atom_at = 0;
681 bool last_atom_found = false;
682
683 // Determine whether or not an entry has to be removed. Also retrieve
684 // the start and end timestamps.
685 size_t i;
686 for (i = 0; m.ListSize() > i; ++i) {
687 auto atom = dynamic_cast<KaxChapterAtom *>(m[i]);
688 if (!atom)
689 continue;
690
691 last_atom_at = i;
692 last_atom_found = true;
693 entries[i].is_atom = true;
694
695 auto cts = static_cast<KaxChapterTimeStart *>(atom->FindFirstElt(EBML_INFO(KaxChapterTimeStart), false));
696
697 if (cts)
698 entries[i].start = cts->GetValue();
699
700 auto cte = static_cast<KaxChapterTimeEnd *>(atom->FindFirstElt(EBML_INFO(KaxChapterTimeEnd), false));
701
702 if (cte)
703 entries[i].end = cte->GetValue();
704 }
705
706 // We can return if we don't have a single atom to work with.
707 if (!last_atom_found)
708 return;
709
710 for (i = 0; m.ListSize() > i; ++i) {
711 auto atom = dynamic_cast<KaxChapterAtom *>(m[i]);
712 if (!atom)
713 continue;
714
715 // Calculate the end timestamps and determine whether or not an entry spans
716 // several segments.
717 if (-1 == entries[i].end) {
718 if (i == last_atom_at)
719 entries[i].end = 1LL << 62;
720
721 else {
722 int next_atom = i + 1;
723
724 while (!entries[next_atom].is_atom)
725 ++next_atom;
726
727 entries[i].end = entries[next_atom].start;
728 }
729 }
730
731 if ( (entries[i].start < min_ts)
732 || ((max_ts >= 0) && (entries[i].start > max_ts)))
733 entries[i].remove = true;
734
735 if (entries[i].remove && (entries[i].start < min_ts) && (entries[i].end > min_ts))
736 entries[i].spans = true;
737
738 mxdebug_if(s_debug, fmt::format("remove_chapters: entries[{0}]: remove {1} spans {2} start {3} end {4}\n", i, entries[i].remove, entries[i].spans, entries[i].start, entries[i].end));
739
740 // Spanning entries must be kept, and their start timestamp must be
741 // adjusted. Entries that are to be deleted will be deleted later and
742 // have to be skipped for now.
743 if (entries[i].remove && !entries[i].spans)
744 continue;
745
746 auto cts = static_cast<KaxChapterTimeStart *>(atom->FindFirstElt(EBML_INFO(KaxChapterTimeStart), false));
747 auto cte = static_cast<KaxChapterTimeEnd *>(atom->FindFirstElt(EBML_INFO(KaxChapterTimeEnd), false));
748
749 if (entries[i].spans)
750 cts->SetValue(min_ts);
751
752 cts->SetValue(cts->GetValue() - offset);
753
754 if (cte) {
755 int64_t end_ts = cte->GetValue();
756
757 if ((max_ts >= 0) && (end_ts > max_ts))
758 end_ts = max_ts;
759 end_ts -= offset;
760
761 cte->SetValue(end_ts);
762 }
763
764 auto m2 = dynamic_cast<EbmlMaster *>(m[i]);
765 if (m2)
766 remove_entries(min_ts, max_ts, offset, *m2);
767 }
768
769 // Now really delete those entries.
770 i = m.ListSize();
771 while (0 < i) {
772 --i;
773 if (entries[i].remove && !entries[i].spans) {
774 delete m[i];
775 m.Remove(i);
776 }
777 }
778 }
779
780 /** \brief Merge all chapter atoms sharing the same UID
781
782 If two or more chapters with the same UID are encountered on the same
783 level then those are merged into a single chapter. The start timestamp
784 is the minimum start timestamp of all the chapters, and the end timestamp
785 is the maximum end timestamp of all the chapters.
786
787 The parameters do not have to be checked for validity.
788
789 \param master The master containing the elements to check.
790 */
791 void
merge_entries(EbmlMaster & master)792 merge_entries(EbmlMaster &master) {
793 size_t master_idx;
794
795 // Iterate over all children of the atomaster.
796 for (master_idx = 0; master.ListSize() > master_idx; ++master_idx) {
797 // Not every child is a chapter atomaster. Skip those.
798 auto atom = dynamic_cast<KaxChapterAtom *>(master[master_idx]);
799 if (!atom)
800 continue;
801
802 int64_t uid = get_uid(*atom);
803 if (-1 == uid)
804 continue;
805
806 // First get the start and end time, if present.
807 int64_t start_ts = get_start(*atom, 0);
808 int64_t end_ts = get_end(*atom);
809
810 mxdebug_if(s_debug, fmt::format("chapters: merge_entries: looking for {0} with {1}, {2}\n", uid, start_ts, end_ts));
811
812 // Now iterate over all remaining atoms and find those with the same
813 // UID.
814 size_t merge_idx = master_idx + 1;
815 while (true) {
816 KaxChapterAtom *merge_this = nullptr;
817 for (; master.ListSize() > merge_idx; ++merge_idx) {
818 auto cmp_atom = dynamic_cast<KaxChapterAtom *>(master[merge_idx]);
819 if (!cmp_atom)
820 continue;
821
822 if (get_uid(*cmp_atom) == uid) {
823 merge_this = cmp_atom;
824 break;
825 }
826 }
827
828 // If we haven't found an atom with the same UID then we're done here.
829 if (!merge_this)
830 break;
831
832 // Do the merger! First get the start and end timestamps if present.
833 int64_t merge_start_ts = get_start(*merge_this, 0);
834 int64_t merge_end_ts = get_end(*merge_this);
835
836 // Then compare them to the ones we have for the soon-to-be merged
837 // chapter and assign accordingly.
838 if (merge_start_ts < start_ts)
839 start_ts = merge_start_ts;
840
841 if ((-1 == end_ts) || (merge_end_ts > end_ts))
842 end_ts = merge_end_ts;
843
844 // Move all chapter atoms from the merged entry into the target
845 // entry so that they will be merged recursively as well.
846 auto merge_child_idx = 0u;
847 auto num_children = merge_this->ListSize();
848
849 while (merge_child_idx < num_children) {
850 if (Is<KaxChapterAtom>((*merge_this)[merge_child_idx])) {
851 atom->PushElement(*(*merge_this)[merge_child_idx]);
852 merge_this->Remove(merge_child_idx);
853 --num_children;
854
855 } else
856 ++merge_child_idx;
857 }
858
859 mxdebug_if(s_debug, fmt::format("chapters: merge_entries: found one at {0} with {1}, {2}; merged to {3}, {4}\n", merge_idx, merge_start_ts, merge_end_ts, start_ts, end_ts));
860
861 // Finally remove the entry itself.
862 delete master[merge_idx];
863 master.Remove(merge_idx);
864 }
865
866 // Assign the start and end timestamp to the chapter. Only assign an
867 // end timestamp if one was present in at least one of the merged
868 // chapter atoms.
869 GetChild<KaxChapterTimeStart>(*atom).SetValue(start_ts);
870 if (-1 != end_ts)
871 GetChild<KaxChapterTimeEnd>(*atom).SetValue(end_ts);
872 }
873
874 // Recusively merge atoms.
875 for (master_idx = 0; master.ListSize() > master_idx; ++master_idx) {
876 auto merge_master = dynamic_cast<EbmlMaster *>(master[master_idx]);
877 if (merge_master)
878 merge_entries(*merge_master);
879 }
880 }
881
882 /** \brief Remove all chapter atoms that are outside of a time range
883
884 All chapter atoms that lie completely outside the timestamp range
885 given with <tt>[min_ts..max_ts]</tt> are deleted.
886
887 Chapters which start before the window but end inside or after the window
888 are kept as well, and their start timestamp is adjusted.
889
890 If two or more chapters with the same UID are encountered on the same
891 level then those are merged into a single chapter. The start timestamp
892 is the minimum start timestamp of all the chapters, and the end timestamp
893 is the maximum end timestamp of all the chapters.
894
895 The parameters are checked for validity.
896
897 \param chapters The chapters to check.
898 \param min_ts The minimum timestamp to accept.
899 \param max_ts The maximum timestamp to accept.
900 \param offset This value is subtracted from both the start and end timestamp
901 for each chapter after the decision whether or not to keep it has been
902 made.
903
904 \return \c false if all chapters were discarded, \c true otherwise
905 */
906 bool
select_in_timeframe(KaxChapters * chapters,int64_t min_ts,int64_t max_ts,int64_t offset)907 select_in_timeframe(KaxChapters *chapters,
908 int64_t min_ts,
909 int64_t max_ts,
910 int64_t offset) {
911 // Check the parameters.
912 if (!chapters)
913 return false;
914
915 // Remove the atoms that are outside of the requested range.
916 size_t master_idx;
917 for (master_idx = 0; chapters->ListSize() > master_idx; master_idx++) {
918 EbmlMaster *work_master = dynamic_cast<KaxEditionEntry *>((*chapters)[master_idx]);
919 if (work_master)
920 remove_entries(min_ts, max_ts, offset, *work_master);
921 }
922
923 // Count the number of atoms in each edition. Delete editions without
924 // any atom in them.
925 master_idx = 0;
926 while (chapters->ListSize() > master_idx) {
927 auto eentry = dynamic_cast<KaxEditionEntry *>((*chapters)[master_idx]);
928 if (!eentry) {
929 master_idx++;
930 continue;
931 }
932
933 size_t num_atoms = 0, eentry_idx;
934 for (eentry_idx = 0; eentry->ListSize() > eentry_idx; eentry_idx++)
935 if (dynamic_cast<KaxChapterAtom *>((*eentry)[eentry_idx]))
936 num_atoms++;
937
938 if (0 == num_atoms) {
939 chapters->Remove(master_idx);
940 delete eentry;
941
942 } else
943 master_idx++;
944 }
945
946 return chapters->ListSize() > 0;
947 }
948
949 /** \brief Find an edition with a specific UID.
950
951 Its parameters don't have to be checked for validity.
952
953 \param chapters The chapters in which to look for the edition.
954 \param uid The requested unique edition ID. The special value \c 0
955 results in the first edition being returned.
956
957 \return A pointer to the edition or \c nullptr if none has been found.
958 */
959 KaxEditionEntry *
find_edition_with_uid(KaxChapters & chapters,uint64_t uid)960 find_edition_with_uid(KaxChapters &chapters,
961 uint64_t uid) {
962 if (0 == uid)
963 return FindChild<KaxEditionEntry>(&chapters);
964
965 size_t eentry_idx;
966 for (eentry_idx = 0; chapters.ListSize() > eentry_idx; eentry_idx++) {
967 auto eentry = dynamic_cast<KaxEditionEntry *>(chapters[eentry_idx]);
968 if (!eentry)
969 continue;
970
971 auto euid = FindChild<KaxEditionUID>(eentry);
972 if (euid && (euid->GetValue() == uid))
973 return eentry;
974 }
975
976 return nullptr;
977 }
978
979 /** \brief Find a chapter atom with a specific UID.
980
981 Its parameters don't have to be checked for validity.
982
983 \param chapters The chapters in which to look for the atom.
984 \param uid The requested unique atom ID. The special value \c 0 results in
985 the first atom in the first edition being returned.
986
987 \return A pointer to the atom or \c nullptr if none has been found.
988 */
989 KaxChapterAtom *
find_chapter_with_uid(KaxChapters & chapters,uint64_t uid)990 find_chapter_with_uid(KaxChapters &chapters,
991 uint64_t uid) {
992 if (0 == uid) {
993 auto eentry = FindChild<KaxEditionEntry>(&chapters);
994 if (!eentry)
995 return nullptr;
996 return FindChild<KaxChapterAtom>(eentry);
997 }
998
999 size_t eentry_idx;
1000 for (eentry_idx = 0; chapters.ListSize() > eentry_idx; eentry_idx++) {
1001 auto eentry = dynamic_cast<KaxEditionEntry *>(chapters[eentry_idx]);
1002 if (!eentry)
1003 continue;
1004
1005 size_t atom_idx;
1006 for (atom_idx = 0; eentry->ListSize() > atom_idx; atom_idx++) {
1007 auto atom = dynamic_cast<KaxChapterAtom *>((*eentry)[atom_idx]);
1008 if (!atom)
1009 continue;
1010
1011 auto cuid = FindChild<KaxChapterUID>(atom);
1012 if (cuid && (cuid->GetValue() == uid))
1013 return atom;
1014 }
1015 }
1016
1017 return nullptr;
1018 }
1019
1020 /** \brief Move all chapter atoms to another container keeping editions intact
1021
1022 This function moves all chapter atoms from \a src to \a dst.
1023 If there's already an edition in \a dst with the same UID as the current
1024 one in \a src, then all atoms will be put into that edition. Otherwise
1025 the complete edition will simply be moved over.
1026
1027 After processing \a src will be empty.
1028
1029 Its parameters don't have to be checked for validity.
1030
1031 \param dst The container the atoms and editions will be put into.
1032 \param src The container the atoms and editions will be taken from.
1033 */
1034 void
move_by_edition(KaxChapters & dst,KaxChapters & src)1035 move_by_edition(KaxChapters &dst,
1036 KaxChapters &src) {
1037 size_t src_idx;
1038 for (src_idx = 0; src.ListSize() > src_idx; src_idx++) {
1039 auto m = dynamic_cast<EbmlMaster *>(src[src_idx]);
1040 if (!m)
1041 continue;
1042
1043 // Find an edition to which these atoms will be added.
1044 KaxEditionEntry *ee_dst = nullptr;
1045 auto euid_src = FindChild<KaxEditionUID>(m);
1046 if (euid_src)
1047 ee_dst = find_edition_with_uid(dst, euid_src->GetValue());
1048
1049 // No edition with the same UID found as the one we want to handle?
1050 // Then simply move the complete edition over.
1051 if (!ee_dst)
1052 dst.PushElement(*m);
1053 else {
1054 // Move all atoms from the old edition to the new one.
1055 size_t master_idx;
1056 for (master_idx = 0; m->ListSize() > master_idx; master_idx++)
1057 if (Is<KaxChapterAtom>((*m)[master_idx]))
1058 ee_dst->PushElement(*(*m)[master_idx]);
1059 else
1060 delete (*m)[master_idx];
1061
1062 m->RemoveAll();
1063 delete m;
1064 }
1065 }
1066
1067 src.RemoveAll();
1068 }
1069
1070 /** \brief Adjust all start and end timestamps by an offset
1071
1072 All start and end timestamps are adjusted by an offset. This is done
1073 recursively.
1074
1075 Its parameters don't have to be checked for validity.
1076
1077 \param master A master containint the elements to adjust. This can be
1078 a KaxChapters, KaxEditionEntry or KaxChapterAtom object.
1079 \param offset The offset to add to each timestamp. Can be negative. If
1080 the resulting timestamp would be smaller than zero then it will be set
1081 to zero.
1082 */
1083 void
adjust_timestamps(EbmlMaster & master,int64_t offset,mtx_mp_rational_t const & factor)1084 adjust_timestamps(EbmlMaster &master,
1085 int64_t offset,
1086 mtx_mp_rational_t const &factor) {
1087 size_t master_idx;
1088 for (master_idx = 0; master.ListSize() > master_idx; master_idx++) {
1089 if (!Is<KaxChapterAtom>(master[master_idx]))
1090 continue;
1091
1092 auto atom = static_cast<KaxChapterAtom *>(master[master_idx]);
1093 auto start = FindChild<KaxChapterTimeStart>(atom);
1094 auto end = FindChild<KaxChapterTimeEnd>(atom);
1095
1096 if (start)
1097 start->SetValue(std::max<int64_t>(mtx::to_int(factor * mtx_mp_rational_t{start->GetValue()}) + offset, 0));
1098
1099 if (end)
1100 end->SetValue(std::max<int64_t>(mtx::to_int(factor * mtx_mp_rational_t{end->GetValue()}) + offset, 0));
1101 }
1102
1103 for (master_idx = 0; master.ListSize() > master_idx; master_idx++) {
1104 auto work_master = dynamic_cast<EbmlMaster *>(master[master_idx]);
1105 if (work_master)
1106 adjust_timestamps(*work_master, offset, factor);
1107 }
1108 }
1109
1110 static int
count_atoms_recursively(EbmlMaster & master,int count)1111 count_atoms_recursively(EbmlMaster &master,
1112 int count) {
1113 size_t master_idx;
1114
1115 for (master_idx = 0; master.ListSize() > master_idx; ++master_idx)
1116 if (Is<KaxChapterAtom>(master[master_idx]))
1117 ++count;
1118
1119 else if (dynamic_cast<EbmlMaster *>(master[master_idx]))
1120 count = count_atoms_recursively(*static_cast<EbmlMaster *>(master[master_idx]), count);
1121
1122 return count;
1123 }
1124
1125 int
count_atoms(EbmlMaster & master)1126 count_atoms(EbmlMaster &master) {
1127 return count_atoms_recursively(master, 0);
1128 }
1129
1130 /** \brief Change the chapter edition UIDs to a single value
1131
1132 This function changes the UIDs of all editions for which the
1133 function is called to a single value. This is intended for chapters
1134 read from source files which do not provide their own edition UIDs
1135 (e.g. MP4 or OGM files) so that their chapters can be appended and
1136 don't end up in separate editions.
1137
1138 \c chapters may be nullptr in which case nothing is done.
1139
1140 \param dst chapters The chapter structure for which all edition
1141 UIDs will be changed.
1142 */
1143 void
align_uids(KaxChapters * chapters)1144 align_uids(KaxChapters *chapters) {
1145 if (!chapters)
1146 return;
1147
1148 static uint64_t s_shared_edition_uid = 0;
1149
1150 if (0 == s_shared_edition_uid)
1151 s_shared_edition_uid = create_unique_number(UNIQUE_CHAPTER_IDS);
1152
1153 size_t idx;
1154 for (idx = 0; chapters->ListSize() > idx; ++idx) {
1155 auto edition_entry = dynamic_cast<KaxEditionEntry *>((*chapters)[idx]);
1156 if (!edition_entry)
1157 continue;
1158
1159 GetChild<KaxEditionUID>(*edition_entry).SetValue(s_shared_edition_uid);
1160 }
1161 }
1162
1163 void
align_uids(KaxChapters & reference,KaxChapters & modify)1164 align_uids(KaxChapters &reference,
1165 KaxChapters &modify) {
1166 size_t reference_idx = 0, modify_idx = 0;
1167
1168 while (true) {
1169 KaxEditionEntry *ee_reference = nullptr;;
1170 while ((reference.ListSize() > reference_idx) && !(ee_reference = dynamic_cast<KaxEditionEntry *>(reference[reference_idx])))
1171 ++reference_idx;
1172
1173 if (!ee_reference)
1174 return;
1175
1176 KaxEditionEntry *ee_modify = nullptr;;
1177 while ((modify.ListSize() > modify_idx) && !(ee_modify = dynamic_cast<KaxEditionEntry *>(modify[modify_idx])))
1178 ++modify_idx;
1179
1180 if (!ee_modify)
1181 return;
1182
1183 GetChild<KaxEditionUID>(*ee_modify).SetValue(GetChild<KaxEditionUID>(*ee_reference).GetValue());
1184 ++reference_idx;
1185 ++modify_idx;
1186 }
1187 }
1188
1189 static void
regenerate_uids_worker(EbmlMaster & master,std::unordered_map<uint64_t,uint64_t> & new_chapter_uids)1190 regenerate_uids_worker(EbmlMaster &master,
1191 std::unordered_map<uint64_t, uint64_t> &new_chapter_uids) {
1192 for (int idx = 0, end = master.ListSize(); end > idx; ++idx) {
1193 auto element = master[idx];
1194 auto edition_uid = dynamic_cast<KaxEditionUID *>(element);
1195
1196 if (edition_uid) {
1197 edition_uid->SetValue(create_unique_number(UNIQUE_EDITION_IDS));
1198 continue;
1199 }
1200
1201 auto chapter_uid = dynamic_cast<KaxChapterUID *>(element);
1202
1203 if (chapter_uid) {
1204 new_chapter_uids[chapter_uid->GetValue()] = create_unique_number(UNIQUE_CHAPTER_IDS);
1205 chapter_uid->SetValue(new_chapter_uids[chapter_uid->GetValue()]);
1206 continue;
1207 }
1208
1209 auto sub_master = dynamic_cast<EbmlMaster *>(master[idx]);
1210 if (sub_master)
1211 regenerate_uids_worker(*sub_master, new_chapter_uids);
1212 }
1213 }
1214
1215 void
regenerate_uids(EbmlMaster & master,EbmlMaster * tags)1216 regenerate_uids(EbmlMaster &master,
1217 EbmlMaster *tags) {
1218 std::unordered_map<uint64_t, uint64_t> new_chapter_uids;
1219
1220 regenerate_uids_worker(master, new_chapter_uids);
1221
1222 if (tags)
1223 change_values<KaxTagChapterUID>(*tags, new_chapter_uids);
1224 }
1225
1226 std::string
format_name_template(std::string const & name_template,int chapter_number,timestamp_c const & start_timestamp,std::string const & appended_file_name)1227 format_name_template(std::string const &name_template,
1228 int chapter_number,
1229 timestamp_c const &start_timestamp,
1230 std::string const &appended_file_name) {
1231 auto name = name_template;
1232 auto number_re = QRegularExpression{"<NUM(?::(\\d+))?>"};
1233 auto timestamp_re = QRegularExpression{"<START(?::([^>]+))?>"};
1234 auto file_name_re = QRegularExpression{"<FILE_NAME>"};
1235 auto file_name_ext_re = QRegularExpression{"<FILE_NAME_WITH_EXT>"};
1236 auto appended_file_name_p = mtx::fs::to_path(appended_file_name);
1237
1238 name = mtx::string::replace(name, number_re, [=](auto const &match) {
1239 auto number_str = fmt::format("{0}", chapter_number);
1240 auto wanted_length = 1u;
1241
1242 if (match.capturedLength(1) && !mtx::string::parse_number(to_utf8(match.captured(1)), wanted_length))
1243 wanted_length = 1;
1244
1245 if (number_str.length() < wanted_length)
1246 number_str = std::string(wanted_length - number_str.length(), '0') + number_str;
1247
1248 return Q(number_str);
1249 });
1250
1251 name = mtx::string::replace(name, timestamp_re, [=](auto const &match) {
1252 auto format = match.capturedLength(1) ? to_utf8(match.captured(1)) : "%H:%M:%S"s;
1253 return Q(mtx::string::format_timestamp(start_timestamp.to_ns(), format));
1254 });
1255
1256 return to_utf8(Q(name)
1257 .replace(file_name_re, Q(appended_file_name_p.stem()))
1258 .replace(file_name_ext_re, Q(appended_file_name_p.filename())));
1259 }
1260
1261 void
fix_country_codes(EbmlMaster & chapters)1262 fix_country_codes(EbmlMaster &chapters) {
1263 for (auto const &child : chapters) {
1264 auto sub_master = dynamic_cast<EbmlMaster *>(child);
1265 if (sub_master) {
1266 fix_country_codes(*sub_master);
1267 continue;
1268 }
1269
1270 auto ccountry = dynamic_cast<KaxChapterCountry *>(child);
1271 if (!ccountry)
1272 continue;
1273
1274 auto country_opt = mtx::iso3166::look_up_cctld(ccountry->GetValue());
1275 if (country_opt)
1276 ccountry->SetValue(mtx::string::to_lower_ascii(country_opt->alpha_2_code));
1277 }
1278 }
1279
1280 std::shared_ptr<libmatroska::KaxChapters>
create_editions_and_chapters(std::vector<std::vector<timestamp_c>> const & editions_timestamps,mtx::bcp47::language_c const & language,std::string const & name_template)1281 create_editions_and_chapters(std::vector<std::vector<timestamp_c>> const &editions_timestamps,
1282 mtx::bcp47::language_c const &language,
1283 std::string const &name_template) {
1284 auto chapters = std::make_shared<libmatroska::KaxChapters>();
1285 auto use_name_template = !name_template.empty() ? name_template
1286 : g_chapter_generation_name_template.get_translated();
1287 auto use_language = language.is_valid() ? language
1288 : g_default_language.is_valid() ? g_default_language
1289 : mtx::bcp47::language_c::parse("eng");
1290
1291 for (auto const ×tamps : editions_timestamps) {
1292 auto edition = new libmatroska::KaxEditionEntry;
1293 auto chapter_number = 0u;
1294
1295 chapters->PushElement(*edition);
1296
1297 GetChild<libmatroska::KaxEditionUID>(edition).SetValue(create_unique_number(UNIQUE_EDITION_IDS));
1298
1299 for (auto const ×tamp : timestamps) {
1300 ++chapter_number;
1301
1302 auto name = format_name_template(use_name_template, chapter_number, timestamp);
1303 auto atom = mtx::construct::cons<libmatroska::KaxChapterAtom>(new libmatroska::KaxChapterUID, create_unique_number(UNIQUE_CHAPTER_IDS),
1304 new libmatroska::KaxChapterTimeStart, timestamp.to_ns());
1305
1306 if (!name.empty())
1307 atom->PushElement(*mtx::construct::cons<libmatroska::KaxChapterDisplay>(new libmatroska::KaxChapterString, name,
1308 new libmatroska::KaxChapterLanguage, use_language.get_iso639_2_alpha_3_code_or("und"),
1309 new libmatroska::KaxChapLanguageIETF, use_language.format()));
1310
1311 edition->PushElement(*atom);
1312 }
1313 }
1314
1315 return chapters;
1316 }
1317
1318 void
set_languages_in_display(libmatroska::KaxChapterDisplay & display,std::vector<mtx::bcp47::language_c> const & parsed_languages)1319 set_languages_in_display(libmatroska::KaxChapterDisplay &display,
1320 std::vector<mtx::bcp47::language_c> const &parsed_languages) {
1321 DeleteChildren<libmatroska::KaxChapLanguageIETF>(display);
1322 DeleteChildren<libmatroska::KaxChapterLanguage>(display);
1323 DeleteChildren<libmatroska::KaxChapterCountry>(display);
1324
1325 for (auto const &parsed_language : parsed_languages)
1326 if (parsed_language.is_valid())
1327 AddEmptyChild<libmatroska::KaxChapLanguageIETF>(display).SetValue(parsed_language.format());
1328
1329 unify_legacy_and_bcp47_languages_and_countries(display);
1330 }
1331
1332 void
set_languages_in_display(libmatroska::KaxChapterDisplay & display,mtx::bcp47::language_c const & parsed_language)1333 set_languages_in_display(libmatroska::KaxChapterDisplay &display,
1334 mtx::bcp47::language_c const &parsed_language) {
1335 if (parsed_language.is_valid())
1336 set_languages_in_display(display, std::vector<mtx::bcp47::language_c>{ parsed_language });
1337 }
1338
1339 void
set_languages_in_display(libmatroska::KaxChapterDisplay & display,std::string const & language)1340 set_languages_in_display(libmatroska::KaxChapterDisplay &display,
1341 std::string const &language) {
1342 set_languages_in_display(display, std::vector<mtx::bcp47::language_c>{ mtx::bcp47::language_c::parse(language) });
1343 }
1344
1345 mtx::bcp47::language_c
get_language_from_display(libmatroska::KaxChapterDisplay & display,std::string const & default_if_missing)1346 get_language_from_display(libmatroska::KaxChapterDisplay &display,
1347 std::string const &default_if_missing) {
1348 auto language = FindChildValue<libmatroska::KaxChapLanguageIETF>(display);
1349 if (language.empty())
1350 language = FindChildValue<libmatroska::KaxChapterLanguage>(display);
1351
1352 return mtx::bcp47::language_c::parse(!language.empty() ? language : default_if_missing);
1353 }
1354
1355 }
1356