1 /*
2 mkvmerge -- utility for splicing together matroska files
3 from component media subtypes
4
5 Distributed under the GPL v2
6 see the file COPYING for details
7 or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
8
9 helper function for WebVTT data
10
11 Written by Moritz Bunkus <moritz@bunkus.org>.
12 */
13
14 #include "common/common_pch.h"
15
16 #include "common/qt.h"
17 #include "common/strings/editing.h"
18 #include "common/strings/formatting.h"
19 #include "common/strings/parsing.h"
20 #include "common/webvtt.h"
21
22 namespace mtx::webvtt {
23
// Regular expression fragment with a single capturing group for one
// timestamp: an optional "<digits>:" prefix followed by two digits, a
// colon, two digits, a dot and three digits. It is substituted into
// larger patterns via fmt::format elsewhere in this file.
constexpr char const *RE_TIMESTAMP = "((?:\\d+:)?\\d{2}:\\d{2}\\.\\d{3})";
25
26 struct parser_c::impl_t {
27 public:
28 std::vector<std::string> current_block, global_blocks, local_blocks;
29 bool parsing_global_data{true};
30 std::deque<parser_c::cue_cptr> cues;
31 unsigned int current_cue_number{}, total_number_of_cues{}, total_number_of_bytes{};
32 debugging_option_c debug{"parser"};
33
34 QRegularExpression timestamp_line_re{Q(fmt::format("^[ \\t]*{0}[ \\t]+-->[ \\t]+{0}(?:[ \\t]+([^\\n]+))?$", RE_TIMESTAMP))};
35 };
36
parser_c()37 parser_c::parser_c()
38 : m{new impl_t()}
39 {
40 }
41
~parser_c()42 parser_c::~parser_c() { // NOLINT(modernize-use-equals-default) due to pimpl idiom requiring explicit dtor declaration somewhere
43 }
44
45 void
add_line(std::string const & line)46 parser_c::add_line(std::string const &line) {
47 auto tmp = mtx::string::chomp(line);
48
49 if (tmp.empty())
50 add_block();
51
52 else
53 m->current_block.emplace_back(std::move(tmp));
54 }
55
56 void
add_joined_lines(std::string const & joined_lines)57 parser_c::add_joined_lines(std::string const &joined_lines) {
58 auto lines = mtx::string::split(mtx::string::chomp(mtx::string::normalize_line_endings(joined_lines)), "\n");
59
60 for (auto const &line : lines)
61 add_line(line);
62 }
63
64 void
add_joined_lines(memory_c const & mem)65 parser_c::add_joined_lines(memory_c const &mem) {
66 if (!mem.get_size())
67 return;
68
69 add_joined_lines(mem.to_string());
70 }
71
72 void
flush()73 parser_c::flush() {
74 add_block();
75 m->parsing_global_data = false;
76 }
77
78 void
add_block()79 parser_c::add_block() {
80 if (m->current_block.empty())
81 return;
82
83 std::string label, additional;
84 auto timestamp_line = -1;
85 auto is_other = false;
86 QRegularExpressionMatch matches;
87
88 if (matches = m->timestamp_line_re.match(Q(m->current_block[0])); matches.hasMatch())
89 timestamp_line = 0;
90
91 else if (m->current_block.size() <= 1)
92 is_other = true;
93
94 else if (matches = m->timestamp_line_re.match(Q(m->current_block[1])); matches.hasMatch()) {
95 timestamp_line = 1;
96 label = std::move(m->current_block[0]);
97
98 } else
99 is_other = true;
100
101 if (is_other) {
102 auto content = mtx::string::join(m->current_block, "\n");
103 (m->parsing_global_data ? m->global_blocks : m->local_blocks).emplace_back(std::move(content));
104
105 m->current_block.clear();
106
107 return;
108 }
109
110 m->parsing_global_data = false;
111
112 timestamp_c start, end;
113 mtx::string::parse_timestamp(to_utf8(matches.captured(1)), start);
114 mtx::string::parse_timestamp(to_utf8(matches.captured(2)), end);
115
116 auto content = mtx::string::join(m->current_block.begin() + timestamp_line + 1, m->current_block.end(), "\n");
117 content = adjust_embedded_timestamps(content, start.negate());
118 auto cue = std::make_shared<cue_t>();
119 cue->m_start = start;
120 cue->m_duration = end - start;
121 cue->m_content = memory_c::clone(content);
122 auto settings_list = to_utf8(matches.captured(3));
123
124 if (! (label.empty() && settings_list.empty() && m->local_blocks.empty())) {
125 additional = settings_list + "\n" + label + "\n" + mtx::string::join(m->local_blocks, "\n");
126 cue->m_addition = memory_c::clone(additional);
127 }
128
129 mxdebug_if(m->debug,
130 fmt::format("label «{0}» start «{1}» end «{2}» settings list «{3}» additional «{4}» content «{5}»\n",
131 label, to_utf8(matches.captured(1)), to_utf8(matches.captured(2)), to_utf8(matches.captured(3)),
132 to_utf8(Q(additional).replace(QRegularExpression{"\n+"}, "–")),
133 to_utf8(Q(content) .replace(QRegularExpression{"\n+"}, "–"))));
134
135 m->local_blocks.clear();
136 m->current_block.clear();
137
138 m->total_number_of_bytes += cue->m_content->get_size() + (cue->m_addition ? cue->m_addition->get_size() : 0);
139
140 m->cues.emplace_back(cue);
141 }
142
143 bool
codec_private_available() const144 parser_c::codec_private_available()
145 const {
146 return !m->parsing_global_data;
147 }
148
149 memory_cptr
get_codec_private() const150 parser_c::get_codec_private()
151 const {
152 return memory_c::clone(mtx::string::join(m->global_blocks, "\n\n"));
153 }
154
155 bool
cue_available() const156 parser_c::cue_available()
157 const {
158 return !m->cues.empty();
159 }
160
161 parser_c::cue_cptr
get_cue()162 parser_c::get_cue() {
163 auto cue = m->cues.front();
164 m->cues.pop_front();
165 return cue;
166 }
167
168 unsigned int
get_current_cue_number() const169 parser_c::get_current_cue_number()
170 const {
171 return m->current_cue_number;
172 }
173
174 unsigned int
get_total_number_of_cues() const175 parser_c::get_total_number_of_cues()
176 const {
177 return m->total_number_of_cues;
178 }
179
180 unsigned int
get_total_number_of_bytes() const181 parser_c::get_total_number_of_bytes()
182 const {
183 return m->total_number_of_bytes;
184 }
185
186 std::string
adjust_embedded_timestamps(std::string const & text,timestamp_c const & offset)187 parser_c::adjust_embedded_timestamps(std::string const &text,
188 timestamp_c const &offset) {
189 static std::optional<QRegularExpression> s_embedded_timestamp_re;
190
191 if (!s_embedded_timestamp_re)
192 s_embedded_timestamp_re = QRegularExpression{Q(fmt::format("<{0}>", RE_TIMESTAMP))};
193
194 return mtx::string::replace(text, *s_embedded_timestamp_re, [&offset](auto const &match) {
195 timestamp_c timestamp;
196 mtx::string::parse_timestamp(to_utf8(match.captured(1)), timestamp);
197 return Q(fmt::format("<{0}>", mtx::string::format_timestamp(timestamp + offset, 3)));
198 });
199 }
200
201 } // namespace mtx::webvtt
202