1 /*
2 mkvmerge -- utility for splicing together matroska files
3 from component media subtypes
4
5 Distributed under the GPL v2
6 see the file COPYING for details
7 or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
8
9 Subripper subtitle reader
10
11 Written by Moritz Bunkus <moritz@bunkus.org>.
12 */
13
14 #include "common/common_pch.h"
15
16 #include <matroska/KaxTracks.h>
17
18 #include "common/codec.h"
19 #include "common/debugging.h"
20 #include "common/strings/editing.h"
21 #include "common/strings/parsing.h"
22 #include "common/strings/utf8.h"
23 #include "merge/connection_checks.h"
24 #include "merge/output_control.h"
25 #include "merge/packet_extensions.h"
26 #include "output/p_textsubs.h"
27
28 using namespace libmatroska;
29
textsubs_packetizer_c(generic_reader_c * p_reader,track_info_c & p_ti,const char * codec_id,bool recode)30 textsubs_packetizer_c::textsubs_packetizer_c(generic_reader_c *p_reader,
31 track_info_c &p_ti,
32 const char *codec_id,
33 bool recode)
34 : generic_packetizer_c(p_reader, p_ti)
35 , m_codec_id{codec_id}
36 {
37 if (recode) {
38 m_cc_utf8 = charset_converter_c::init(m_ti.m_sub_charset);
39 m_converter_is_utf8 = charset_converter_c::is_utf8_charset_name(m_cc_utf8->get_charset());
40 m_try_utf8 = m_ti.m_sub_charset.empty();
41 }
42
43 set_track_type(track_subtitle);
44 if (m_codec_id == MKV_S_TEXTUSF)
45 set_default_compression_method(COMPRESSION_ZLIB);
46
47 auto arg = std::string{};
48 if (!debugging_c::requested("textsubs_force_rerender", &arg))
49 return;
50
51 auto tid_and_packetno = mtx::string::split(arg, ":");
52 auto tid = int64_t{};
53 if (!mtx::string::parse_number(tid_and_packetno[0], tid) || (tid != m_ti.m_id))
54 return;
55
56 unsigned int packetno{};
57 mtx::string::parse_number(tid_and_packetno[1], packetno);
58 m_force_rerender_track_headers_on_packetno = packetno;
59
60 mxdebug(fmt::format("textsubs_packetizer_c: track {0}: forcing rerendering of track headers after packet {1}\n", tid, packetno));
61 }
62
~textsubs_packetizer_c()63 textsubs_packetizer_c::~textsubs_packetizer_c() {
64 }
65
66 void
set_headers()67 textsubs_packetizer_c::set_headers() {
68 set_codec_id(m_codec_id);
69 set_codec_private(m_ti.m_private_data);
70
71 generic_packetizer_c::set_headers();
72
73 m_track_entry->EnableLacing(false);
74 }
75
76 void
set_line_ending_style(mtx::string::line_ending_style_e line_ending_style)77 textsubs_packetizer_c::set_line_ending_style(mtx::string::line_ending_style_e line_ending_style) {
78 m_line_ending_style = line_ending_style;
79 }
80
81 void
process_impl(packet_cptr const & packet)82 textsubs_packetizer_c::process_impl(packet_cptr const &packet) {
83 if (m_buffered_packet) {
84 m_buffered_packet->duration = packet->timestamp - m_buffered_packet->timestamp;
85 process_one_packet(m_buffered_packet);
86 m_buffered_packet.reset();
87 }
88
89 auto subs = recode(packet->data->to_string());
90 subs = mtx::string::normalize_line_endings(subs, m_line_ending_style);
91
92 mtx::string::strip_back(subs);
93
94 if (subs.empty())
95 return;
96
97 packet->data = memory_c::clone(subs);
98
99 packet->force_key_frame();
100
101 if (0 <= packet->duration)
102 process_one_packet(packet);
103
104 else {
105 m_buffered_packet = packet;
106 m_buffered_packet->data->take_ownership();
107 }
108 }
109
110 std::string
recode(std::string subs)111 textsubs_packetizer_c::recode(std::string subs) {
112 if (m_try_utf8 && !mtx::utf8::is_valid(subs))
113 m_try_utf8 = false;
114
115 auto emit_invalid_utf8_warning = false;
116
117 if (!m_try_utf8 && m_cc_utf8) {
118 if (!m_invalid_utf8_warned && m_converter_is_utf8 && !mtx::utf8::is_valid(subs))
119 emit_invalid_utf8_warning = true;
120
121 subs = m_cc_utf8->utf8(subs);
122
123 } else if (!m_invalid_utf8_warned && !mtx::utf8::is_valid(subs))
124 emit_invalid_utf8_warning = true;
125
126 if (emit_invalid_utf8_warning) {
127 m_invalid_utf8_warned = true;
128 mxwarn_tid(m_ti.m_fname, m_ti.m_id, fmt::format(Y("This text subtitle track contains invalid 8-bit characters outside valid multi-byte UTF-8 sequences. Please specify the correct encoding for this track.\n")));
129 }
130
131 return subs;
132 }
133
134 void
process_one_packet(packet_cptr const & packet)135 textsubs_packetizer_c::process_one_packet(packet_cptr const &packet) {
136 ++m_packetno;
137
138 if (0 > packet->duration) {
139 subtitle_number_packet_extension_c *extension = dynamic_cast<subtitle_number_packet_extension_c *>(packet->find_extension(packet_extension_c::SUBTITLE_NUMBER));
140 mxwarn_tid(m_ti.m_fname, m_ti.m_id, fmt::format(Y("Ignoring an entry which starts after it ends ({0}).\n"), extension ? extension->get_number() : static_cast<unsigned int>(m_packetno)));
141 return;
142 }
143
144 packet->duration_mandatory = true;
145
146 add_packet(packet);
147
148 if (m_force_rerender_track_headers_on_packetno && (*m_force_rerender_track_headers_on_packetno == m_packetno)) {
149 auto codec_private = memory_c::alloc(20000);
150 std::memset(codec_private->get_buffer(), 0, codec_private->get_size());
151 set_codec_private(codec_private);
152 rerender_track_headers();
153 }
154 }
155
156 connection_result_e
can_connect_to(generic_packetizer_c * src,std::string & error_message)157 textsubs_packetizer_c::can_connect_to(generic_packetizer_c *src,
158 std::string &error_message) {
159 textsubs_packetizer_c *psrc = dynamic_cast<textsubs_packetizer_c *>(src);
160 if (!psrc)
161 return CAN_CONNECT_NO_FORMAT;
162
163 connect_check_codec_private(src);
164
165 return CAN_CONNECT_YES;
166 }
167