1 /* 2 mkvmerge -- utility for splicing together matroska files 3 from component media subtypes 4 5 Distributed under the GPL v2 6 see the file COPYING for details 7 or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html 8 9 class definition for the subtitle helper 10 11 Written by Moritz Bunkus <moritz@bunkus.org>. 12 */ 13 14 #pragma once 15 16 #include "common/common_pch.h" 17 18 #include "merge/output_control.h" 19 #include "output/p_textsubs.h" 20 21 struct sub_t { 22 int64_t start, end; 23 unsigned int number; 24 std::string subs; 25 sub_tsub_t26 sub_t(int64_t _start, int64_t _end, unsigned int _number, const std::string &_subs): 27 start(_start), end(_end), number(_number), subs(_subs) { 28 } 29 30 bool operator < (const sub_t &cmp) const { 31 return start < cmp.start; 32 } 33 }; 34 35 class subtitles_c { 36 protected: 37 std::deque<sub_t> entries; 38 std::deque<sub_t>::iterator current; 39 charset_converter_cptr m_cc_utf8; 40 bool m_try_utf8{}, m_invalid_utf8_warned{}; 41 std::string m_file_name; 42 int64_t m_track_id{}; 43 44 public: 45 subtitles_c(std::string const &file_name, int64_t track_id); add(int64_t start,int64_t end,unsigned int number,const std::string & subs)46 void add(int64_t start, int64_t end, unsigned int number, const std::string &subs) { 47 entries.push_back(sub_t(start, end, number, subs)); 48 } reset()49 void reset() { 50 current = entries.begin(); 51 } get_num_entries()52 int get_num_entries() { 53 return entries.size(); 54 } get_num_processed()55 int get_num_processed() { 56 return std::distance(entries.begin(), current); 57 } 58 void process(generic_packetizer_c *); sort()59 void sort() { 60 std::stable_sort(entries.begin(), entries.end()); 61 reset(); 62 } empty()63 bool empty() { 64 return current == entries.end(); 65 } 66 get_total_byte_size()67 int64_t get_total_byte_size() { 68 return std::accumulate(entries.begin(), entries.end(), 0ull, [](int64_t num, auto const &entry) { return num + entry.subs.length(); }); 69 } 70 get_next_byte_size()71 int64_t get_next_byte_size() { 72 if (empty() || (entries.end() == current)) 73 return 0; 74 return current->subs.length(); 75 } 76 77 void set_charset_converter(charset_converter_cptr const &cc_utf8); 78 std::string recode(std::string const &s, uint32_t replacement_marker = 0xfffdu); 79 }; 80 using subtitles_cptr = std::shared_ptr<subtitles_c>; 81 82 class srt_parser_c: public subtitles_c { 83 public: 84 enum parser_state_e { 85 STATE_INITIAL, 86 STATE_SUBS, 87 STATE_SUBS_OR_NUMBER, 88 STATE_TIME, 89 }; 90 91 protected: 92 mm_text_io_cptr m_io; 93 bool m_coordinates_warning_shown; 94 debugging_option_c m_debug{"srt_parser"}; 95 96 public: 97 srt_parser_c(mm_text_io_cptr const &io, const std::string &file_name, int64_t track_id); 98 void parse(); 99 100 public: 101 static bool probe(mm_text_io_c &io); 102 }; 103 using srt_parser_cptr = std::shared_ptr<srt_parser_c>; 104 105 class ssa_parser_c: public subtitles_c { 106 public: 107 enum ssa_section_e { 108 SSA_SECTION_NONE, 109 SSA_SECTION_INFO, 110 SSA_SECTION_V4STYLES, 111 SSA_SECTION_EVENTS, 112 SSA_SECTION_GRAPHICS, 113 SSA_SECTION_FONTS 114 }; 115 116 protected: 117 generic_reader_c &m_reader; 118 mm_text_io_cptr m_io; 119 std::vector<std::string> m_format; 120 bool m_is_ass; 121 std::string m_global; 122 int64_t m_attachment_id; 123 124 public: 125 std::vector<attachment_t> m_attachments; 126 127 public: 128 ssa_parser_c(generic_reader_c &reader, mm_text_io_cptr const &io, const std::string &file_name, int64_t track_id); 129 void parse(); 130 is_ass()131 bool is_ass() { 132 return m_is_ass; 133 } 134 get_global()135 std::string get_global() { 136 return m_global; 137 } 138 set_attachment_id_base(int64_t id)139 void set_attachment_id_base(int64_t id) { 140 m_attachment_id = id; 141 } 142 143 public: 144 static bool probe(mm_text_io_c &io); 145 146 protected: 147 int64_t parse_time(std::string &time); 148 std::string get_element(const char *index, std::vector<std::string> &fields); 149 void add_attachment_maybe(std::string &name, std::string &data_uu, ssa_section_e section); 150 void decode_chars(unsigned char const *in, unsigned char *out, size_t bytes_in); 151 }; 152 using ssa_parser_cptr = std::shared_ptr<ssa_parser_c>; 153