1 /*
2    mkvmerge -- utility for splicing together matroska files
3    from component media subtypes
4 
5    Distributed under the GPL v2
6    see the file COPYING for details
7    or visit https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
8 
9    class definition for the subtitle helper
10 
11    Written by Moritz Bunkus <moritz@bunkus.org>.
12 */
13 
14 #pragma once
15 
16 #include "common/common_pch.h"
17 
18 #include "merge/output_control.h"
19 #include "output/p_textsubs.h"
20 
// A single subtitle entry: a start/end pair, a running number and the
// subtitle text itself. The units of `start` and `end` are not visible
// in this header -- presumably timestamps; confirm against the callers.
struct sub_t {
  int64_t start;
  int64_t end;
  unsigned int number;
  std::string subs;

  // Stores copies of all four arguments.
  sub_t(int64_t _start, int64_t _end, unsigned int _number, const std::string &_subs)
    : start{_start}
    , end{_end}
    , number{_number}
    , subs{_subs}
  {
  }

  // Entries are ordered by their start value only; `end`, `number` and
  // `subs` do not take part in the comparison.
  bool operator < (const sub_t &rhs) const {
    return start < rhs.start;
  }
};
34 
35 class subtitles_c {
36 protected:
37   std::deque<sub_t> entries;
38   std::deque<sub_t>::iterator current;
39   charset_converter_cptr m_cc_utf8;
40   bool m_try_utf8{}, m_invalid_utf8_warned{};
41   std::string m_file_name;
42   int64_t m_track_id{};
43 
44 public:
45   subtitles_c(std::string const &file_name, int64_t track_id);
add(int64_t start,int64_t end,unsigned int number,const std::string & subs)46   void add(int64_t start, int64_t end, unsigned int number, const std::string &subs) {
47     entries.push_back(sub_t(start, end, number, subs));
48   }
reset()49   void reset() {
50     current = entries.begin();
51   }
get_num_entries()52   int get_num_entries() {
53     return entries.size();
54   }
get_num_processed()55   int get_num_processed() {
56     return std::distance(entries.begin(), current);
57   }
58   void process(generic_packetizer_c *);
sort()59   void sort() {
60     std::stable_sort(entries.begin(), entries.end());
61     reset();
62   }
empty()63   bool empty() {
64     return current == entries.end();
65   }
66 
get_total_byte_size()67   int64_t get_total_byte_size() {
68     return std::accumulate(entries.begin(), entries.end(), 0ull, [](int64_t num, auto const &entry) { return num + entry.subs.length(); });
69   }
70 
get_next_byte_size()71   int64_t get_next_byte_size() {
72     if (empty() || (entries.end() == current))
73       return 0;
74     return current->subs.length();
75   }
76 
77   void set_charset_converter(charset_converter_cptr const &cc_utf8);
78   std::string recode(std::string const &s, uint32_t replacement_marker = 0xfffdu);
79 };
80 using subtitles_cptr = std::shared_ptr<subtitles_c>;
81 
82 class srt_parser_c: public subtitles_c {
83 public:
84   enum parser_state_e {
85     STATE_INITIAL,
86     STATE_SUBS,
87     STATE_SUBS_OR_NUMBER,
88     STATE_TIME,
89   };
90 
91 protected:
92   mm_text_io_cptr m_io;
93   bool m_coordinates_warning_shown;
94   debugging_option_c m_debug{"srt_parser"};
95 
96 public:
97   srt_parser_c(mm_text_io_cptr const &io, const std::string &file_name, int64_t track_id);
98   void parse();
99 
100 public:
101   static bool probe(mm_text_io_c &io);
102 };
103 using srt_parser_cptr = std::shared_ptr<srt_parser_c>;
104 
105 class ssa_parser_c: public subtitles_c {
106 public:
107   enum ssa_section_e {
108     SSA_SECTION_NONE,
109     SSA_SECTION_INFO,
110     SSA_SECTION_V4STYLES,
111     SSA_SECTION_EVENTS,
112     SSA_SECTION_GRAPHICS,
113     SSA_SECTION_FONTS
114   };
115 
116 protected:
117   generic_reader_c &m_reader;
118   mm_text_io_cptr m_io;
119   std::vector<std::string> m_format;
120   bool m_is_ass;
121   std::string m_global;
122   int64_t m_attachment_id;
123 
124 public:
125   std::vector<attachment_t> m_attachments;
126 
127 public:
128   ssa_parser_c(generic_reader_c &reader, mm_text_io_cptr const &io, const std::string &file_name, int64_t track_id);
129   void parse();
130 
is_ass()131   bool is_ass() {
132     return m_is_ass;
133   }
134 
get_global()135   std::string get_global() {
136     return m_global;
137   }
138 
set_attachment_id_base(int64_t id)139   void set_attachment_id_base(int64_t id) {
140     m_attachment_id = id;
141   }
142 
143 public:
144   static bool probe(mm_text_io_c &io);
145 
146 protected:
147   int64_t parse_time(std::string &time);
148   std::string get_element(const char *index, std::vector<std::string> &fields);
149   void add_attachment_maybe(std::string &name, std::string &data_uu, ssa_section_e section);
150   void decode_chars(unsigned char const *in, unsigned char *out, size_t bytes_in);
151 };
152 using ssa_parser_cptr = std::shared_ptr<ssa_parser_c>;
153