1import re
2from .formatbase import FormatBase
3from .ssaevent import SSAEvent
4from .ssastyle import SSAStyle
5from .substation import parse_tags
6from .exceptions import ContentNotUsable
7from .time import ms_to_times, make_time, TIMESTAMP, timestamp_to_ms
8
#: Largest timestamp allowed in SubRip, i.e. 99:59:59,999 -- one millisecond
#: short of 100 hours. ``ms_to_timestamp()`` clamps larger values to this.
MAX_REPRESENTABLE_TIME = make_time(h=100) - 1
11
def ms_to_timestamp(ms):
    """
    Format a time in milliseconds as a SubRip timestamp ``HH:MM:SS,mmm``.

    Out-of-range input is clamped rather than rejected: negative values
    become 0 and values above ``MAX_REPRESENTABLE_TIME`` become
    ``MAX_REPRESENTABLE_TIME``.
    """
    # XXX throw on overflow/underflow?
    if ms < 0:
        clamped = 0
    elif ms > MAX_REPRESENTABLE_TIME:
        clamped = MAX_REPRESENTABLE_TIME
    else:
        clamped = ms

    hours, minutes, seconds, millis = ms_to_times(clamped)
    return "%02d:%02d:%02d,%03d" % (hours, minutes, seconds, millis)
19
20
class SubripFormat(FormatBase):
    """SubRip Text (SRT) subtitle format implementation"""

    #: Pattern used to find timestamps on a line; a line on which it matches
    #: exactly twice is treated as an SRT timing line ("start --> end").
    TIMESTAMP = TIMESTAMP

    @staticmethod
    def timestamp_to_ms(groups):
        """Convert one :attr:`TIMESTAMP` match (an item of ``findall()``) to milliseconds."""
        return timestamp_to_ms(groups)

    @classmethod
    def guess_format(cls, text):
        """
        See :meth:`pysubs2.formats.FormatBase.guess_format()`

        Returns:
            ``"srt"`` if some line of ``text`` contains exactly two
            timestamps and the text does not look like SSA/ASS or WebVTT,
            otherwise ``None``.
        """
        if "[Script Info]" in text or "[V4+ Styles]" in text:
            # disambiguation vs. SSA/ASS
            return None

        if text.lstrip().startswith("WEBVTT"):
            # disambiguation vs. WebVTT
            return None

        if any(len(cls.TIMESTAMP.findall(line)) == 2 for line in text.splitlines()):
            return "srt"

        return None

    @classmethod
    def from_file(cls, subs, fp, format_, keep_unknown_html_tags=False, **kwargs):
        """
        See :meth:`pysubs2.formats.FormatBase.from_file()`

        Reads ``fp`` line by line and replaces ``subs.events`` with one event
        per timing line found. Lines preceding the first timing line are
        ignored.

        Supported tags (matched case-insensitively, e.g. ``<I>`` works too):

          - ``<i>``
          - ``<u>``
          - ``<s>``

        Keyword args:
            keep_unknown_html_tags: If True, HTML tags other than i/u/s will be kept as-is.
                Otherwise, they will be stripped from input.
        """
        timestamps = []  # (start, end)
        following_lines = []  # contains lists of lines following each timestamp

        for line in fp:
            stamps = cls.TIMESTAMP.findall(line)
            if len(stamps) == 2:  # timestamp line
                start, end = map(cls.timestamp_to_ms, stamps)
                timestamps.append((start, end))
                following_lines.append([])
            elif timestamps:
                # Text (or the number line of the next subtitle) belonging to
                # the most recently seen timing line.
                following_lines[-1].append(line)

        # (pattern, replacement) pairs turning the supported HTML tags into
        # override tags. The "u" replacements are deliberately not raw
        # strings (kept from the original code, which avoided a
        # unicodeescape problem with r"...\u..." literals).
        tag_rewrites = [
            (r"< *i *>", r"{\\i1}"),
            (r"< */ *i *>", r"{\\i0}"),
            (r"< *s *>", r"{\\s1}"),
            (r"< */ *s *>", r"{\\s0}"),
            (r"< *u *>", "{\\\\u1}"),
            (r"< */ *u *>", "{\\\\u0}"),
        ]

        def prepare_text(lines):
            # Handle the "happy" empty subtitle case, which is timestamp line followed by blank line(s)
            # followed by number line and timestamp line of the next subtitle. Fixes issue #11.
            if (len(lines) >= 2
                    and all(re.match(r"\s*$", line) for line in lines[:-1])
                    and re.match(r"\s*\d+\s*$", lines[-1])):
                return ""

            # Handle the general case.
            s = "".join(lines).strip()
            s = re.sub(r"\n+ *\d+ *$", "", s)  # strip number of next subtitle
            for pattern, replacement in tag_rewrites:
                # HTML tag names are case-insensitive, so <I>, <U>, <S> must
                # be recognised as well; otherwise they would be stripped (or
                # kept as raw text) by the unknown-tag handling below.
                s = re.sub(pattern, replacement, s, flags=re.IGNORECASE)
            if not keep_unknown_html_tags:
                s = re.sub(r"< */? *[a-zA-Z][^>]*>", "", s)  # strip other HTML tags
            s = re.sub(r"\n", r"\\N", s)  # convert newlines
            return s

        subs.events = [SSAEvent(start=start, end=end, text=prepare_text(lines))
                       for (start, end), lines in zip(timestamps, following_lines)]

    @classmethod
    def to_file(cls, subs, fp, format_, apply_styles=True, **kwargs):
        """
        See :meth:`pysubs2.formats.FormatBase.to_file()`

        Italic, underline and strikeout styling is supported. Comment lines
        and lines containing drawings are not written; written subtitles are
        numbered consecutively from 1.

        Keyword args:
            apply_styles: If False, do not write any styling.

        """
        def prepare_text(text, style):
            body = []
            for fragment, sty in parse_tags(text, style, subs.styles):
                if sty.drawing:
                    # Drawings cannot be expressed in SRT; the caller skips
                    # the whole line when this is raised.
                    raise ContentNotUsable

                fragment = fragment.replace(r"\h", " ")
                fragment = fragment.replace(r"\n", "\n")
                fragment = fragment.replace(r"\N", "\n")
                if apply_styles:
                    if sty.italic:
                        fragment = "<i>%s</i>" % fragment
                    if sty.underline:
                        fragment = "<u>%s</u>" % fragment
                    if sty.strikeout:
                        fragment = "<s>%s</s>" % fragment
                body.append(fragment)

            # A blank line would terminate the subtitle in SRT, so runs of
            # newlines are collapsed into one.
            return re.sub("\n+", "\n", "".join(body).strip())

        visible_lines = (line for line in subs if not line.is_comment)

        # Counted by hand rather than with enumerate(): skipped lines must
        # not consume a subtitle number.
        lineno = 1
        for line in visible_lines:
            start = ms_to_timestamp(line.start)
            end = ms_to_timestamp(line.end)
            try:
                text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE))
            except ContentNotUsable:
                continue

            print("%d" % lineno, file=fp)
            print(start, "-->", end, file=fp)
            print(text, end="\n\n", file=fp)
            lineno += 1
138