import re
from .formatbase import FormatBase
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
from .substation import parse_tags
from .exceptions import ContentNotUsable
from .time import ms_to_times, make_time, TIMESTAMP, timestamp_to_ms

#: Largest timestamp allowed in SubRip, ie. 99:59:59,999.
MAX_REPRESENTABLE_TIME = make_time(h=100) - 1


def ms_to_timestamp(ms):
    """
    Format a time in milliseconds as a SubRip ``HH:MM:SS,mmm`` timestamp.

    Values outside ``0 .. MAX_REPRESENTABLE_TIME`` are clamped to the nearest
    bound instead of raising.
    """
    # XXX throw on overflow/underflow?
    clamped = ms
    if clamped < 0:
        clamped = 0
    elif clamped > MAX_REPRESENTABLE_TIME:
        clamped = MAX_REPRESENTABLE_TIME

    hours, minutes, seconds, millis = ms_to_times(clamped)
    return "%02d:%02d:%02d,%03d" % (hours, minutes, seconds, millis)


class SubripFormat(FormatBase):
    """SubRip Text (SRT) subtitle format implementation"""

    # Timestamp pattern used by guess_format() and from_file(); kept as a
    # class attribute so it is always looked up through ``cls``.
    TIMESTAMP = TIMESTAMP

    @staticmethod
    def timestamp_to_ms(groups):
        """Convert one match of ``TIMESTAMP`` to milliseconds (delegates to the module-level helper)."""
        return timestamp_to_ms(groups)

    @classmethod
    def guess_format(cls, text):
        """See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
        # disambiguation vs. SSA/ASS
        if "[Script Info]" in text or "[V4+ Styles]" in text:
            return None

        # disambiguation vs. WebVTT
        if text.lstrip().startswith("WEBVTT"):
            return None

        # A line holding exactly two timestamps is taken as an SRT timing line.
        has_timing_line = any(
            len(cls.TIMESTAMP.findall(candidate)) == 2
            for candidate in text.splitlines()
        )
        if has_timing_line:
            return "srt"
        return None

    @classmethod
    def from_file(cls, subs, fp, format_, keep_unknown_html_tags=False, **kwargs):
        """
        See :meth:`pysubs2.formats.FormatBase.from_file()`

        Supported tags:

          - ``<i>``
          - ``<u>``
          - ``<s>``

        Keyword args:
            keep_unknown_html_tags: If True, HTML tags other than i/u/s will be kept as-is.
                Otherwise, they will be stripped from input.

        """
        # (pattern, replacement) pairs translating SRT tags to override tags,
        # applied in this order.
        tag_substitutions = (
            (r"< *i *>", r"{\\i1}"),
            (r"< */ *i *>", r"{\\i0}"),
            (r"< *s *>", r"{\\s1}"),
            (r"< */ *s *>", r"{\\s0}"),
            # not r" for Python 2.7 compat, triggers unicodeescape
            (r"< *u *>", "{\\\\u1}"),
            (r"< */ *u *>", "{\\\\u0}"),
        )

        timings = []      # (start, end) in ms, one per timestamp line
        text_blocks = []  # for each timing, the raw lines that follow it

        for raw_line in fp:
            found = cls.TIMESTAMP.findall(raw_line)
            if len(found) == 2:
                # timestamp line: opens a new subtitle
                start_ms, end_ms = (cls.timestamp_to_ms(groups) for groups in found)
                timings.append((start_ms, end_ms))
                text_blocks.append([])
            elif timings:
                # anything before the first timestamp line is ignored
                text_blocks[-1].append(raw_line)

        def prepare_text(lines):
            # Handle the "happy" empty subtitle case, which is timestamp line followed by
            # blank line(s) followed by number line and timestamp line of the next subtitle.
            # Fixes issue #11.
            if len(lines) >= 2:
                only_blanks = all(re.match(r"\s*$", ln) for ln in lines[:-1])
                if only_blanks and re.match(r"\s*\d+\s*$", lines[-1]):
                    return ""

            # Handle the general case.
            content = "".join(lines).strip()
            content = re.sub(r"\n+ *\d+ *$", "", content)  # strip number of next subtitle
            for pattern, replacement in tag_substitutions:
                content = re.sub(pattern, replacement, content)
            if not keep_unknown_html_tags:
                content = re.sub(r"< */? *[a-zA-Z][^>]*>", "", content)  # strip other HTML tags
            return re.sub(r"\n", r"\\N", content)  # convert newlines

        events = []
        for (start_ms, end_ms), block in zip(timings, text_blocks):
            events.append(SSAEvent(start=start_ms, end=end_ms, text=prepare_text(block)))
        subs.events = events

    @classmethod
    def to_file(cls, subs, fp, format_, apply_styles=True, **kwargs):
        """
        See :meth:`pysubs2.formats.FormatBase.to_file()`

        Italic, underline and strikeout styling is supported.

        Keyword args:
            apply_styles: If False, do not write any styling.

        """
        def prepare_text(text, style):
            pieces = []
            for fragment, sty in parse_tags(text, style, subs.styles):
                # Drawing fragments make the whole subtitle unusable,
                # whether or not styling is being written.
                if sty.drawing:
                    raise ContentNotUsable

                # Translate the \h, \n and \N escapes to a space / real newlines.
                piece = (fragment.replace(r"\h", " ")
                                 .replace(r"\n", "\n")
                                 .replace(r"\N", "\n"))
                if apply_styles:
                    if sty.italic:
                        piece = "<i>%s</i>" % piece
                    if sty.underline:
                        piece = "<u>%s</u>" % piece
                    if sty.strikeout:
                        piece = "<s>%s</s>" % piece
                pieces.append(piece)

            # Collapse runs of newlines into a single newline.
            return re.sub("\n+", "\n", "".join(pieces).strip())

        lineno = 1  # only incremented for subtitles that are actually written
        for line in subs:
            if line.is_comment:
                continue

            start = ms_to_timestamp(line.start)
            end = ms_to_timestamp(line.end)
            style = subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE)
            try:
                text = prepare_text(line.text, style)
            except ContentNotUsable:
                # skip this subtitle entirely; its number is not consumed
                continue

            print("%d" % lineno, file=fp)  # Python 2.7 compat
            print(start, "-->", end, file=fp)
            print(text, end="\n\n", file=fp)
            lineno += 1