from contextlib import suppress
import re
from typing import Iterable, NamedTuple

from .color import Color
from .style import Style
from .text import Text

# Matches the two kinds of escape sequence the decoder interprets:
#   group 1 - parameter string of an SGR sequence: ESC [ <params> m
#   group 2 - payload of an OSC sequence: ESC ] <payload> (ST | BEL)
# The SGR group only admits CSI parameter bytes (0x30-0x3F). A looser pattern
# such as ``.*?`` would let any other CSI sequence (e.g. ``ESC[?25l``) swallow
# all following text up to the next literal "m".
re_ansi = re.compile(r"(?:\x1b\[([0-?]*)m)|(?:\x1b\](.*?)(?:\x1b\\|\x07))")
# Matches any remaining escape / CSI sequence so it can be stripped from text.
re_csi = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")


class _AnsiToken(NamedTuple):
    """Result of ansi tokenized string.

    Exactly one kind of content is carried per token: ``plain`` text,
    an ``sgr`` parameter string, or an ``osc`` payload. Unused fields are "".
    """

    plain: str = ""
    sgr: str = ""
    osc: str = ""


def _ansi_tokenize(ansi_text: str) -> Iterable[_AnsiToken]:
    """Tokenize a string in to plain text and ANSI codes.

    Args:
        ansi_text (str): A String containing ANSI codes.

    Yields:
        AnsiToken: A named tuple of (plain, sgr, osc)
    """

    def remove_csi(ansi_text: str) -> str:
        """Remove unknown CSI sequences."""
        return re_csi.sub("", ansi_text)

    position = 0
    for match in re_ansi.finditer(ansi_text):
        start, end = match.span(0)
        sgr, osc = match.groups()
        if start > position:
            # Text between two recognised sequences.
            yield _AnsiToken(remove_csi(ansi_text[position:start]))
        if sgr is not None:
            # An SGR with no parameters ("ESC[m") means "reset"; normalise it
            # to "0" so it is not mistaken for an empty (ignored) token.
            yield _AnsiToken("", sgr or "0", "")
        else:
            # The unmatched regex group is None; keep every field a str.
            yield _AnsiToken("", "", osc or "")
        position = end
    if position < len(ansi_text):
        # Trailing text after the last recognised sequence.
        yield _AnsiToken(remove_csi(ansi_text[position:]))


# Maps simple (parameterless) SGR codes to style definitions understood by
# Style.parse. Codes 38 / 48 (extended colors) and 0 (reset) are handled
# separately in AnsiDecoder.decode_line.
SGR_STYLE_MAP = {
    1: "bold",
    2: "dim",
    3: "italic",
    4: "underline",
    5: "blink",
    6: "blink2",
    7: "reverse",
    8: "conceal",
    9: "strike",
    21: "underline2",
    22: "not dim not bold",
    23: "not italic",
    24: "not underline",
    25: "not blink",
    26: "not blink2",
    27: "not reverse",
    28: "not conceal",
    29: "not strike",
    30: "color(0)",
    31: "color(1)",
    32: "color(2)",
    33: "color(3)",
    34: "color(4)",
    35: "color(5)",
    36: "color(6)",
    37: "color(7)",
    39: "default",
    40: "on color(0)",
    41: "on color(1)",
    42: "on color(2)",
    43: "on color(3)",
    44: "on color(4)",
    45: "on color(5)",
    46: "on color(6)",
    47: "on color(7)",
    49: "on default",
    51: "frame",
    52: "encircle",
    53: "overline",
    54: "not frame not encircle",
    55: "not overline",
    90: "color(8)",
    91: "color(9)",
    92: "color(10)",
    93: "color(11)",
    94: "color(12)",
    95: "color(13)",
    96: "color(14)",
    97: "color(15)",
    100: "on color(8)",
    101: "on color(9)",
    102: "on color(10)",
    103: "on color(11)",
    104: "on color(12)",
    105: "on color(13)",
    106: "on color(14)",
    107: "on color(15)",
}


class AnsiDecoder:
    """Translate ANSI code in to styled Text.

    The decoder keeps the current style in ``self.style``; it is carried over
    from one decoded line to the next until changed by further codes.
    """

    def __init__(self) -> None:
        self.style = Style.null()

    def decode(self, terminal_text: str) -> Iterable[Text]:
        """Decode ANSI codes in terminal output, line by line.

        Args:
            terminal_text (str): Terminal output, possibly spanning several lines.

        Yields:
            Text: Marked up Text, one instance per line of input.
        """
        for line in terminal_text.splitlines():
            yield self.decode_line(line)

    def decode_line(self, line: str) -> Text:
        """Decode a line containing ansi codes.

        Args:
            line (str): A line of terminal output.

        Returns:
            Text: A Text instance marked up according to ansi codes.
        """
        from_ansi = Color.from_ansi
        from_rgb = Color.from_rgb
        _Style = Style
        text = Text()
        append = text.append
        # Only keep what follows the last carriage return on the line.
        line = line.rsplit("\r", 1)[-1]
        for token in _ansi_tokenize(line):
            plain_text, sgr, osc = token
            if plain_text:
                append(plain_text, self.style or None)
            elif osc:
                # OSC 8 is a hyperlink: "8;<params>;<url>"; an empty url
                # clears the current link.
                if osc.startswith("8;"):
                    _params, semicolon, link = osc[2:].partition(";")
                    if semicolon:
                        self.style = self.style.update_link(link or None)
            elif sgr:
                # Translate in to semi-colon separated codes
                # Ignore invalid codes, because we want to be lenient.
                # An empty field stands for the default value 0, and
                # isdecimal() (unlike isdigit()) only accepts characters that
                # int() can actually convert.
                codes = [
                    min(255, int(_code)) if _code else 0
                    for _code in sgr.split(";")
                    if not _code or _code.isdecimal()
                ]
                iter_codes = iter(codes)
                for code in iter_codes:
                    if code == 0:
                        # reset
                        self.style = _Style.null()
                    elif code in SGR_STYLE_MAP:
                        # styles
                        self.style += _Style.parse(SGR_STYLE_MAP[code])
                    elif code == 38:
                        # Foreground; a truncated sequence raises
                        # StopIteration from next() and is simply dropped.
                        with suppress(StopIteration):
                            color_type = next(iter_codes)
                            if color_type == 5:
                                self.style += _Style.from_color(
                                    from_ansi(next(iter_codes))
                                )
                            elif color_type == 2:
                                self.style += _Style.from_color(
                                    from_rgb(
                                        next(iter_codes),
                                        next(iter_codes),
                                        next(iter_codes),
                                    )
                                )
                    elif code == 48:
                        # Background; truncated sequences are dropped as above.
                        with suppress(StopIteration):
                            color_type = next(iter_codes)
                            if color_type == 5:
                                self.style += _Style.from_color(
                                    None, from_ansi(next(iter_codes))
                                )
                            elif color_type == 2:
                                self.style += _Style.from_color(
                                    None,
                                    from_rgb(
                                        next(iter_codes),
                                        next(iter_codes),
                                        next(iter_codes),
                                    ),
                                )

        return text


if __name__ == "__main__":  # pragma: no cover
    # Demo: run the command given on the command line inside a pty, capture
    # its output, then render the decoded output and save it as HTML.
    import pty
    import io
    import os
    import sys

    decoder = AnsiDecoder()

    stdout = io.BytesIO()

    def read(fd):
        """Read from the pty and keep a copy of everything the child wrote."""
        data = os.read(fd, 1024)
        stdout.write(data)
        return data

    pty.spawn(sys.argv[1:], read)

    from .console import Console

    console = Console(record=True)

    stdout_result = stdout.getvalue().decode("utf-8")
    print(stdout_result)

    for line in decoder.decode(stdout_result):
        console.print(line)

    console.save_html("stdout.html")