1 use core::str::Bytes;
2 use std::convert::TryFrom;
3 use std::iter;
4 use vte::{Params, ParamsIter};
5
6 pub struct AnsiElementIterator<'a> {
7 // The input bytes
8 bytes: Bytes<'a>,
9
10 // The state machine
11 machine: vte::Parser,
12
13 // Becomes non-None when the parser finishes parsing an ANSI sequence.
14 // This is never Element::Text.
15 element: Option<Element>,
16
17 // Number of text bytes seen since the last element was emitted.
18 text_length: usize,
19
20 // Byte offset of start of current element.
21 start: usize,
22
23 // Byte offset of most rightward byte processed so far
24 pos: usize,
25 }
26
27 struct Performer {
28 // Becomes non-None when the parser finishes parsing an ANSI sequence.
29 // This is never Element::Text.
30 element: Option<Element>,
31
32 // Number of text bytes seen since the last element was emitted.
33 text_length: usize,
34 }
35
36 #[derive(Clone, Debug, PartialEq)]
37 pub enum Element {
38 Csi(ansi_term::Style, usize, usize),
39 Esc(usize, usize),
40 Osc(usize, usize),
41 Text(usize, usize),
42 }
43
44 impl<'a> AnsiElementIterator<'a> {
new(s: &'a str) -> Self45 pub fn new(s: &'a str) -> Self {
46 Self {
47 machine: vte::Parser::new(),
48 bytes: s.bytes(),
49 element: None,
50 text_length: 0,
51 start: 0,
52 pos: 0,
53 }
54 }
55
56 #[allow(dead_code)]
dbg(s: &str)57 pub fn dbg(s: &str) {
58 for el in AnsiElementIterator::new(s) {
59 match el {
60 Element::Csi(_, i, j) => println!("CSI({}, {}, {:?})", i, j, &s[i..j]),
61 Element::Esc(i, j) => println!("ESC({}, {}, {:?})", i, j, &s[i..j]),
62 Element::Osc(i, j) => println!("OSC({}, {}, {:?})", i, j, &s[i..j]),
63 Element::Text(i, j) => println!("Text({}, {}, {:?})", i, j, &s[i..j]),
64 }
65 }
66 }
67 }
68
69 impl<'a> Iterator for AnsiElementIterator<'a> {
70 type Item = Element;
71
next(&mut self) -> Option<Element>72 fn next(&mut self) -> Option<Element> {
73 loop {
74 // If the last element emitted was text, then there may be a non-text element waiting
75 // to be emitted. In that case we do not consume a new byte.
76 let byte = if self.element.is_some() {
77 None
78 } else {
79 self.bytes.next()
80 };
81 if byte.is_some() || self.element.is_some() {
82 if let Some(byte) = byte {
83 let mut performer = Performer {
84 element: None,
85 text_length: 0,
86 };
87 self.machine.advance(&mut performer, byte);
88 self.element = performer.element;
89 self.text_length += performer.text_length;
90 self.pos += 1;
91 }
92 if self.element.is_some() {
93 // There is a non-text element waiting to be emitted, but it may have preceding
94 // text, which must be emitted first.
95 if self.text_length > 0 {
96 let start = self.start;
97 self.start += self.text_length;
98 self.text_length = 0;
99 return Some(Element::Text(start, self.start));
100 }
101 let start = self.start;
102 self.start = self.pos;
103 let element = match self.element.as_ref().unwrap() {
104 Element::Csi(style, _, _) => Element::Csi(*style, start, self.pos),
105 Element::Esc(_, _) => Element::Esc(start, self.pos),
106 Element::Osc(_, _) => Element::Osc(start, self.pos),
107 Element::Text(_, _) => unreachable!(),
108 };
109 self.element = None;
110 return Some(element);
111 }
112 } else if self.text_length > 0 {
113 self.text_length = 0;
114 return Some(Element::Text(self.start, self.pos));
115 } else {
116 return None;
117 }
118 }
119 }
120 }
121
122 // Based on https://github.com/alacritty/vte/blob/v0.9.0/examples/parselog.rs
123 impl vte::Perform for Performer {
csi_dispatch(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: char)124 fn csi_dispatch(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: char) {
125 if ignore || intermediates.len() > 1 {
126 return;
127 }
128
129 if let ('m', None) = (c, intermediates.get(0)) {
130 if params.is_empty() {
131 // Attr::Reset
132 // Probably doesn't need to be handled: https://github.com/dandavison/delta/pull/431#discussion_r536883568
133 } else {
134 self.element = Some(Element::Csi(
135 ansi_term_style_from_sgr_parameters(&mut params.iter()),
136 0,
137 0,
138 ));
139 }
140 }
141 }
142
print(&mut self, c: char)143 fn print(&mut self, c: char) {
144 self.text_length += c.len_utf8();
145 }
146
execute(&mut self, byte: u8)147 fn execute(&mut self, byte: u8) {
148 // E.g. '\n'
149 if byte < 128 {
150 self.text_length += 1;
151 }
152 }
153
hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _c: char)154 fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _c: char) {}
155
put(&mut self, _byte: u8)156 fn put(&mut self, _byte: u8) {}
157
unhook(&mut self)158 fn unhook(&mut self) {}
159
osc_dispatch(&mut self, _params: &[&[u8]], _bell_terminated: bool)160 fn osc_dispatch(&mut self, _params: &[&[u8]], _bell_terminated: bool) {
161 self.element = Some(Element::Osc(0, 0));
162 }
163
esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8)164 fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {
165 self.element = Some(Element::Esc(0, 0));
166 }
167 }
168
169 // Based on https://github.com/alacritty/alacritty/blob/9e71002e40d5487c6fa2571a3a3c4f5c8f679334/alacritty_terminal/src/ansi.rs#L1175
ansi_term_style_from_sgr_parameters(params: &mut ParamsIter<'_>) -> ansi_term::Style170 fn ansi_term_style_from_sgr_parameters(params: &mut ParamsIter<'_>) -> ansi_term::Style {
171 let mut style = ansi_term::Style::new();
172 while let Some(param) = params.next() {
173 match param {
174 // [0] => Some(Attr::Reset),
175 [1] => style.is_bold = true,
176 [2] => style.is_dimmed = true,
177 [3] => style.is_italic = true,
178 [4, ..] => style.is_underline = true,
179 [5] => style.is_blink = true, // blink slow
180 [6] => style.is_blink = true, // blink fast
181 [7] => style.is_reverse = true,
182 [8] => style.is_hidden = true,
183 [9] => style.is_strikethrough = true,
184 // [21] => Some(Attr::CancelBold),
185 // [22] => Some(Attr::CancelBoldDim),
186 // [23] => Some(Attr::CancelItalic),
187 // [24] => Some(Attr::CancelUnderline),
188 // [25] => Some(Attr::CancelBlink),
189 // [27] => Some(Attr::CancelReverse),
190 // [28] => Some(Attr::CancelHidden),
191 // [29] => Some(Attr::CancelStrike),
192 [30] => style.foreground = Some(ansi_term::Color::Black),
193 [31] => style.foreground = Some(ansi_term::Color::Red),
194 [32] => style.foreground = Some(ansi_term::Color::Green),
195 [33] => style.foreground = Some(ansi_term::Color::Yellow),
196 [34] => style.foreground = Some(ansi_term::Color::Blue),
197 [35] => style.foreground = Some(ansi_term::Color::Purple),
198 [36] => style.foreground = Some(ansi_term::Color::Cyan),
199 [37] => style.foreground = Some(ansi_term::Color::White),
200 [38] => {
201 let mut iter = params.map(|param| param[0]);
202 if let Some(color) = parse_sgr_color(&mut iter) {
203 style.foreground = Some(color);
204 }
205 }
206 [38, params @ ..] => {
207 let rgb_start = if params.len() > 4 { 2 } else { 1 };
208 let rgb_iter = params[rgb_start..].iter().copied();
209 let mut iter = iter::once(params[0]).chain(rgb_iter);
210
211 if let Some(color) = parse_sgr_color(&mut iter) {
212 style.foreground = Some(color);
213 }
214 }
215 // [39] => Some(Attr::Foreground(Color::Named(NamedColor::Foreground))),
216 [40] => style.background = Some(ansi_term::Color::Black),
217 [41] => style.background = Some(ansi_term::Color::Red),
218 [42] => style.background = Some(ansi_term::Color::Green),
219 [43] => style.background = Some(ansi_term::Color::Yellow),
220 [44] => style.background = Some(ansi_term::Color::Blue),
221 [45] => style.background = Some(ansi_term::Color::Purple),
222 [46] => style.background = Some(ansi_term::Color::Cyan),
223 [47] => style.background = Some(ansi_term::Color::White),
224 [48] => {
225 let mut iter = params.map(|param| param[0]);
226 if let Some(color) = parse_sgr_color(&mut iter) {
227 style.background = Some(color);
228 }
229 }
230 [48, params @ ..] => {
231 let rgb_start = if params.len() > 4 { 2 } else { 1 };
232 let rgb_iter = params[rgb_start..].iter().copied();
233 let mut iter = iter::once(params[0]).chain(rgb_iter);
234 if let Some(color) = parse_sgr_color(&mut iter) {
235 style.background = Some(color);
236 }
237 }
238 // [49] => Some(Attr::Background(Color::Named(NamedColor::Background))),
239 // "bright" colors. ansi_term doesn't offer a way to emit them as, e.g., 90m; instead
240 // that would be 38;5;8.
241 [90] => style.foreground = Some(ansi_term::Color::Fixed(8)),
242 [91] => style.foreground = Some(ansi_term::Color::Fixed(9)),
243 [92] => style.foreground = Some(ansi_term::Color::Fixed(10)),
244 [93] => style.foreground = Some(ansi_term::Color::Fixed(11)),
245 [94] => style.foreground = Some(ansi_term::Color::Fixed(12)),
246 [95] => style.foreground = Some(ansi_term::Color::Fixed(13)),
247 [96] => style.foreground = Some(ansi_term::Color::Fixed(14)),
248 [97] => style.foreground = Some(ansi_term::Color::Fixed(15)),
249 [100] => style.background = Some(ansi_term::Color::Fixed(8)),
250 [101] => style.background = Some(ansi_term::Color::Fixed(9)),
251 [102] => style.background = Some(ansi_term::Color::Fixed(10)),
252 [103] => style.background = Some(ansi_term::Color::Fixed(11)),
253 [104] => style.background = Some(ansi_term::Color::Fixed(12)),
254 [105] => style.background = Some(ansi_term::Color::Fixed(13)),
255 [106] => style.background = Some(ansi_term::Color::Fixed(14)),
256 [107] => style.background = Some(ansi_term::Color::Fixed(15)),
257 _ => {}
258 };
259 }
260 style
261 }
262
263 // Based on https://github.com/alacritty/alacritty/blob/57c4ac9145a20fb1ae9a21102503458d3da06c7b/alacritty_terminal/src/ansi.rs#L1258
parse_sgr_color(params: &mut dyn Iterator<Item = u16>) -> Option<ansi_term::Color>264 fn parse_sgr_color(params: &mut dyn Iterator<Item = u16>) -> Option<ansi_term::Color> {
265 match params.next() {
266 Some(2) => {
267 let r = u8::try_from(params.next()?).ok()?;
268 let g = u8::try_from(params.next()?).ok()?;
269 let b = u8::try_from(params.next()?).ok()?;
270 Some(ansi_term::Color::RGB(r, g, b))
271 }
272 Some(5) => Some(ansi_term::Color::Fixed(u8::try_from(params.next()?).ok()?)),
273 _ => None,
274 }
275 }
276
#[cfg(test)]
mod tests {

    use super::{AnsiElementIterator, Element};
    use crate::style;

    /// The style expected from parsing `\x1b[31m`.
    fn red_foreground() -> ansi_term::Style {
        ansi_term::Style {
            foreground: Some(ansi_term::Color::Red),
            ..ansi_term::Style::default()
        }
    }

    /// Every git style example must parse as: style, "+", reset, style, "text", reset, "\n".
    #[test]
    fn test_iterator_parse_git_style_strings() {
        for (git_style_string, git_output) in &*style::tests::GIT_STYLE_STRING_EXAMPLES {
            let mut it = AnsiElementIterator::new(git_output);

            if *git_style_string == "normal" {
                // This one has a different pattern
                assert!(
                    matches!(it.next().unwrap(), Element::Csi(s, _, _) if s == ansi_term::Style::default())
                );
                assert!(
                    matches!(it.next().unwrap(), Element::Text(i, j) if &git_output[i..j] == "text")
                );
                assert!(
                    matches!(it.next().unwrap(), Element::Csi(s, _, _) if s == ansi_term::Style::default())
                );
                continue;
            }

            // Fails with the offending element instead of a bare `assert!(false)`.
            let assert_is_expected_style = |element: Element| match element {
                Element::Csi(actual, _, _) => assert!(
                    style::ansi_term_style_equality(
                        actual,
                        style::Style::from_git_str(git_style_string).ansi_term_style
                    ),
                    "style parsed from {:?} does not match git style {:?}",
                    git_output,
                    git_style_string
                ),
                other => panic!(
                    "expected a CSI element for git style {:?}, got {:?}",
                    git_style_string, other
                ),
            };

            // First element should be a style
            assert_is_expected_style(it.next().unwrap());

            // Second element should be text: "+"
            assert!(matches!(
                it.next().unwrap(),
                Element::Text(i, j) if &git_output[i..j] == "+"));

            // Third element is the reset style
            assert!(matches!(
                it.next().unwrap(),
                Element::Csi(s, _, _) if s == ansi_term::Style::default()));

            // Fourth element should be a style
            assert_is_expected_style(it.next().unwrap());

            // Fifth element should be text: "text"
            assert!(matches!(
                it.next().unwrap(),
                Element::Text(i, j) if &git_output[i..j] == "text"));

            // Sixth element is the reset style
            assert!(matches!(
                it.next().unwrap(),
                Element::Csi(s, _, _) if s == ansi_term::Style::default()));

            // Seventh element is the trailing newline
            assert!(matches!(
                it.next().unwrap(),
                Element::Text(i, j) if &git_output[i..j] == "\n"));

            assert!(it.next().is_none());
        }
    }

    /// Styled text followed by a reset and a bare newline.
    #[test]
    fn test_iterator_1() {
        let minus_line = "\x1b[31m0123\x1b[m\n";
        let actual_elements: Vec<Element> = AnsiElementIterator::new(minus_line).collect();
        assert_eq!(
            actual_elements,
            vec![
                Element::Csi(red_foreground(), 0, 5),
                Element::Text(5, 9),
                Element::Csi(ansi_term::Style::default(), 9, 12),
                Element::Text(12, 13),
            ]
        );
        assert_eq!("0123", &minus_line[5..9]);
        assert_eq!("\n", &minus_line[12..13]);
    }

    /// Styled text followed by a reset and unstyled trailing text.
    #[test]
    fn test_iterator_2() {
        let minus_line = "\x1b[31m0123\x1b[m456\n";
        let actual_elements: Vec<Element> = AnsiElementIterator::new(minus_line).collect();
        assert_eq!(
            actual_elements,
            vec![
                Element::Csi(red_foreground(), 0, 5),
                Element::Text(5, 9),
                Element::Csi(ansi_term::Style::default(), 9, 12),
                Element::Text(12, 16),
            ]
        );
        assert_eq!("0123", &minus_line[5..9]);
        assert_eq!("456\n", &minus_line[12..16]);
    }

    /// Offsets are byte offsets, so multi-byte characters widen the text span.
    #[test]
    fn test_iterator_styled_non_ascii() {
        let s = "\x1b[31mバー\x1b[0m";
        let actual_elements: Vec<Element> = AnsiElementIterator::new(s).collect();
        assert_eq!(
            actual_elements,
            vec![
                Element::Csi(red_foreground(), 0, 5),
                Element::Text(5, 11),
                Element::Csi(ansi_term::Style::default(), 11, 15),
            ]
        );
        assert_eq!("バー", &s[5..11]);
    }

    /// An OSC 8 hyperlink wrapped around styled, non-ASCII text.
    #[test]
    fn test_iterator_osc_hyperlinks_styled_non_ascii() {
        let s = "\x1b[38;5;4m\x1b]8;;file:///Users/dan/src/delta/src/ansi/mod.rs\x1b\\src/ansi/modバー.rs\x1b]8;;\x1b\\\x1b[0m\n";
        assert_eq!(&s[0..9], "\x1b[38;5;4m");
        assert_eq!(
            &s[9..58],
            "\x1b]8;;file:///Users/dan/src/delta/src/ansi/mod.rs\x1b"
        );
        assert_eq!(&s[58..59], "\\");
        assert_eq!(&s[59..80], "src/ansi/modバー.rs");
        assert_eq!(&s[80..86], "\x1b]8;;\x1b");
        assert_eq!(&s[86..87], "\\");
        assert_eq!(&s[87..91], "\x1b[0m");
        assert_eq!(&s[91..92], "\n");
        let actual_elements: Vec<Element> = AnsiElementIterator::new(s).collect();
        assert_eq!(
            actual_elements,
            vec![
                Element::Csi(
                    ansi_term::Style {
                        foreground: Some(ansi_term::Color::Fixed(4)),
                        ..ansi_term::Style::default()
                    },
                    0,
                    9
                ),
                Element::Osc(9, 58),
                Element::Esc(58, 59),
                Element::Text(59, 80),
                Element::Osc(80, 86),
                Element::Esc(86, 87),
                Element::Csi(ansi_term::Style::default(), 87, 91),
                Element::Text(91, 92),
            ]
        );
    }
}
457