1 use core::str::Bytes;
2 use std::convert::TryFrom;
3 use std::iter;
4 use vte::{Params, ParamsIter};
5 
/// Iterator that splits a string into ANSI escape sequence elements and the runs of text
/// between them.
///
/// Each call to `next` yields one [`Element`] carrying byte offsets into the string that
/// was passed to `new`.
pub struct AnsiElementIterator<'a> {
    // The input bytes
    bytes: Bytes<'a>,

    // The state machine
    machine: vte::Parser,

    // Becomes non-None when the parser finishes parsing an ANSI sequence.
    // This is never Element::Text.
    element: Option<Element>,

    // Number of text bytes seen since the last element was emitted.
    text_length: usize,

    // Byte offset of start of current element.
    start: usize,

    // Number of input bytes consumed so far, i.e. the byte offset one past the most
    // recently processed byte.
    pos: usize,
}
26 
/// Receives the callbacks issued by `vte::Parser::advance` and records what happened.
///
/// The iterator builds a fresh `Performer` for every input byte and then reads both fields
/// back, so each field only ever describes the effect of a single byte.
struct Performer {
    // Becomes non-None when the parser finishes parsing an ANSI sequence.
    // This is never Element::Text.
    element: Option<Element>,

    // Number of text bytes seen since the last element was emitted.
    text_length: usize,
}
35 
/// One region of the input string, tagged with what kind of content it holds.
///
/// In every variant the two `usize` fields are the start and end byte offsets of the
/// region; they are used as the half-open range `start..end` when slicing the input.
#[derive(Clone, Debug, PartialEq)]
pub enum Element {
    /// A CSI sequence with final byte `m`, together with the style computed from its
    /// parameters.
    Csi(ansi_term::Style, usize, usize),
    /// Region ended by the parser's ESC dispatch callback.
    Esc(usize, usize),
    /// Region ended by the parser's OSC dispatch callback.
    Osc(usize, usize),
    /// A run of text bytes.
    Text(usize, usize),
}
43 
44 impl<'a> AnsiElementIterator<'a> {
new(s: &'a str) -> Self45     pub fn new(s: &'a str) -> Self {
46         Self {
47             machine: vte::Parser::new(),
48             bytes: s.bytes(),
49             element: None,
50             text_length: 0,
51             start: 0,
52             pos: 0,
53         }
54     }
55 
56     #[allow(dead_code)]
dbg(s: &str)57     pub fn dbg(s: &str) {
58         for el in AnsiElementIterator::new(s) {
59             match el {
60                 Element::Csi(_, i, j) => println!("CSI({}, {}, {:?})", i, j, &s[i..j]),
61                 Element::Esc(i, j) => println!("ESC({}, {}, {:?})", i, j, &s[i..j]),
62                 Element::Osc(i, j) => println!("OSC({}, {}, {:?})", i, j, &s[i..j]),
63                 Element::Text(i, j) => println!("Text({}, {}, {:?})", i, j, &s[i..j]),
64             }
65         }
66     }
67 }
68 
69 impl<'a> Iterator for AnsiElementIterator<'a> {
70     type Item = Element;
71 
next(&mut self) -> Option<Element>72     fn next(&mut self) -> Option<Element> {
73         loop {
74             // If the last element emitted was text, then there may be a non-text element waiting
75             // to be emitted. In that case we do not consume a new byte.
76             let byte = if self.element.is_some() {
77                 None
78             } else {
79                 self.bytes.next()
80             };
81             if byte.is_some() || self.element.is_some() {
82                 if let Some(byte) = byte {
83                     let mut performer = Performer {
84                         element: None,
85                         text_length: 0,
86                     };
87                     self.machine.advance(&mut performer, byte);
88                     self.element = performer.element;
89                     self.text_length += performer.text_length;
90                     self.pos += 1;
91                 }
92                 if self.element.is_some() {
93                     // There is a non-text element waiting to be emitted, but it may have preceding
94                     // text, which must be emitted first.
95                     if self.text_length > 0 {
96                         let start = self.start;
97                         self.start += self.text_length;
98                         self.text_length = 0;
99                         return Some(Element::Text(start, self.start));
100                     }
101                     let start = self.start;
102                     self.start = self.pos;
103                     let element = match self.element.as_ref().unwrap() {
104                         Element::Csi(style, _, _) => Element::Csi(*style, start, self.pos),
105                         Element::Esc(_, _) => Element::Esc(start, self.pos),
106                         Element::Osc(_, _) => Element::Osc(start, self.pos),
107                         Element::Text(_, _) => unreachable!(),
108                     };
109                     self.element = None;
110                     return Some(element);
111                 }
112             } else if self.text_length > 0 {
113                 self.text_length = 0;
114                 return Some(Element::Text(self.start, self.pos));
115             } else {
116                 return None;
117             }
118         }
119     }
120 }
121 
122 // Based on https://github.com/alacritty/vte/blob/v0.9.0/examples/parselog.rs
123 impl vte::Perform for Performer {
csi_dispatch(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: char)124     fn csi_dispatch(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: char) {
125         if ignore || intermediates.len() > 1 {
126             return;
127         }
128 
129         if let ('m', None) = (c, intermediates.get(0)) {
130             if params.is_empty() {
131                 // Attr::Reset
132                 // Probably doesn't need to be handled: https://github.com/dandavison/delta/pull/431#discussion_r536883568
133             } else {
134                 self.element = Some(Element::Csi(
135                     ansi_term_style_from_sgr_parameters(&mut params.iter()),
136                     0,
137                     0,
138                 ));
139             }
140         }
141     }
142 
print(&mut self, c: char)143     fn print(&mut self, c: char) {
144         self.text_length += c.len_utf8();
145     }
146 
execute(&mut self, byte: u8)147     fn execute(&mut self, byte: u8) {
148         // E.g. '\n'
149         if byte < 128 {
150             self.text_length += 1;
151         }
152     }
153 
hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _c: char)154     fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _c: char) {}
155 
put(&mut self, _byte: u8)156     fn put(&mut self, _byte: u8) {}
157 
unhook(&mut self)158     fn unhook(&mut self) {}
159 
osc_dispatch(&mut self, _params: &[&[u8]], _bell_terminated: bool)160     fn osc_dispatch(&mut self, _params: &[&[u8]], _bell_terminated: bool) {
161         self.element = Some(Element::Osc(0, 0));
162     }
163 
esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8)164     fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {
165         self.element = Some(Element::Esc(0, 0));
166     }
167 }
168 
169 // Based on https://github.com/alacritty/alacritty/blob/9e71002e40d5487c6fa2571a3a3c4f5c8f679334/alacritty_terminal/src/ansi.rs#L1175
ansi_term_style_from_sgr_parameters(params: &mut ParamsIter<'_>) -> ansi_term::Style170 fn ansi_term_style_from_sgr_parameters(params: &mut ParamsIter<'_>) -> ansi_term::Style {
171     let mut style = ansi_term::Style::new();
172     while let Some(param) = params.next() {
173         match param {
174             // [0] => Some(Attr::Reset),
175             [1] => style.is_bold = true,
176             [2] => style.is_dimmed = true,
177             [3] => style.is_italic = true,
178             [4, ..] => style.is_underline = true,
179             [5] => style.is_blink = true, // blink slow
180             [6] => style.is_blink = true, // blink fast
181             [7] => style.is_reverse = true,
182             [8] => style.is_hidden = true,
183             [9] => style.is_strikethrough = true,
184             // [21] => Some(Attr::CancelBold),
185             // [22] => Some(Attr::CancelBoldDim),
186             // [23] => Some(Attr::CancelItalic),
187             // [24] => Some(Attr::CancelUnderline),
188             // [25] => Some(Attr::CancelBlink),
189             // [27] => Some(Attr::CancelReverse),
190             // [28] => Some(Attr::CancelHidden),
191             // [29] => Some(Attr::CancelStrike),
192             [30] => style.foreground = Some(ansi_term::Color::Black),
193             [31] => style.foreground = Some(ansi_term::Color::Red),
194             [32] => style.foreground = Some(ansi_term::Color::Green),
195             [33] => style.foreground = Some(ansi_term::Color::Yellow),
196             [34] => style.foreground = Some(ansi_term::Color::Blue),
197             [35] => style.foreground = Some(ansi_term::Color::Purple),
198             [36] => style.foreground = Some(ansi_term::Color::Cyan),
199             [37] => style.foreground = Some(ansi_term::Color::White),
200             [38] => {
201                 let mut iter = params.map(|param| param[0]);
202                 if let Some(color) = parse_sgr_color(&mut iter) {
203                     style.foreground = Some(color);
204                 }
205             }
206             [38, params @ ..] => {
207                 let rgb_start = if params.len() > 4 { 2 } else { 1 };
208                 let rgb_iter = params[rgb_start..].iter().copied();
209                 let mut iter = iter::once(params[0]).chain(rgb_iter);
210 
211                 if let Some(color) = parse_sgr_color(&mut iter) {
212                     style.foreground = Some(color);
213                 }
214             }
215             // [39] => Some(Attr::Foreground(Color::Named(NamedColor::Foreground))),
216             [40] => style.background = Some(ansi_term::Color::Black),
217             [41] => style.background = Some(ansi_term::Color::Red),
218             [42] => style.background = Some(ansi_term::Color::Green),
219             [43] => style.background = Some(ansi_term::Color::Yellow),
220             [44] => style.background = Some(ansi_term::Color::Blue),
221             [45] => style.background = Some(ansi_term::Color::Purple),
222             [46] => style.background = Some(ansi_term::Color::Cyan),
223             [47] => style.background = Some(ansi_term::Color::White),
224             [48] => {
225                 let mut iter = params.map(|param| param[0]);
226                 if let Some(color) = parse_sgr_color(&mut iter) {
227                     style.background = Some(color);
228                 }
229             }
230             [48, params @ ..] => {
231                 let rgb_start = if params.len() > 4 { 2 } else { 1 };
232                 let rgb_iter = params[rgb_start..].iter().copied();
233                 let mut iter = iter::once(params[0]).chain(rgb_iter);
234                 if let Some(color) = parse_sgr_color(&mut iter) {
235                     style.background = Some(color);
236                 }
237             }
238             // [49] => Some(Attr::Background(Color::Named(NamedColor::Background))),
239             // "bright" colors. ansi_term doesn't offer a way to emit them as, e.g., 90m; instead
240             // that would be 38;5;8.
241             [90] => style.foreground = Some(ansi_term::Color::Fixed(8)),
242             [91] => style.foreground = Some(ansi_term::Color::Fixed(9)),
243             [92] => style.foreground = Some(ansi_term::Color::Fixed(10)),
244             [93] => style.foreground = Some(ansi_term::Color::Fixed(11)),
245             [94] => style.foreground = Some(ansi_term::Color::Fixed(12)),
246             [95] => style.foreground = Some(ansi_term::Color::Fixed(13)),
247             [96] => style.foreground = Some(ansi_term::Color::Fixed(14)),
248             [97] => style.foreground = Some(ansi_term::Color::Fixed(15)),
249             [100] => style.background = Some(ansi_term::Color::Fixed(8)),
250             [101] => style.background = Some(ansi_term::Color::Fixed(9)),
251             [102] => style.background = Some(ansi_term::Color::Fixed(10)),
252             [103] => style.background = Some(ansi_term::Color::Fixed(11)),
253             [104] => style.background = Some(ansi_term::Color::Fixed(12)),
254             [105] => style.background = Some(ansi_term::Color::Fixed(13)),
255             [106] => style.background = Some(ansi_term::Color::Fixed(14)),
256             [107] => style.background = Some(ansi_term::Color::Fixed(15)),
257             _ => {}
258         };
259     }
260     style
261 }
262 
263 // Based on https://github.com/alacritty/alacritty/blob/57c4ac9145a20fb1ae9a21102503458d3da06c7b/alacritty_terminal/src/ansi.rs#L1258
parse_sgr_color(params: &mut dyn Iterator<Item = u16>) -> Option<ansi_term::Color>264 fn parse_sgr_color(params: &mut dyn Iterator<Item = u16>) -> Option<ansi_term::Color> {
265     match params.next() {
266         Some(2) => {
267             let r = u8::try_from(params.next()?).ok()?;
268             let g = u8::try_from(params.next()?).ok()?;
269             let b = u8::try_from(params.next()?).ok()?;
270             Some(ansi_term::Color::RGB(r, g, b))
271         }
272         Some(5) => Some(ansi_term::Color::Fixed(u8::try_from(params.next()?).ok()?)),
273         _ => None,
274     }
275 }
276 
#[cfg(test)]
mod tests {

    use super::{AnsiElementIterator, Element};
    use crate::style;

    /// Every git style example must parse to the sequence
    /// style, "+", reset, style, "text", reset, "\n" (with a shorter pattern for "normal").
    #[test]
    fn test_iterator_parse_git_style_strings() {
        for (git_style_string, git_output) in &*style::tests::GIT_STYLE_STRING_EXAMPLES {
            let mut it = AnsiElementIterator::new(git_output);

            if *git_style_string == "normal" {
                // This one has a different pattern
                assert!(
                    matches!(it.next().unwrap(), Element::Csi(s, _, _) if s == ansi_term::Style::default())
                );
                assert!(
                    matches!(it.next().unwrap(), Element::Text(i, j) if &git_output[i..j] == "text")
                );
                assert!(
                    matches!(it.next().unwrap(), Element::Csi(s, _, _) if s == ansi_term::Style::default())
                );
                continue;
            }

            // Checks that an element is a Csi carrying the style named by the git style
            // string; reports the offending element instead of a bare assertion failure.
            let assert_expected_style = |element: Element| match element {
                Element::Csi(style, _, _) => assert!(style::ansi_term_style_equality(
                    style,
                    style::Style::from_git_str(git_style_string).ansi_term_style
                )),
                other => panic!(
                    "expected a Csi element for git style {:?}, got {:?}",
                    git_style_string, other
                ),
            };

            // First element should be a style
            assert_expected_style(it.next().unwrap());

            // Second element should be text: "+"
            assert!(matches!(
                it.next().unwrap(),
                Element::Text(i, j) if &git_output[i..j] == "+"));

            // Third element is the reset style
            assert!(matches!(
                it.next().unwrap(),
                Element::Csi(s, _, _) if s == ansi_term::Style::default()));

            // Fourth element should be a style
            assert_expected_style(it.next().unwrap());

            // Fifth element should be text: "text"
            assert!(matches!(
                it.next().unwrap(),
                Element::Text(i, j) if &git_output[i..j] == "text"));

            // Sixth element is the reset style
            assert!(matches!(
                it.next().unwrap(),
                Element::Csi(s, _, _) if s == ansi_term::Style::default()));

            // Seventh element is the trailing newline
            assert!(matches!(
                it.next().unwrap(),
                Element::Text(i, j) if &git_output[i..j] == "\n"));

            assert!(it.next().is_none());
        }
    }

    #[test]
    fn test_iterator_1() {
        let minus_line = "\x1b[31m0123\x1b[m\n";
        let actual_elements: Vec<Element> = AnsiElementIterator::new(minus_line).collect();
        assert_eq!(
            actual_elements,
            vec![
                Element::Csi(
                    ansi_term::Style {
                        foreground: Some(ansi_term::Color::Red),
                        ..ansi_term::Style::default()
                    },
                    0,
                    5
                ),
                Element::Text(5, 9),
                Element::Csi(ansi_term::Style::default(), 9, 12),
                Element::Text(12, 13),
            ]
        );
        assert_eq!("0123", &minus_line[5..9]);
        assert_eq!("\n", &minus_line[12..13]);
    }

    #[test]
    fn test_iterator_2() {
        let minus_line = "\x1b[31m0123\x1b[m456\n";
        let actual_elements: Vec<Element> = AnsiElementIterator::new(minus_line).collect();
        assert_eq!(
            actual_elements,
            vec![
                Element::Csi(
                    ansi_term::Style {
                        foreground: Some(ansi_term::Color::Red),
                        ..ansi_term::Style::default()
                    },
                    0,
                    5
                ),
                Element::Text(5, 9),
                Element::Csi(ansi_term::Style::default(), 9, 12),
                Element::Text(12, 16),
            ]
        );
        assert_eq!("0123", &minus_line[5..9]);
        assert_eq!("456\n", &minus_line[12..16]);
    }

    #[test]
    fn test_iterator_styled_non_ascii() {
        let s = "\x1b[31mバー\x1b[0m";
        let actual_elements: Vec<Element> = AnsiElementIterator::new(s).collect();
        assert_eq!(
            actual_elements,
            vec![
                Element::Csi(
                    ansi_term::Style {
                        foreground: Some(ansi_term::Color::Red),
                        ..ansi_term::Style::default()
                    },
                    0,
                    5
                ),
                Element::Text(5, 11),
                Element::Csi(ansi_term::Style::default(), 11, 15),
            ]
        );
        assert_eq!("バー", &s[5..11]);
    }

    #[test]
    fn test_iterator_osc_hyperlinks_styled_non_ascii() {
        let s = "\x1b[38;5;4m\x1b]8;;file:///Users/dan/src/delta/src/ansi/mod.rs\x1b\\src/ansi/modバー.rs\x1b]8;;\x1b\\\x1b[0m\n";
        assert_eq!(&s[0..9], "\x1b[38;5;4m");
        assert_eq!(
            &s[9..58],
            "\x1b]8;;file:///Users/dan/src/delta/src/ansi/mod.rs\x1b"
        );
        assert_eq!(&s[58..59], "\\");
        assert_eq!(&s[59..80], "src/ansi/modバー.rs");
        assert_eq!(&s[80..86], "\x1b]8;;\x1b");
        assert_eq!(&s[86..87], "\\");
        assert_eq!(&s[87..91], "\x1b[0m");
        assert_eq!(&s[91..92], "\n");
        let actual_elements: Vec<Element> = AnsiElementIterator::new(s).collect();
        assert_eq!(
            actual_elements,
            vec![
                Element::Csi(
                    ansi_term::Style {
                        foreground: Some(ansi_term::Color::Fixed(4)),
                        ..ansi_term::Style::default()
                    },
                    0,
                    9
                ),
                Element::Osc(9, 58),
                Element::Esc(58, 59),
                Element::Text(59, 80),
                Element::Osc(80, 86),
                Element::Esc(86, 87),
                Element::Csi(ansi_term::Style::default(), 87, 91),
                Element::Text(91, 92),
            ]
        );
    }
}
457