1 //! Iterators and data structures for transforming parsing information into styled text.
2 
3 // Code based on https://github.com/defuz/sublimate/blob/master/src/core/syntax/highlighter.rs
4 // released under the MIT license by @defuz
5 
6 use std::iter::Iterator;
7 use std::ops::Range;
8 
9 use crate::parsing::{Scope, ScopeStack, BasicScopeStackOp, ScopeStackOp, MatchPower, ATOM_LEN_BITS};
10 use super::selector::ScopeSelector;
11 use super::theme::{Theme, ThemeItem};
12 use super::style::{Color, FontStyle, Style, StyleModifier};
13 
/// Basically a wrapper around a [`Theme`] preparing it to be used for highlighting.
///
/// This is part of the API to preserve the possibility of caching matches of the
/// selectors of the theme on various scope paths or setting up some kind of
/// accelerator structure.
///
/// So for now this does very little but eventually if you keep it around between
/// highlighting runs it will preserve its cache.
///
/// [`Theme`]: struct.Theme.html
#[derive(Debug)]
pub struct Highlighter<'a> {
    /// The theme whose scope rules and default settings drive the highlighting.
    theme: &'a Theme,
    /// Cache of the selectors in the theme that are only one scope, each paired with the
    /// style modifier of the theme item it came from.
    /// In most themes this is the majority, hence the usefulness.
    single_selectors: Vec<(Scope, StyleModifier)>,
    /// All remaining selectors of the theme (those from which no single scope could be
    /// extracted), each paired with the style modifier of the theme item it came from.
    multi_selectors: Vec<(ScopeSelector, StyleModifier)>,
    // TODO single_cache: HashMap<Scope, StyleModifier, BuildHasherDefault<FnvHasher>>,
}
33 
/// Keeps a stack of scopes and styles as state between highlighting different lines.
///
/// If you are highlighting an entire file you create one of these at the start and use it
/// all the way to the end.
///
/// # Caching
///
/// One reason this is exposed is that since it implements `Clone` you can actually cache these
/// (probably along with a [`ParseState`]) and only re-start highlighting from the point of a
/// change. You could also do something fancy like only highlight a bit past the end of a user's
/// screen and resume highlighting when they scroll down on large files.
///
/// Alternatively you can save space by caching only the `path` field of this struct then re-create
/// the `HighlightState` when needed by passing that stack as the `initial_stack` parameter to the
/// [`new`] method. This takes less space but a small amount of time to re-create the style stack.
///
/// **Note:** Caching is for advanced users who have tons of time to maximize performance or want to
/// do so eventually. It is not recommended that you try caching the first time you implement
/// highlighting.
///
/// [`ParseState`]: ../parsing/struct.ParseState.html
/// [`new`]: #method.new
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HighlightState {
    /// Stack of fully resolved styles: the theme default at the bottom, followed by one entry
    /// per scope on `path`. The top entry is the style of the text currently being emitted.
    styles: Vec<Style>,
    /// Stack parallel to `styles` holding the scored result of the single-scope selectors
    /// only, so a push can build on the previous entry instead of starting from scratch.
    single_caches: Vec<ScoredStyle>,
    /// The scope stack that `styles` and `single_caches` were computed for.
    pub path: ScopeStack,
}
62 
/// Highlights a line of parsed code given a [`HighlightState`] and line of changes from the parser.
///
/// Yields the [`Style`], the text as well as the `Range` of the text in the source string.
///
/// It splits a line of text into different pieces each with a [`Style`].
///
/// [`HighlightState`]: struct.HighlightState.html
/// [`Style`]: struct.Style.html
#[derive(Debug)]
pub struct RangedHighlightIterator<'a, 'b> {
    /// Index into `changes` of the next operation to apply.
    index: usize,
    /// Byte offset into `text` at which the next yielded token starts.
    pos: usize,
    /// Scope stack operations for this line, each paired with the offset in `text` at which
    /// it takes effect.
    changes: &'a [(usize, ScopeStackOp)],
    /// The line of text being split into styled tokens.
    text: &'b str,
    /// Resolves scope stacks to styles.
    highlighter: &'a Highlighter<'a>,
    /// Scope and style stacks, updated in place as operations are applied.
    state: &'a mut HighlightState,
}
80 
/// Highlights a line of parsed code given a [`HighlightState`] and line of changes from the parser.
///
/// This is a backwards compatible shim on top of the [`RangedHighlightIterator`] which only
/// yields the [`Style`] and the text of the token, not the range.
///
/// It splits a line of text into different pieces each with a [`Style`].
///
/// [`HighlightState`]: struct.HighlightState.html
/// [`RangedHighlightIterator`]: struct.RangedHighlightIterator.html
/// [`Style`]: struct.Style.html
#[derive(Debug)]
pub struct HighlightIterator<'a, 'b> {
    /// The underlying iterator; its items are forwarded with the range dropped.
    ranged_iterator: RangedHighlightIterator<'a, 'b>
}
95 
96 impl HighlightState {
97     /// Note that the [`Highlighter`] is not stored; it is used to construct the initial stack
98     /// of styles.
99     ///
100     /// Most of the time you'll want to pass an empty stack as `initial_stack`, but see the docs for
101     /// [`HighlightState`] for a discussion of advanced caching use cases.
102     ///
103     /// [`Highlighter`]: struct.Highlighter.html
104     /// [`HighlightState`]: struct.HighlightState.html
new(highlighter: &Highlighter<'_>, initial_stack: ScopeStack) -> HighlightState105     pub fn new(highlighter: &Highlighter<'_>, initial_stack: ScopeStack) -> HighlightState {
106         let mut styles = vec![highlighter.get_default()];
107         let mut single_caches = vec![ScoredStyle::from_style(styles[0])];
108         for i in 0..initial_stack.len() {
109             let prefix = initial_stack.bottom_n(i + 1);
110             let new_cache = highlighter.update_single_cache_for_push(&single_caches[i], prefix);
111             styles.push(highlighter.finalize_style_with_multis(&new_cache, prefix));
112             single_caches.push(new_cache);
113         }
114 
115         HighlightState {
116             styles,
117             single_caches,
118             path: initial_stack,
119         }
120     }
121 }
122 
123 impl<'a, 'b> RangedHighlightIterator<'a, 'b> {
new(state: &'a mut HighlightState, changes: &'a [(usize, ScopeStackOp)], text: &'b str, highlighter: &'a Highlighter<'_>) -> RangedHighlightIterator<'a, 'b>124     pub fn new(state: &'a mut HighlightState,
125                changes: &'a [(usize, ScopeStackOp)],
126                text: &'b str,
127                highlighter: &'a Highlighter<'_>)
128                -> RangedHighlightIterator<'a, 'b> {
129         RangedHighlightIterator {
130             index: 0,
131             pos: 0,
132             changes,
133             text,
134             highlighter,
135             state,
136         }
137     }
138 }
139 
140 impl<'a, 'b> Iterator for RangedHighlightIterator<'a, 'b> {
141     type Item = (Style, &'b str, Range<usize>);
142 
143     /// Yields the next token of text and the associated `Style` to render that text with.
144     /// the concatenation of the strings in each token will make the original string.
next(&mut self) -> Option<(Style, &'b str, Range<usize>)>145     fn next(&mut self) -> Option<(Style, &'b str, Range<usize>)> {
146         if self.pos == self.text.len() && self.index >= self.changes.len() {
147             return None;
148         }
149         let (end, command) = if self.index < self.changes.len() {
150             self.changes[self.index].clone()
151         } else {
152             (self.text.len(), ScopeStackOp::Noop)
153         };
154         // println!("{} - {:?}   {}:{}", self.index, self.pos, self.state.path.len(), self.state.styles.len());
155         let style = *self.state.styles.last().unwrap_or(&Style::default());
156         let text = &self.text[self.pos..end];
157         let range = Range { start: self.pos, end: end };
158         {
159             // closures mess with the borrow checker's ability to see different struct fields
160             let m_path = &mut self.state.path;
161             let m_styles = &mut self.state.styles;
162             let m_caches = &mut self.state.single_caches;
163             let highlighter = &self.highlighter;
164             m_path.apply_with_hook(&command, |op, cur_stack| {
165                 // println!("{:?} - {:?}", op, cur_stack);
166                 match op {
167                     BasicScopeStackOp::Push(_) => {
168                         // we can push multiple times so this might have changed
169                         let new_cache = {
170                             if let Some(prev_cache) = m_caches.last() {
171                                 highlighter.update_single_cache_for_push(prev_cache, cur_stack)
172                             } else {
173                                 highlighter.update_single_cache_for_push(&ScoredStyle::from_style(highlighter.get_default()), cur_stack)
174                             }
175                         };
176                         m_styles.push(highlighter.finalize_style_with_multis(&new_cache, cur_stack));
177                         m_caches.push(new_cache);
178                     }
179                     BasicScopeStackOp::Pop => {
180                         m_styles.pop();
181                         m_caches.pop();
182                     }
183                 }
184             });
185         }
186         self.pos = end;
187         self.index += 1;
188         if text.is_empty() {
189             self.next()
190         } else {
191             Some((style, text, range))
192         }
193     }
194 }
195 impl<'a, 'b> HighlightIterator<'a, 'b> {
new(state: &'a mut HighlightState, changes: &'a [(usize, ScopeStackOp)], text: &'b str, highlighter: &'a Highlighter<'_>) -> HighlightIterator<'a, 'b>196     pub fn new(state: &'a mut HighlightState,
197                changes: &'a [(usize, ScopeStackOp)],
198                text: &'b str,
199                highlighter: &'a Highlighter<'_>)
200         -> HighlightIterator<'a, 'b> {
201             HighlightIterator {
202                 ranged_iterator: RangedHighlightIterator {
203                     index: 0,
204                     pos: 0,
205                     changes,
206                     text,
207                     highlighter,
208                     state
209                 }
210             }
211     }
212 }
213 
214 impl<'a, 'b> Iterator for HighlightIterator<'a, 'b> {
215     type Item = (Style, &'b str);
216 
217     /// Yields the next token of text and the associated `Style` to render that text with.
218     /// the concatenation of the strings in each token will make the original string.
next(&mut self) -> Option<(Style, &'b str)>219     fn next(&mut self) -> Option<(Style, &'b str)> {
220         self.ranged_iterator.next().map(|e| (e.0, e.1))
221     }
222 }
223 
/// A style in which every attribute remembers the score of the rule that set it.
///
/// Each field pairs the current value with the [`MatchPower`] of the selector match that
/// produced it, so that a later rule only overrides an attribute when it matches with a
/// strictly higher score.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScoredStyle {
    /// Foreground color and the score of the match that set it.
    pub foreground: (MatchPower, Color),
    /// Background color and the score of the match that set it.
    pub background: (MatchPower, Color),
    /// Font style and the score of the match that set it.
    pub font_style: (MatchPower, FontStyle),
}
230 
231 #[inline]
update_scored<T: Clone>(scored: &mut (MatchPower, T), update: &Option<T>, score: MatchPower)232 fn update_scored<T: Clone>(scored: &mut (MatchPower, T), update: &Option<T>, score: MatchPower) {
233     if score > scored.0 {
234         if let Some(u) = update {
235             scored.0 = score;
236             scored.1 = u.clone();
237         }
238     }
239 }
240 
241 impl ScoredStyle {
apply(&mut self, other: &StyleModifier, score: MatchPower)242     fn apply(&mut self, other: &StyleModifier, score: MatchPower) {
243         update_scored(&mut self.foreground, &other.foreground, score);
244         update_scored(&mut self.background, &other.background, score);
245         update_scored(&mut self.font_style, &other.font_style, score);
246     }
247 
to_style(&self) -> Style248     fn to_style(&self) -> Style {
249         Style {
250             foreground: self.foreground.1,
251             background: self.background.1,
252             font_style: self.font_style.1,
253         }
254     }
255 
from_style(style: Style) -> ScoredStyle256     fn from_style(style: Style) -> ScoredStyle {
257         ScoredStyle {
258             foreground: (MatchPower(-1.0), style.foreground),
259             background: (MatchPower(-1.0), style.background),
260             font_style: (MatchPower(-1.0), style.font_style),
261         }
262     }
263 }
264 
265 impl<'a> Highlighter<'a> {
new(theme: &'a Theme) -> Highlighter<'a>266     pub fn new(theme: &'a Theme) -> Highlighter<'a> {
267         let mut single_selectors = Vec::new();
268         let mut multi_selectors = Vec::new();
269         for item in &theme.scopes {
270             for sel in &item.scope.selectors {
271                 if let Some(scope) = sel.extract_single_scope() {
272                     single_selectors.push((scope, item.style));
273                 } else {
274                     multi_selectors.push((sel.clone(), item.style));
275                 }
276             }
277         }
278         // So that deeper matching selectors get checked first
279         single_selectors.sort_by(|a, b| b.0.len().cmp(&a.0.len()));
280 
281         Highlighter {
282             theme,
283             single_selectors,
284             multi_selectors,
285         }
286     }
287 
288     /// The default style in the absence of any matched rules.
289     /// Basically what plain text gets highlighted as.
get_default(&self) -> Style290     pub fn get_default(&self) -> Style {
291         Style {
292             foreground: self.theme.settings.foreground.unwrap_or(Color::BLACK),
293             background: self.theme.settings.background.unwrap_or(Color::WHITE),
294             font_style: FontStyle::empty(),
295         }
296     }
297 
update_single_cache_for_push(&self, cur: &ScoredStyle, path: &[Scope]) -> ScoredStyle298     fn update_single_cache_for_push(&self, cur: &ScoredStyle, path: &[Scope]) -> ScoredStyle {
299         let mut new_style = cur.clone();
300 
301         let last_scope = path[path.len() - 1];
302         for &(scope, ref modif) in self.single_selectors.iter().filter(|a| a.0.is_prefix_of(last_scope)) {
303             let single_score = f64::from(scope.len()) *
304                                f64::from(ATOM_LEN_BITS * ((path.len() - 1) as u16)).exp2();
305             new_style.apply(modif, MatchPower(single_score));
306         }
307 
308         new_style
309     }
310 
finalize_style_with_multis(&self, cur: &ScoredStyle, path: &[Scope]) -> Style311     fn finalize_style_with_multis(&self, cur: &ScoredStyle, path: &[Scope]) -> Style {
312         let mut new_style = cur.clone();
313 
314         let mult_iter = self.multi_selectors
315             .iter()
316             .filter_map(|&(ref sel, ref style)| sel.does_match(path).map(|score| (score, style)));
317         for (score, ref modif) in mult_iter {
318             new_style.apply(modif, score);
319         }
320 
321         new_style.to_style()
322     }
323 
324     /// Returns the fully resolved style for the given stack.
325     ///
326     /// This operation is convenient but expensive. For reasonable performance,
327     /// the caller should be caching results.
style_for_stack(&self, stack: &[Scope]) -> Style328     pub fn style_for_stack(&self, stack: &[Scope]) -> Style {
329         let mut single_cache = ScoredStyle::from_style(self.get_default());
330         for i in 0..stack.len() {
331             single_cache = self.update_single_cache_for_push(&single_cache, &stack[0..i+1]);
332         }
333         self.finalize_style_with_multis(&single_cache, stack)
334     }
335 
336     /// Returns a [`StyleModifier`] which, if applied to the default style,
337     /// would generate the fully resolved style for this stack.
338     ///
339     /// This is made available to applications that are using syntect styles
340     /// in combination with style information from other sources.
341     ///
342     /// This operation is convenient but expensive. For reasonable performance,
343     /// the caller should be caching results. It's likely slower than [`style_for_stack`].
344     ///
345     /// [`StyleModifier`]: struct.StyleModifier.html
346     /// [`style_for_stack`]: #method.style_for_stack
style_mod_for_stack(&self, path: &[Scope]) -> StyleModifier347     pub fn style_mod_for_stack(&self, path: &[Scope]) -> StyleModifier {
348         let mut matching_items : Vec<(MatchPower, &ThemeItem)> = self.theme
349             .scopes
350             .iter()
351             .filter_map(|item| {
352                 item.scope
353                     .does_match(path)
354                     .map(|score| (score, item))
355             })
356             .collect();
357         matching_items.sort_by_key(|&(score, _)| score);
358         let sorted = matching_items.iter()
359             .map(|(_, item)| item);
360 
361         let mut modifier = StyleModifier {
362             background: None,
363             foreground: None,
364             font_style: None,
365         };
366         for item in sorted {
367             modifier = modifier.apply(item.style);
368         }
369         modifier
370     }
371 }
372 
// These tests load the bundled syntax packages and default themes, so they are only compiled
// when the features providing them are enabled.
#[cfg(all(feature = "assets", feature = "parsing", any(feature = "dump-load", feature = "dump-load-rs")))]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::highlighting::{ThemeSet, Style, Color, FontStyle};
    use crate::parsing::{ SyntaxSet, ScopeStack, ParseState};

    /// Highlights one line of Ruby and checks the style and text of one token.
    #[test]
    fn can_parse() {
        let ps = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
        let mut state = {
            let syntax = ps.find_syntax_by_name("Ruby on Rails").unwrap();
            ParseState::new(syntax)
        };
        let ts = ThemeSet::load_defaults();
        let highlighter = Highlighter::new(&ts.themes["base16-ocean.dark"]);

        let mut highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
        let line = "module Bob::Wow::Troll::Five; 5; end";
        let ops = state.parse_line(line, &ps);
        let iter = HighlightIterator::new(&mut highlight_state, &ops[..], line, &highlighter);
        let regions: Vec<(Style, &str)> = iter.collect();
        // println!("{:#?}", regions);
        // The token at index 11 is expected to be the `5` literal.
        assert_eq!(regions[11],
                   (Style {
                       foreground: Color {
                           r: 208,
                           g: 135,
                           b: 112,
                           a: 0xFF,
                       },
                       background: Color {
                           r: 43,
                           g: 48,
                           b: 59,
                           a: 0xFF,
                       },
                       font_style: FontStyle::empty(),
                   },
                    "5"));
    }

    /// Checks that a `HighlightState` rebuilt from only the cached `path` of a previous state
    /// continues highlighting correctly on the next line.
    #[test]
    fn can_parse_with_highlight_state_from_cache() {
        let ps = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
        let mut state = {
            let syntax = ps.find_syntax_by_scope(
                Scope::new("source.python").unwrap()).unwrap();
            ParseState::new(syntax)
        };
        let ts = ThemeSet::load_defaults();
        let highlighter = Highlighter::new(&ts.themes["base16-ocean.dark"]);

        // We start by parsing a python multiline-comment: """
        let mut highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
        let line = r#"""""#;
        let ops = state.parse_line(line, &ps);
        let iter = HighlightIterator::new(&mut highlight_state, &ops[..], line, &highlighter);
        assert_eq!(1, iter.count());
        // Keep only the scope stack; the style stacks are rebuilt from it below.
        let path = highlight_state.path;

        // We then parse the next line with a highlight state built from the previous state
        let mut highlight_state = HighlightState::new(&highlighter, path);
        let line = "multiline comment";
        let ops = state.parse_line(line, &ps);
        let iter = HighlightIterator::new(&mut highlight_state, &ops[..], line, &highlighter);
        let regions: Vec<(Style, &str)> = iter.collect();

        // We expect the line to be styled as a comment.
        assert_eq!(regions[0],
                   (Style {
                       foreground: Color {
                           // (Comment: #65737E)
                           r: 101,
                           g: 115,
                           b: 126,
                           a: 0xFF,
                       },
                       background: Color {
                           r: 43,
                           g: 48,
                           b: 59,
                           a: 0xFF,
                       },
                       font_style: FontStyle::empty(),
                   },
                    "multiline comment"));
    }

    // see issues #133 and #203, this test tests the fixes for those issues
    /// Uses a hand-built theme mixing single-scope and multi-scope selectors and checks the
    /// styles produced by the iterator, `style_for_stack` and `style_mod_for_stack`.
    #[test]
    fn tricky_cases() {
        use crate::parsing::ScopeStack;
        use std::str::FromStr;
        use crate::highlighting::{ThemeSettings, ScopeSelectors};
        let c1 = Color { r: 1, g: 1, b: 1, a: 255 };
        let c2 = Color { r: 2, g: 2, b: 2, a: 255 };
        // The background expected wherever no rule sets one (white).
        let def_bg = Color { r: 255, g: 255, b: 255, a: 255 };
        let test_color_scheme = Theme {
            name: None,
            author: None,
            settings: ThemeSettings::default(),
            scopes: vec![
                ThemeItem {
                    scope: ScopeSelectors::from_str("comment.line").unwrap(),
                    style: StyleModifier {
                        foreground: Some(c1),
                        background: None,
                        font_style: None,
                    },
                },
                ThemeItem {
                    scope: ScopeSelectors::from_str("comment").unwrap(),
                    style: StyleModifier {
                        foreground: Some(c2),
                        background: None,
                        font_style: Some(FontStyle::ITALIC),
                    },
                },
                ThemeItem {
                    scope: ScopeSelectors::from_str("comment.line.rs - keyword").unwrap(),
                    style: StyleModifier {
                        foreground: None,
                        background: Some(c1),
                        font_style: None,
                    },
                },
                ThemeItem {
                    scope: ScopeSelectors::from_str("no.match").unwrap(),
                    style: StyleModifier {
                        foreground: None,
                        background: Some(c2),
                        font_style: Some(FontStyle::UNDERLINE),
                    },
                },
            ],
        };
        let highlighter = Highlighter::new(&test_color_scheme);

        use crate::parsing::ScopeStackOp::*;
        let ops = [
            // three rules apply at once here, two singles and one multi
            (0, Push(Scope::new("comment.line.rs").unwrap())),
            // multi un-applies
            (1, Push(Scope::new("keyword.control.rs").unwrap())),
            (2, Pop(1)),
        ];

        let mut highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
        let iter = HighlightIterator::new(&mut highlight_state, &ops[..], "abcdef", &highlighter);
        let regions: Vec<Style> = iter.map(|(s, _)| s).collect();

        // println!("{:#?}", regions);
        // One style per yielded token: inside the comment, inside the keyword, and after the
        // keyword scope has been popped again.
        assert_eq!(regions, vec![
            Style { foreground: c1, background: c1, font_style: FontStyle::ITALIC },
            Style { foreground: c1, background: def_bg, font_style: FontStyle::ITALIC },
            Style { foreground: c1, background: c1, font_style: FontStyle::ITALIC },
        ]);

        // The one-shot helpers must agree with the incremental result for the full stack.
        let full_stack = ScopeStack::from_str("comment.line.rs keyword.control.rs").unwrap();
        let full_style = highlighter.style_for_stack(full_stack.as_slice());
        assert_eq!(full_style, Style { foreground: c1, background: def_bg, font_style: FontStyle::ITALIC });
        let full_mod = highlighter.style_mod_for_stack(full_stack.as_slice());
        assert_eq!(full_mod, StyleModifier { foreground: Some(c1), background: None, font_style: Some(FontStyle::ITALIC) });
    }

    /// Same input as `can_parse`, but through `RangedHighlightIterator`, additionally checking
    /// the byte range reported for the token.
    #[test]
    fn test_ranges() {
        let ps = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
        let mut state = {
            let syntax = ps.find_syntax_by_name("Ruby on Rails").unwrap();
            ParseState::new(syntax)
        };
        let ts = ThemeSet::load_defaults();
        let highlighter = Highlighter::new(&ts.themes["base16-ocean.dark"]);

        let mut highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
        let line = "module Bob::Wow::Troll::Five; 5; end";
        let ops = state.parse_line(line, &ps);
        let iter = RangedHighlightIterator::new(&mut highlight_state, &ops[..], line, &highlighter);
        let regions: Vec<(Style, &str, Range<usize>)> = iter.collect();
        // println!("{:#?}", regions);
        assert_eq!(regions[11],
                   (Style {
                       foreground: Color {
                           r: 208,
                           g: 135,
                           b: 112,
                           a: 0xFF,
                       },
                       background: Color {
                           r: 43,
                           g: 48,
                           b: 59,
                           a: 0xFF,
                       },
                       font_style: FontStyle::empty(),
                   },
                    "5", Range { start: 30, end: 31 }));
    }
}
574