1 //! Iterators and data structures for transforming parsing information into styled text.
2
3 // Code based on https://github.com/defuz/sublimate/blob/master/src/core/syntax/highlighter.rs
4 // released under the MIT license by @defuz
5
6 use std::iter::Iterator;
7 use std::ops::Range;
8
9 use crate::parsing::{Scope, ScopeStack, BasicScopeStackOp, ScopeStackOp, MatchPower, ATOM_LEN_BITS};
10 use super::selector::ScopeSelector;
11 use super::theme::{Theme, ThemeItem};
12 use super::style::{Color, FontStyle, Style, StyleModifier};
13
/// Basically a wrapper around a [`Theme`] preparing it to be used for highlighting.
///
/// This is part of the API to preserve the possibility of caching matches of the
/// selectors of the theme on various scope paths or setting up some kind of
/// accelerator structure.
///
/// So for now this does very little, but eventually, if you keep it around between
/// highlighting runs, it will preserve its cache.
///
/// [`Theme`]: struct.Theme.html
#[derive(Debug)]
pub struct Highlighter<'a> {
    /// The theme whose settings and scope rules are consulted when resolving styles.
    theme: &'a Theme,
    /// Cache of the selectors in the theme that consist of only one scope.
    /// In most themes this is the majority, hence the usefulness.
    single_selectors: Vec<(Scope, StyleModifier)>,
    /// The remaining selectors of the theme, i.e. those from which no single
    /// scope could be extracted; they are matched against the whole scope path.
    multi_selectors: Vec<(ScopeSelector, StyleModifier)>,
    // TODO single_cache: HashMap<Scope, StyleModifier, BuildHasherDefault<FnvHasher>>,
}
33
/// Keeps a stack of scopes and styles as state between highlighting different lines.
///
/// If you are highlighting an entire file you create one of these at the start and use it
/// all the way to the end.
///
/// # Caching
///
/// One reason this is exposed is that since it implements `Clone` you can actually cache these
/// (probably along with a [`ParseState`]) and only re-start highlighting from the point of a
/// change. You could also do something fancy like only highlight a bit past the end of a user's
/// screen and resume highlighting when they scroll down on large files.
///
/// Alternatively you can save space by caching only the `path` field of this struct and then
/// re-create the `HighlightState` when needed by passing that stack as the `initial_stack`
/// parameter to the [`new`] method. This takes less space but a small amount of time to
/// re-create the style stack.
///
/// **Note:** Caching is for advanced users who have tons of time to maximize performance or want to
/// do so eventually. It is not recommended that you try caching the first time you implement
/// highlighting.
///
/// [`ParseState`]: ../parsing/struct.ParseState.html
/// [`new`]: #method.new
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HighlightState {
    /// Stack of fully resolved styles. The bottom entry is the highlighter's default
    /// style; one further entry is pushed for every scope pushed onto `path`.
    styles: Vec<Style>,
    /// Scored styles built from the single-scope selectors only, pushed and popped
    /// in lockstep with `styles`.
    single_caches: Vec<ScoredStyle>,
    /// The scope stack the styles above were resolved for.
    pub path: ScopeStack,
}
62
/// Highlights a line of parsed code given a [`HighlightState`] and line of changes from the parser.
///
/// Yields the [`Style`], the text, as well as the `Range` of the text in the source string.
///
/// It splits a line of text into different pieces, each with a [`Style`].
///
/// [`HighlightState`]: struct.HighlightState.html
/// [`Style`]: struct.Style.html
#[derive(Debug)]
pub struct RangedHighlightIterator<'a, 'b> {
    /// Index into `changes` of the next operation to apply.
    index: usize,
    /// Byte offset into `text` at which the next token starts.
    pos: usize,
    /// `(offset, op)` pairs: the token before each op ends at `offset`, after which
    /// the op is applied to the scope stack.
    changes: &'a [(usize, ScopeStackOp)],
    /// The line being highlighted.
    text: &'b str,
    /// Resolves scope stacks to styles.
    highlighter: &'a Highlighter<'a>,
    /// Scope and style stacks, updated in place as the iterator advances.
    state: &'a mut HighlightState,
}
80
/// Highlights a line of parsed code given a [`HighlightState`] and line of changes from the parser.
///
/// This is a backwards compatible shim on top of the [`RangedHighlightIterator`] which only
/// yields the [`Style`] and the text of the token, not the range.
///
/// It splits a line of text into different pieces, each with a [`Style`].
///
/// [`HighlightState`]: struct.HighlightState.html
/// [`RangedHighlightIterator`]: struct.RangedHighlightIterator.html
/// [`Style`]: struct.Style.html
#[derive(Debug)]
pub struct HighlightIterator<'a, 'b> {
    /// The underlying iterator; its ranges are discarded by this wrapper.
    ranged_iterator: RangedHighlightIterator<'a, 'b>
}
95
96 impl HighlightState {
97 /// Note that the [`Highlighter`] is not stored; it is used to construct the initial stack
98 /// of styles.
99 ///
100 /// Most of the time you'll want to pass an empty stack as `initial_stack`, but see the docs for
101 /// [`HighlightState`] for a discussion of advanced caching use cases.
102 ///
103 /// [`Highlighter`]: struct.Highlighter.html
104 /// [`HighlightState`]: struct.HighlightState.html
new(highlighter: &Highlighter<'_>, initial_stack: ScopeStack) -> HighlightState105 pub fn new(highlighter: &Highlighter<'_>, initial_stack: ScopeStack) -> HighlightState {
106 let mut styles = vec![highlighter.get_default()];
107 let mut single_caches = vec![ScoredStyle::from_style(styles[0])];
108 for i in 0..initial_stack.len() {
109 let prefix = initial_stack.bottom_n(i + 1);
110 let new_cache = highlighter.update_single_cache_for_push(&single_caches[i], prefix);
111 styles.push(highlighter.finalize_style_with_multis(&new_cache, prefix));
112 single_caches.push(new_cache);
113 }
114
115 HighlightState {
116 styles,
117 single_caches,
118 path: initial_stack,
119 }
120 }
121 }
122
123 impl<'a, 'b> RangedHighlightIterator<'a, 'b> {
new(state: &'a mut HighlightState, changes: &'a [(usize, ScopeStackOp)], text: &'b str, highlighter: &'a Highlighter<'_>) -> RangedHighlightIterator<'a, 'b>124 pub fn new(state: &'a mut HighlightState,
125 changes: &'a [(usize, ScopeStackOp)],
126 text: &'b str,
127 highlighter: &'a Highlighter<'_>)
128 -> RangedHighlightIterator<'a, 'b> {
129 RangedHighlightIterator {
130 index: 0,
131 pos: 0,
132 changes,
133 text,
134 highlighter,
135 state,
136 }
137 }
138 }
139
140 impl<'a, 'b> Iterator for RangedHighlightIterator<'a, 'b> {
141 type Item = (Style, &'b str, Range<usize>);
142
143 /// Yields the next token of text and the associated `Style` to render that text with.
144 /// the concatenation of the strings in each token will make the original string.
next(&mut self) -> Option<(Style, &'b str, Range<usize>)>145 fn next(&mut self) -> Option<(Style, &'b str, Range<usize>)> {
146 if self.pos == self.text.len() && self.index >= self.changes.len() {
147 return None;
148 }
149 let (end, command) = if self.index < self.changes.len() {
150 self.changes[self.index].clone()
151 } else {
152 (self.text.len(), ScopeStackOp::Noop)
153 };
154 // println!("{} - {:?} {}:{}", self.index, self.pos, self.state.path.len(), self.state.styles.len());
155 let style = *self.state.styles.last().unwrap_or(&Style::default());
156 let text = &self.text[self.pos..end];
157 let range = Range { start: self.pos, end: end };
158 {
159 // closures mess with the borrow checker's ability to see different struct fields
160 let m_path = &mut self.state.path;
161 let m_styles = &mut self.state.styles;
162 let m_caches = &mut self.state.single_caches;
163 let highlighter = &self.highlighter;
164 m_path.apply_with_hook(&command, |op, cur_stack| {
165 // println!("{:?} - {:?}", op, cur_stack);
166 match op {
167 BasicScopeStackOp::Push(_) => {
168 // we can push multiple times so this might have changed
169 let new_cache = {
170 if let Some(prev_cache) = m_caches.last() {
171 highlighter.update_single_cache_for_push(prev_cache, cur_stack)
172 } else {
173 highlighter.update_single_cache_for_push(&ScoredStyle::from_style(highlighter.get_default()), cur_stack)
174 }
175 };
176 m_styles.push(highlighter.finalize_style_with_multis(&new_cache, cur_stack));
177 m_caches.push(new_cache);
178 }
179 BasicScopeStackOp::Pop => {
180 m_styles.pop();
181 m_caches.pop();
182 }
183 }
184 });
185 }
186 self.pos = end;
187 self.index += 1;
188 if text.is_empty() {
189 self.next()
190 } else {
191 Some((style, text, range))
192 }
193 }
194 }
195 impl<'a, 'b> HighlightIterator<'a, 'b> {
new(state: &'a mut HighlightState, changes: &'a [(usize, ScopeStackOp)], text: &'b str, highlighter: &'a Highlighter<'_>) -> HighlightIterator<'a, 'b>196 pub fn new(state: &'a mut HighlightState,
197 changes: &'a [(usize, ScopeStackOp)],
198 text: &'b str,
199 highlighter: &'a Highlighter<'_>)
200 -> HighlightIterator<'a, 'b> {
201 HighlightIterator {
202 ranged_iterator: RangedHighlightIterator {
203 index: 0,
204 pos: 0,
205 changes,
206 text,
207 highlighter,
208 state
209 }
210 }
211 }
212 }
213
214 impl<'a, 'b> Iterator for HighlightIterator<'a, 'b> {
215 type Item = (Style, &'b str);
216
217 /// Yields the next token of text and the associated `Style` to render that text with.
218 /// the concatenation of the strings in each token will make the original string.
next(&mut self) -> Option<(Style, &'b str)>219 fn next(&mut self) -> Option<(Style, &'b str)> {
220 self.ranged_iterator.next().map(|e| (e.0, e.1))
221 }
222 }
223
/// A style in which every property remembers the score of the rule that set it.
///
/// Each field pairs the current value with the [`MatchPower`] it was set at, so
/// that a later rule only replaces the value when it matches with a strictly
/// higher score.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScoredStyle {
    /// Foreground colour and the score it was set with.
    pub foreground: (MatchPower, Color),
    /// Background colour and the score it was set with.
    pub background: (MatchPower, Color),
    /// Font style and the score it was set with.
    pub font_style: (MatchPower, FontStyle),
}
230
231 #[inline]
update_scored<T: Clone>(scored: &mut (MatchPower, T), update: &Option<T>, score: MatchPower)232 fn update_scored<T: Clone>(scored: &mut (MatchPower, T), update: &Option<T>, score: MatchPower) {
233 if score > scored.0 {
234 if let Some(u) = update {
235 scored.0 = score;
236 scored.1 = u.clone();
237 }
238 }
239 }
240
241 impl ScoredStyle {
apply(&mut self, other: &StyleModifier, score: MatchPower)242 fn apply(&mut self, other: &StyleModifier, score: MatchPower) {
243 update_scored(&mut self.foreground, &other.foreground, score);
244 update_scored(&mut self.background, &other.background, score);
245 update_scored(&mut self.font_style, &other.font_style, score);
246 }
247
to_style(&self) -> Style248 fn to_style(&self) -> Style {
249 Style {
250 foreground: self.foreground.1,
251 background: self.background.1,
252 font_style: self.font_style.1,
253 }
254 }
255
from_style(style: Style) -> ScoredStyle256 fn from_style(style: Style) -> ScoredStyle {
257 ScoredStyle {
258 foreground: (MatchPower(-1.0), style.foreground),
259 background: (MatchPower(-1.0), style.background),
260 font_style: (MatchPower(-1.0), style.font_style),
261 }
262 }
263 }
264
265 impl<'a> Highlighter<'a> {
new(theme: &'a Theme) -> Highlighter<'a>266 pub fn new(theme: &'a Theme) -> Highlighter<'a> {
267 let mut single_selectors = Vec::new();
268 let mut multi_selectors = Vec::new();
269 for item in &theme.scopes {
270 for sel in &item.scope.selectors {
271 if let Some(scope) = sel.extract_single_scope() {
272 single_selectors.push((scope, item.style));
273 } else {
274 multi_selectors.push((sel.clone(), item.style));
275 }
276 }
277 }
278 // So that deeper matching selectors get checked first
279 single_selectors.sort_by(|a, b| b.0.len().cmp(&a.0.len()));
280
281 Highlighter {
282 theme,
283 single_selectors,
284 multi_selectors,
285 }
286 }
287
288 /// The default style in the absence of any matched rules.
289 /// Basically what plain text gets highlighted as.
get_default(&self) -> Style290 pub fn get_default(&self) -> Style {
291 Style {
292 foreground: self.theme.settings.foreground.unwrap_or(Color::BLACK),
293 background: self.theme.settings.background.unwrap_or(Color::WHITE),
294 font_style: FontStyle::empty(),
295 }
296 }
297
update_single_cache_for_push(&self, cur: &ScoredStyle, path: &[Scope]) -> ScoredStyle298 fn update_single_cache_for_push(&self, cur: &ScoredStyle, path: &[Scope]) -> ScoredStyle {
299 let mut new_style = cur.clone();
300
301 let last_scope = path[path.len() - 1];
302 for &(scope, ref modif) in self.single_selectors.iter().filter(|a| a.0.is_prefix_of(last_scope)) {
303 let single_score = f64::from(scope.len()) *
304 f64::from(ATOM_LEN_BITS * ((path.len() - 1) as u16)).exp2();
305 new_style.apply(modif, MatchPower(single_score));
306 }
307
308 new_style
309 }
310
finalize_style_with_multis(&self, cur: &ScoredStyle, path: &[Scope]) -> Style311 fn finalize_style_with_multis(&self, cur: &ScoredStyle, path: &[Scope]) -> Style {
312 let mut new_style = cur.clone();
313
314 let mult_iter = self.multi_selectors
315 .iter()
316 .filter_map(|&(ref sel, ref style)| sel.does_match(path).map(|score| (score, style)));
317 for (score, ref modif) in mult_iter {
318 new_style.apply(modif, score);
319 }
320
321 new_style.to_style()
322 }
323
324 /// Returns the fully resolved style for the given stack.
325 ///
326 /// This operation is convenient but expensive. For reasonable performance,
327 /// the caller should be caching results.
style_for_stack(&self, stack: &[Scope]) -> Style328 pub fn style_for_stack(&self, stack: &[Scope]) -> Style {
329 let mut single_cache = ScoredStyle::from_style(self.get_default());
330 for i in 0..stack.len() {
331 single_cache = self.update_single_cache_for_push(&single_cache, &stack[0..i+1]);
332 }
333 self.finalize_style_with_multis(&single_cache, stack)
334 }
335
336 /// Returns a [`StyleModifier`] which, if applied to the default style,
337 /// would generate the fully resolved style for this stack.
338 ///
339 /// This is made available to applications that are using syntect styles
340 /// in combination with style information from other sources.
341 ///
342 /// This operation is convenient but expensive. For reasonable performance,
343 /// the caller should be caching results. It's likely slower than [`style_for_stack`].
344 ///
345 /// [`StyleModifier`]: struct.StyleModifier.html
346 /// [`style_for_stack`]: #method.style_for_stack
style_mod_for_stack(&self, path: &[Scope]) -> StyleModifier347 pub fn style_mod_for_stack(&self, path: &[Scope]) -> StyleModifier {
348 let mut matching_items : Vec<(MatchPower, &ThemeItem)> = self.theme
349 .scopes
350 .iter()
351 .filter_map(|item| {
352 item.scope
353 .does_match(path)
354 .map(|score| (score, item))
355 })
356 .collect();
357 matching_items.sort_by_key(|&(score, _)| score);
358 let sorted = matching_items.iter()
359 .map(|(_, item)| item);
360
361 let mut modifier = StyleModifier {
362 background: None,
363 foreground: None,
364 font_style: None,
365 };
366 for item in sorted {
367 modifier = modifier.apply(item.style);
368 }
369 modifier
370 }
371 }
372
#[cfg(all(feature = "assets", feature = "parsing", any(feature = "dump-load", feature = "dump-load-rs")))]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::highlighting::{ThemeSet, Style, Color, FontStyle};
    use crate::parsing::{ SyntaxSet, ScopeStack, ParseState};

    /// Builds a fully opaque colour from its RGB components.
    fn opaque(r: u8, g: u8, b: u8) -> Color {
        Color { r, g, b, a: 0xFF }
    }

    /// The style the tests below expect from the `base16-ocean.dark` theme for text
    /// with the given foreground: the theme's background colour and no font style.
    fn on_ocean_dark(foreground: Color) -> Style {
        Style {
            foreground,
            background: opaque(43, 48, 59),
            font_style: FontStyle::empty(),
        }
    }

    /// Highlights one line of Ruby and checks the style of the `5` token.
    #[test]
    fn can_parse() {
        let ps = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
        let mut state = ParseState::new(ps.find_syntax_by_name("Ruby on Rails").unwrap());
        let ts = ThemeSet::load_defaults();
        let highlighter = Highlighter::new(&ts.themes["base16-ocean.dark"]);

        let mut highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
        let line = "module Bob::Wow::Troll::Five; 5; end";
        let ops = state.parse_line(line, &ps);
        let regions: Vec<(Style, &str)> =
            HighlightIterator::new(&mut highlight_state, &ops[..], line, &highlighter).collect();

        assert_eq!(regions[11], (on_ocean_dark(opaque(208, 135, 112)), "5"));
    }

    /// A `HighlightState` rebuilt from a saved scope path must continue highlighting
    /// exactly where the previous state left off.
    #[test]
    fn can_parse_with_highlight_state_from_cache() {
        let ps = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
        let python = ps
            .find_syntax_by_scope(Scope::new("source.python").unwrap())
            .unwrap();
        let mut state = ParseState::new(python);
        let ts = ThemeSet::load_defaults();
        let highlighter = Highlighter::new(&ts.themes["base16-ocean.dark"]);

        // We start by parsing a python multiline-comment: """
        let mut highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
        let line = r#"""""#;
        let ops = state.parse_line(line, &ps);
        let iter = HighlightIterator::new(&mut highlight_state, &ops[..], line, &highlighter);
        assert_eq!(1, iter.count());
        let path = highlight_state.path;

        // We then parse the next line with a highlight state built from the previous state
        let mut highlight_state = HighlightState::new(&highlighter, path);
        let line = "multiline comment";
        let ops = state.parse_line(line, &ps);
        let regions: Vec<(Style, &str)> =
            HighlightIterator::new(&mut highlight_state, &ops[..], line, &highlighter).collect();

        // We expect the line to be styled as a comment (Comment: #65737E).
        assert_eq!(
            regions[0],
            (on_ocean_dark(opaque(101, 115, 126)), "multiline comment")
        );
    }

    // see issues #133 and #203, this test tests the fixes for those issues
    #[test]
    fn tricky_cases() {
        use crate::parsing::ScopeStack;
        use std::str::FromStr;
        use crate::highlighting::{ThemeSettings, ScopeSelectors};
        use crate::parsing::ScopeStackOp::*;

        let c1 = opaque(1, 1, 1);
        let c2 = opaque(2, 2, 2);
        let def_bg = opaque(255, 255, 255);

        // One theme rule: a selector string plus the properties it sets.
        let rule = |selector: &str,
                    foreground: Option<Color>,
                    background: Option<Color>,
                    font_style: Option<FontStyle>| ThemeItem {
            scope: ScopeSelectors::from_str(selector).unwrap(),
            style: StyleModifier { foreground, background, font_style },
        };

        let test_color_scheme = Theme {
            name: None,
            author: None,
            settings: ThemeSettings::default(),
            scopes: vec![
                rule("comment.line", Some(c1), None, None),
                rule("comment", Some(c2), None, Some(FontStyle::ITALIC)),
                rule("comment.line.rs - keyword", None, Some(c1), None),
                rule("no.match", None, Some(c2), Some(FontStyle::UNDERLINE)),
            ],
        };
        let highlighter = Highlighter::new(&test_color_scheme);

        let ops = [
            // three rules apply at once here, two singles and one multi
            (0, Push(Scope::new("comment.line.rs").unwrap())),
            // multi un-applies
            (1, Push(Scope::new("keyword.control.rs").unwrap())),
            (2, Pop(1)),
        ];

        let mut highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
        let regions: Vec<Style> =
            HighlightIterator::new(&mut highlight_state, &ops[..], "abcdef", &highlighter)
                .map(|(style, _)| style)
                .collect();

        let with_multi = Style { foreground: c1, background: c1, font_style: FontStyle::ITALIC };
        let without_multi = Style { foreground: c1, background: def_bg, font_style: FontStyle::ITALIC };
        assert_eq!(regions, vec![with_multi, without_multi, with_multi]);

        let full_stack = ScopeStack::from_str("comment.line.rs keyword.control.rs").unwrap();
        let full_style = highlighter.style_for_stack(full_stack.as_slice());
        assert_eq!(full_style, Style { foreground: c1, background: def_bg, font_style: FontStyle::ITALIC });
        let full_mod = highlighter.style_mod_for_stack(full_stack.as_slice());
        assert_eq!(full_mod, StyleModifier { foreground: Some(c1), background: None, font_style: Some(FontStyle::ITALIC) });
    }

    /// Same input as `can_parse`, but additionally checks the byte range that the
    /// ranged iterator reports for the `5` token.
    #[test]
    fn test_ranges() {
        let ps = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
        let mut state = ParseState::new(ps.find_syntax_by_name("Ruby on Rails").unwrap());
        let ts = ThemeSet::load_defaults();
        let highlighter = Highlighter::new(&ts.themes["base16-ocean.dark"]);

        let mut highlight_state = HighlightState::new(&highlighter, ScopeStack::new());
        let line = "module Bob::Wow::Troll::Five; 5; end";
        let ops = state.parse_line(line, &ps);
        let regions: Vec<(Style, &str, Range<usize>)> =
            RangedHighlightIterator::new(&mut highlight_state, &ops[..], line, &highlighter)
                .collect();

        assert_eq!(
            regions[11],
            (on_ocean_dark(opaque(208, 135, 112)), "5", Range { start: 30, end: 31 })
        );
    }
}
574