1 use super::syntax_definition::*;
2 use super::scope::*;
3 
4 #[cfg(feature = "metadata")]
5 use super::metadata::{LoadMetadata, Metadata, RawMetadataEntry};
6 
7 #[cfg(feature = "yaml-load")]
8 use super::super::LoadingError;
9 
10 use std::collections::{HashMap, HashSet};
11 use std::path::Path;
12 #[cfg(feature = "yaml-load")]
13 use walkdir::WalkDir;
14 #[cfg(feature = "yaml-load")]
15 use std::io::Read;
16 use std::io::{self, BufRead, BufReader};
17 use std::fs::File;
18 use std::mem;
19 
20 use lazycell::AtomicLazyCell;
21 use onig::Regex;
22 use parsing::syntax_definition::ContextId;
23 
/// A syntax set holds multiple syntaxes that have been linked together.
///
/// Use a `SyntaxSetBuilder` to load syntax definitions and build a syntax set.
///
/// After building, the syntax set is immutable and can no longer be modified.
/// But you can convert it back to a builder by using `into_builder`.
#[derive(Debug, Serialize, Deserialize)]
pub struct SyntaxSet {
    /// The syntaxes of this set, in the order they were added to the builder.
    /// Lookups search this list back to front.
    syntaxes: Vec<SyntaxReference>,
    /// The contexts of all syntaxes in one flat list. A `ContextId` is an
    /// index into this list (see `get_context`).
    contexts: Vec<Context>,
    /// Stores the syntax index for every path that was loaded
    path_syntaxes: Vec<(String, usize)>,

    /// Lazily filled cache of the compiled `first_line_match` regexes.
    /// It is skipped by serde and starts out empty after deserializing.
    #[serde(skip_serializing, skip_deserializing, default = "AtomicLazyCell::new")]
    first_line_cache: AtomicLazyCell<FirstLineCache>,
    /// Metadata, e.g. indent and commenting information.
    /// NOTE: if serializing, you should handle metadata manually; that is,
    /// you should serialize and deserialize it separately. See
    /// `examples/gendata.rs` for an example.
    #[cfg(feature = "metadata")]
    #[serde(skip, default)]
    pub(crate) metadata: Metadata,
}
47 
/// A single syntax as it is stored inside a built `SyntaxSet`.
///
/// Unlike a `SyntaxDefinition`, it does not own its contexts: `contexts` maps
/// each context name to a `ContextId` that the owning `SyntaxSet` resolves.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SyntaxReference {
    /// Name of the syntax, used by `find_syntax_by_name` and `find_syntax_by_token`.
    pub name: String,
    /// Strings compared by `find_syntax_by_extension`; `find_syntax_for_file`
    /// tries both the whole file name and the extension against these.
    pub file_extensions: Vec<String>,
    /// Scope compared by `find_syntax_by_scope`.
    pub scope: Scope,
    /// Optional regex source that `find_syntax_by_first_line` matches against
    /// the first line of a file.
    pub first_line_match: Option<String>,
    /// Copied unchanged from the `SyntaxDefinition`; not read in this file.
    // NOTE(review): presumably marks syntaxes that should not be offered to users - confirm.
    pub hidden: bool,
    /// Copied unchanged from the `SyntaxDefinition`; not read in this file.
    #[serde(serialize_with = "ordered_map")]
    pub variables: HashMap<String, String>,
    /// Context name -> id of that context in the owning `SyntaxSet`.
    #[serde(serialize_with = "ordered_map")]
    pub(crate) contexts: HashMap<String, ContextId>,
}
60 
/// A syntax set builder is used for loading syntax definitions from the file
/// system or by adding `SyntaxDefinition` objects.
///
/// Once all the syntaxes have been added, call `build` to turn the builder into
/// a `SyntaxSet` that can be used for parsing or highlighting.
#[derive(Clone, Default)]
pub struct SyntaxSetBuilder {
    /// The definitions added so far, in insertion order.
    syntaxes: Vec<SyntaxDefinition>,
    /// (slash-separated path, index into `syntaxes`) for every syntax that was
    /// loaded from a file by `add_from_folder`.
    path_syntaxes: Vec<(String, usize)>,
    /// Raw metadata entries collected from `.tmPreferences` files while
    /// loading folders.
    #[cfg(feature = "metadata")]
    raw_metadata: LoadMetadata,
    /// If this `SyntaxSetBuilder` is created with `SyntaxSet::into_builder`
    /// from a `SyntaxSet` that already had metadata, we keep that metadata,
    /// merging it with newly loaded metadata.
    #[cfg(feature = "metadata")]
    existing_metadata: Option<Metadata>,
}
78 
/// Reads the file at `p` and parses its contents as a syntax definition.
///
/// The file stem of `p` (when it is valid UTF-8) is handed to the parser as
/// its third argument. I/O errors are converted by `?`; a parse error is
/// returned as `LoadingError::ParseSyntax` together with the displayed path.
#[cfg(feature = "yaml-load")]
fn load_syntax_file(p: &Path,
                    lines_include_newline: bool)
                    -> Result<SyntaxDefinition, LoadingError> {
    let mut file = File::open(p)?;
    let mut source = String::new();
    file.read_to_string(&mut source)?;

    let stem = p.file_stem().and_then(|stem| stem.to_str());
    SyntaxDefinition::load_from_str(&source, lines_include_newline, stem)
        .map_err(|err| LoadingError::ParseSyntax(err, Some(p.display().to_string())))
}
95 
96 impl Clone for SyntaxSet {
clone(&self) -> SyntaxSet97     fn clone(&self) -> SyntaxSet {
98         SyntaxSet {
99             syntaxes: self.syntaxes.clone(),
100             contexts: self.contexts.clone(),
101             path_syntaxes: self.path_syntaxes.clone(),
102             // Will need to be re-initialized
103             first_line_cache: AtomicLazyCell::new(),
104             #[cfg(feature = "metadata")]
105             metadata: self.metadata.clone(),
106         }
107     }
108 }
109 
110 impl Default for SyntaxSet {
default() -> Self111     fn default() -> Self {
112         SyntaxSet {
113             syntaxes: Vec::new(),
114             contexts: Vec::new(),
115             path_syntaxes: Vec::new(),
116             first_line_cache: AtomicLazyCell::new(),
117             #[cfg(feature = "metadata")]
118             metadata: Metadata::default(),
119         }
120     }
121 }
122 
123 
124 impl SyntaxSet {
new() -> SyntaxSet125     pub fn new() -> SyntaxSet {
126         SyntaxSet::default()
127     }
128 
129     /// Convenience constructor for creating a builder, then loading syntax
130     /// definitions from a folder and then building the syntax set.
131     ///
132     /// Note that this uses `lines_include_newline` set to `false`, see the
133     /// `add_from_folder` method docs on `SyntaxSetBuilder` for an explanation
134     /// as to why this might not be the best.
135     #[cfg(feature = "yaml-load")]
load_from_folder<P: AsRef<Path>>(folder: P) -> Result<SyntaxSet, LoadingError>136     pub fn load_from_folder<P: AsRef<Path>>(folder: P) -> Result<SyntaxSet, LoadingError> {
137         let mut builder = SyntaxSetBuilder::new();
138         builder.add_from_folder(folder, false)?;
139         Ok(builder.build())
140     }
141 
142     /// The list of syntaxes in the set
syntaxes(&self) -> &[SyntaxReference]143     pub fn syntaxes(&self) -> &[SyntaxReference] {
144         &self.syntaxes[..]
145     }
146 
    /// Replaces the metadata stored in this set with `metadata`.
    #[cfg(feature = "metadata")]
    pub fn set_metadata(&mut self, metadata: Metadata) {
        self.metadata = metadata;
    }
151 
    /// The loaded metadata for this set.
    ///
    /// This is whatever `build` produced or `set_metadata` stored last.
    #[cfg(feature = "metadata")]
    pub fn metadata(&self) -> &Metadata {
        &self.metadata
    }
157 
158     /// Finds a syntax by its default scope, for example `source.regexp` finds the regex syntax.
159     /// This and all similar methods below do a linear search of syntaxes, this should be fast
160     /// because there aren't many syntaxes, but don't think you can call it a bajillion times per second.
find_syntax_by_scope(&self, scope: Scope) -> Option<&SyntaxReference>161     pub fn find_syntax_by_scope(&self, scope: Scope) -> Option<&SyntaxReference> {
162         self.syntaxes.iter().rev().find(|&s| s.scope == scope)
163     }
164 
find_syntax_by_name<'a>(&'a self, name: &str) -> Option<&'a SyntaxReference>165     pub fn find_syntax_by_name<'a>(&'a self, name: &str) -> Option<&'a SyntaxReference> {
166         self.syntaxes.iter().rev().find(|&s| name == s.name)
167     }
168 
find_syntax_by_extension<'a>(&'a self, extension: &str) -> Option<&'a SyntaxReference>169     pub fn find_syntax_by_extension<'a>(&'a self, extension: &str) -> Option<&'a SyntaxReference> {
170         self.syntaxes.iter().rev().find(|&s| s.file_extensions.iter().any(|e| e == extension))
171     }
172 
173     /// Searches for a syntax first by extension and then by case-insensitive name
174     /// useful for things like Github-flavoured-markdown code block highlighting where
175     /// all you have to go on is a short token given by the user
find_syntax_by_token<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference>176     pub fn find_syntax_by_token<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference> {
177         {
178             let ext_res = self.find_syntax_by_extension(s);
179             if ext_res.is_some() {
180                 return ext_res;
181             }
182         }
183         self.syntaxes.iter().rev().find(|&syntax| syntax.name.eq_ignore_ascii_case(s))
184     }
185 
186     /// Try to find the syntax for a file based on its first line.
187     /// This uses regexes that come with some sublime syntax grammars
188     /// for matching things like shebangs and mode lines like `-*- Mode: C -*-`
find_syntax_by_first_line<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference>189     pub fn find_syntax_by_first_line<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference> {
190         let cache = self.first_line_cache();
191         for &(ref reg, i) in cache.regexes.iter().rev() {
192             if reg.find(s).is_some() {
193                 return Some(&self.syntaxes[i]);
194             }
195         }
196         None
197     }
198 
199     /// Searches for a syntax by it's original file path when it was first loaded from disk
200     /// primarily useful for syntax tests
201     /// some may specify a Packages/PackageName/SyntaxName.sublime-syntax path
202     /// others may just have SyntaxName.sublime-syntax
203     /// this caters for these by matching the end of the path of the loaded syntax definition files
204     // however, if a syntax name is provided without a folder, make sure we don't accidentally match the end of a different syntax definition's name - by checking a / comes before it or it is the full path
find_syntax_by_path<'a>(&'a self, path: &str) -> Option<&'a SyntaxReference>205     pub fn find_syntax_by_path<'a>(&'a self, path: &str) -> Option<&'a SyntaxReference> {
206         let mut slash_path = "/".to_string();
207         slash_path.push_str(&path);
208         self.path_syntaxes.iter().rev().find(|t| t.0.ends_with(&slash_path) || t.0 == path).map(|&(_,i)| &self.syntaxes[i])
209     }
210 
211     /// Convenience method that tries to find the syntax for a file path,
212     /// first by extension/name and then by first line of the file if that doesn't work.
213     /// May IO Error because it sometimes tries to read the first line of the file.
214     ///
215     /// # Examples
216     /// When determining how to highlight a file, use this in combination with a fallback to plain text:
217     ///
218     /// ```
219     /// use syntect::parsing::SyntaxSet;
220     /// let ss = SyntaxSet::load_defaults_newlines();
221     /// let syntax = ss.find_syntax_for_file("testdata/highlight_test.erb")
222     ///     .unwrap() // for IO errors, you may want to use try!() or another plain text fallback
223     ///     .unwrap_or_else(|| ss.find_syntax_plain_text());
224     /// assert_eq!(syntax.name, "HTML (Rails)");
225     /// ```
find_syntax_for_file<P: AsRef<Path>>(&self, path_obj: P) -> io::Result<Option<&SyntaxReference>>226     pub fn find_syntax_for_file<P: AsRef<Path>>(&self,
227                                                 path_obj: P)
228                                                 -> io::Result<Option<&SyntaxReference>> {
229         let path: &Path = path_obj.as_ref();
230         let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
231         let extension = path.extension().and_then(|x| x.to_str()).unwrap_or("");
232         let ext_syntax = self.find_syntax_by_extension(file_name).or_else(
233                             || self.find_syntax_by_extension(extension));
234         let line_syntax = if ext_syntax.is_none() {
235             let mut line = String::new();
236             let f = File::open(path)?;
237             let mut line_reader = BufReader::new(&f);
238             line_reader.read_line(&mut line)?;
239             self.find_syntax_by_first_line(&line)
240         } else {
241             None
242         };
243         let syntax = ext_syntax.or(line_syntax);
244         Ok(syntax)
245     }
246 
247     /// Finds a syntax for plain text, which usually has no highlighting rules.
248     /// Good as a fallback when you can't find another syntax but you still want
249     /// to use the same highlighting pipeline code.
250     ///
251     /// This syntax should always be present, if not this method will panic.
252     /// If the way you load syntaxes doesn't create one, use `add_plain_text_syntax`.
253     ///
254     /// # Examples
255     /// ```
256     /// use syntect::parsing::SyntaxSetBuilder;
257     /// let mut builder = SyntaxSetBuilder::new();
258     /// builder.add_plain_text_syntax();
259     /// let ss = builder.build();
260     /// let syntax = ss.find_syntax_by_token("rs").unwrap_or_else(|| ss.find_syntax_plain_text());
261     /// assert_eq!(syntax.name, "Plain Text");
262     /// ```
find_syntax_plain_text(&self) -> &SyntaxReference263     pub fn find_syntax_plain_text(&self) -> &SyntaxReference {
264         self.find_syntax_by_name("Plain Text")
265             .expect("All syntax sets ought to have a plain text syntax")
266     }
267 
268     /// Converts this syntax set into a builder so that more syntaxes can be
269     /// added to it.
270     ///
271     /// Note that newly added syntaxes can have references to existing syntaxes
272     /// in the set, but not the other way around.
into_builder(self) -> SyntaxSetBuilder273     pub fn into_builder(self) -> SyntaxSetBuilder {
274         #[cfg(feature = "metadata")]
275         let SyntaxSet { syntaxes, contexts, path_syntaxes, metadata, .. } = self;
276         #[cfg(not(feature = "metadata"))]
277         let SyntaxSet { syntaxes, contexts, path_syntaxes, .. } = self;
278 
279         let mut context_map = HashMap::with_capacity(contexts.len());
280         for (i, context) in contexts.into_iter().enumerate() {
281             context_map.insert(i, context);
282         }
283 
284         let mut builder_syntaxes = Vec::with_capacity(syntaxes.len());
285 
286         for syntax in syntaxes {
287             let SyntaxReference {
288                 name,
289                 file_extensions,
290                 scope,
291                 first_line_match,
292                 hidden,
293                 variables,
294                 contexts,
295             } = syntax;
296 
297             let mut builder_contexts = HashMap::with_capacity(contexts.len());
298             for (name, context_id) in contexts {
299                 if let Some(context) = context_map.remove(&context_id.index()) {
300                     builder_contexts.insert(name, context);
301                 }
302             }
303 
304             let syntax_definition = SyntaxDefinition {
305                 name,
306                 file_extensions,
307                 scope,
308                 first_line_match,
309                 hidden,
310                 variables,
311                 contexts: builder_contexts,
312             };
313             builder_syntaxes.push(syntax_definition);
314         }
315 
316         SyntaxSetBuilder {
317             syntaxes: builder_syntaxes,
318             path_syntaxes,
319             #[cfg(feature = "metadata")]
320             existing_metadata: Some(metadata),
321             #[cfg(feature = "metadata")]
322             raw_metadata: LoadMetadata::default(),
323         }
324     }
325 
326     #[inline(always)]
get_context(&self, context_id: &ContextId) -> &Context327     pub(crate) fn get_context(&self, context_id: &ContextId) -> &Context {
328         &self.contexts[context_id.index()]
329     }
330 
first_line_cache(&self) -> &FirstLineCache331     fn first_line_cache(&self) -> &FirstLineCache {
332         if let Some(cache) = self.first_line_cache.borrow() {
333             cache
334         } else {
335             let cache = FirstLineCache::new(self.syntaxes());
336             self.first_line_cache.fill(cache).ok();
337             self.first_line_cache.borrow().unwrap()
338         }
339     }
340 }
341 
342 
343 impl SyntaxSetBuilder {
new() -> SyntaxSetBuilder344     pub fn new() -> SyntaxSetBuilder {
345         SyntaxSetBuilder::default()
346     }
347 
    /// Add a syntax to the set.
    ///
    /// The definition is appended as-is; no check for duplicates is made.
    /// Lookups on the built `SyntaxSet` search from the most recently added
    /// syntax backwards.
    pub fn add(&mut self, syntax: SyntaxDefinition) {
        self.syntaxes.push(syntax);
    }
352 
353     /// Rarely useful method that loads in a syntax with no highlighting rules for plain text.
354     /// Exists mainly for adding the plain text syntax to syntax set dumps, because for some
355     /// reason the default Sublime plain text syntax is still in `.tmLanguage` format.
356     #[cfg(feature = "yaml-load")]
add_plain_text_syntax(&mut self)357     pub fn add_plain_text_syntax(&mut self) {
358         let s = "---\nname: Plain Text\nfile_extensions: [txt]\nscope: text.plain\ncontexts: \
359                  {main: []}";
360         let syn = SyntaxDefinition::load_from_str(s, false, None).unwrap();
361         self.syntaxes.push(syn);
362     }
363 
364     /// Loads all the .sublime-syntax files in a folder into this builder.
365     ///
366     /// The `lines_include_newline` parameter is used to work around the fact that Sublime Text normally
367     /// passes line strings including newline characters (`\n`) to its regex engine. This results in many
368     /// syntaxes having regexes matching `\n`, which doesn't work if you don't pass in newlines.
369     /// It is recommended that if you can you pass in lines with newlines if you can and pass `true` for this parameter.
370     /// If that is inconvenient pass `false` and the loader will do some hacky find and replaces on the
371     /// match regexes that seem to work for the default syntax set, but may not work for any other syntaxes.
372     ///
373     /// In the future I might include a "slow mode" that copies the lines passed in and appends a newline if there isn't one.
374     /// but in the interest of performance currently this hacky fix will have to do.
375     #[cfg(feature = "yaml-load")]
add_from_folder<P: AsRef<Path>>( &mut self, folder: P, lines_include_newline: bool ) -> Result<(), LoadingError>376     pub fn add_from_folder<P: AsRef<Path>>(
377         &mut self,
378         folder: P,
379         lines_include_newline: bool
380     ) -> Result<(), LoadingError> {
381         for entry in WalkDir::new(folder).sort_by(|a, b| a.file_name().cmp(b.file_name())) {
382             let entry = entry.map_err(LoadingError::WalkDir)?;
383             if entry.path().extension().map_or(false, |e| e == "sublime-syntax") {
384                 let syntax = load_syntax_file(entry.path(), lines_include_newline)?;
385                 if let Some(path_str) = entry.path().to_str() {
386                     // Split the path up and rejoin with slashes so that syntaxes loaded on Windows
387                     // can still be loaded the same way.
388                     let path = Path::new(path_str);
389                     let path_parts: Vec<_> = path.iter().map(|c| c.to_str().unwrap()).collect();
390                     self.path_syntaxes.push((path_parts.join("/").to_string(), self.syntaxes.len()));
391                 }
392                 self.syntaxes.push(syntax);
393             }
394 
395             #[cfg(feature = "metadata")]
396             {
397                 if entry.path().extension() == Some("tmPreferences".as_ref()) {
398                     match RawMetadataEntry::load(entry.path()) {
399                         Ok(meta) => self.raw_metadata.add_raw(meta),
400                         Err(_err) => (),
401                     }
402                 }
403             }
404         }
405 
406         Ok(())
407     }
408 
409     /// Build a `SyntaxSet` from the syntaxes that have been added to this
410     /// builder.
411     ///
412     /// ### Linking
413     ///
414     /// The contexts in syntaxes can reference other contexts in the same syntax
415     /// or even other syntaxes. For example, a HTML syntax can reference a CSS
416     /// syntax so that CSS blocks in HTML work as expected.
417     ///
418     /// Those references work in various ways and involve one or two lookups.
419     /// To avoid having to do these lookups during parsing/highlighting, the
420     /// references are changed to directly reference contexts via index. That's
421     /// called linking.
422     ///
423     /// Linking is done in this build step. So in order to get the best
424     /// performance, you should try to avoid calling this too much. Ideally,
425     /// create a `SyntaxSet` once and then use it many times. If you can,
426     /// serialize a `SyntaxSet` for your program and when you run the program,
427     /// directly load the `SyntaxSet`.
build(self) -> SyntaxSet428     pub fn build(self) -> SyntaxSet {
429 
430         #[cfg(not(feature = "metadata"))]
431         let SyntaxSetBuilder { syntaxes: syntax_definitions, path_syntaxes } = self;
432         #[cfg(feature = "metadata")]
433         let SyntaxSetBuilder {
434             syntaxes: syntax_definitions,
435             path_syntaxes,
436             raw_metadata,
437             existing_metadata,
438         } = self;
439 
440         let mut syntaxes = Vec::with_capacity(syntax_definitions.len());
441         let mut all_contexts = Vec::new();
442 
443         for syntax_definition in syntax_definitions {
444             let SyntaxDefinition {
445                 name,
446                 file_extensions,
447                 scope,
448                 first_line_match,
449                 hidden,
450                 variables,
451                 contexts,
452             } = syntax_definition;
453 
454             let mut map = HashMap::new();
455 
456             let mut contexts: Vec<(String, Context)> = contexts.into_iter().collect();
457             // Sort the values of the HashMap so that the contexts in the
458             // resulting SyntaxSet have a deterministic order for serializing.
459             // Because we're sorting by the keys which are unique, we can use
460             // an unstable sort.
461             contexts.sort_unstable_by(|(name_a, _), (name_b, _)| name_a.cmp(&name_b));
462             for (name, context) in contexts {
463                 let index = all_contexts.len();
464                 map.insert(name, ContextId::new(index));
465                 all_contexts.push(context);
466             }
467 
468             let syntax = SyntaxReference {
469                 name,
470                 file_extensions,
471                 scope,
472                 first_line_match,
473                 hidden,
474                 variables,
475                 contexts: map,
476             };
477             syntaxes.push(syntax);
478         }
479 
480         for syntax in &syntaxes {
481             let mut no_prototype = HashSet::new();
482             let prototype = syntax.contexts.get("prototype");
483             if let Some(prototype_id) = prototype {
484                 // TODO: We could do this after parsing YAML, instead of here?
485                 Self::recursively_mark_no_prototype(syntax, prototype_id.index(), &all_contexts, &mut no_prototype);
486             }
487 
488             for context_id in syntax.contexts.values() {
489                 let index = context_id.index();
490                 let mut context = &mut all_contexts[index];
491                 if let Some(prototype_id) = prototype {
492                     if context.meta_include_prototype && !no_prototype.contains(&index) {
493                         context.prototype = Some(*prototype_id);
494                     }
495                 }
496                 Self::link_context(&mut context, syntax, &syntaxes);
497             }
498         }
499 
500         #[cfg(feature = "metadata")]
501         let metadata = match existing_metadata {
502             Some(mut existing) => existing.merged_with_raw(raw_metadata),
503             None => raw_metadata.into(),
504         };
505 
506         SyntaxSet {
507             syntaxes,
508             contexts: all_contexts,
509             path_syntaxes,
510             first_line_cache: AtomicLazyCell::new(),
511             #[cfg(feature = "metadata")]
512             metadata,
513         }
514     }
515 
516     /// Anything recursively included by the prototype shouldn't include the prototype.
517     /// This marks them as such.
recursively_mark_no_prototype( syntax: &SyntaxReference, context_id: usize, contexts: &[Context], no_prototype: &mut HashSet<usize>, )518     fn recursively_mark_no_prototype(
519         syntax: &SyntaxReference,
520         context_id: usize,
521         contexts: &[Context],
522         no_prototype: &mut HashSet<usize>,
523     ) {
524         let first_time = no_prototype.insert(context_id);
525         if !first_time {
526             return;
527         }
528 
529         for pattern in &contexts[context_id].patterns {
530             match *pattern {
531                 // Apparently inline blocks also don't include the prototype when within the prototype.
532                 // This is really weird, but necessary to run the YAML syntax.
533                 Pattern::Match(ref match_pat) => {
534                     let maybe_context_refs = match match_pat.operation {
535                         MatchOperation::Push(ref context_refs) |
536                         MatchOperation::Set(ref context_refs) => Some(context_refs),
537                         MatchOperation::Pop | MatchOperation::None => None,
538                     };
539                     if let Some(context_refs) = maybe_context_refs {
540                         for context_ref in context_refs.iter() {
541                             match context_ref {
542                                 ContextReference::Inline(ref s) | ContextReference::Named(ref s) => {
543                                     if let Some(i) = syntax.contexts.get(s) {
544                                         Self::recursively_mark_no_prototype(syntax, i.index(), contexts, no_prototype);
545                                     }
546                                 },
547                                 ContextReference::Direct(ref id) => {
548                                     Self::recursively_mark_no_prototype(syntax, id.index(), contexts, no_prototype);
549                                 },
550                                 _ => (),
551                             }
552                         }
553                     }
554                 }
555                 Pattern::Include(ref reference) => {
556                     match reference {
557                         ContextReference::Named(ref s) => {
558                             if let Some(id) = syntax.contexts.get(s) {
559                                 Self::recursively_mark_no_prototype(syntax, id.index(), contexts, no_prototype);
560                             }
561                         },
562                         ContextReference::Direct(ref id) => {
563                             Self::recursively_mark_no_prototype(syntax, id.index(), contexts, no_prototype);
564                         },
565                         _ => (),
566                     }
567                 }
568             }
569         }
570     }
571 
link_context(context: &mut Context, syntax: &SyntaxReference, syntaxes: &[SyntaxReference])572     fn link_context(context: &mut Context, syntax: &SyntaxReference, syntaxes: &[SyntaxReference]) {
573         for pattern in &mut context.patterns {
574             match *pattern {
575                 Pattern::Match(ref mut match_pat) => Self::link_match_pat(match_pat, syntax, syntaxes),
576                 Pattern::Include(ref mut context_ref) => Self::link_ref(context_ref, syntax, syntaxes),
577             }
578         }
579     }
580 
link_ref(context_ref: &mut ContextReference, syntax: &SyntaxReference, syntaxes: &[SyntaxReference])581     fn link_ref(context_ref: &mut ContextReference, syntax: &SyntaxReference, syntaxes: &[SyntaxReference]) {
582         // println!("{:?}", context_ref);
583         use super::syntax_definition::ContextReference::*;
584         let linked_context_id = match *context_ref {
585             Named(ref s) | Inline(ref s) => {
586                 // This isn't actually correct, but it is better than nothing/crashing.
587                 // This is being phased out anyhow, see https://github.com/sublimehq/Packages/issues/73
588                 // Fixes issue #30
589                 if s == "$top_level_main" {
590                     syntax.contexts.get("main")
591                 } else {
592                     syntax.contexts.get(s)
593                 }
594             }
595             ByScope { scope, ref sub_context } => {
596                 let context_name = sub_context.as_ref().map_or("main", |x| &**x);
597                 syntaxes
598                     .iter()
599                     .rev()
600                     .find(|s| s.scope == scope)
601                     .and_then(|s| s.contexts.get(context_name))
602             }
603             File { ref name, ref sub_context } => {
604                 let context_name = sub_context.as_ref().map_or("main", |x| &**x);
605                 syntaxes
606                     .iter()
607                     .rev()
608                     .find(|s| &s.name == name)
609                     .and_then(|s| s.contexts.get(context_name))
610             }
611             Direct(_) => None,
612         };
613         if let Some(context_id) = linked_context_id {
614             let mut new_ref = Direct(*context_id);
615             mem::swap(context_ref, &mut new_ref);
616         }
617     }
618 
link_match_pat(match_pat: &mut MatchPattern, syntax: &SyntaxReference, syntaxes: &[SyntaxReference])619     fn link_match_pat(match_pat: &mut MatchPattern, syntax: &SyntaxReference, syntaxes: &[SyntaxReference]) {
620         let maybe_context_refs = match match_pat.operation {
621             MatchOperation::Push(ref mut context_refs) |
622             MatchOperation::Set(ref mut context_refs) => Some(context_refs),
623             MatchOperation::Pop | MatchOperation::None => None,
624         };
625         if let Some(context_refs) = maybe_context_refs {
626             for context_ref in context_refs.iter_mut() {
627                 Self::link_ref(context_ref, syntax, syntaxes);
628             }
629         }
630         if let Some(ref mut context_ref) = match_pat.with_prototype {
631             Self::link_ref(context_ref, syntax, syntaxes);
632         }
633     }
634 }
635 
/// Compiled `first_line_match` regexes of the syntaxes in a `SyntaxSet`,
/// used by `find_syntax_by_first_line`.
#[derive(Debug)]
struct FirstLineCache {
    /// (first line regex, syntax index) pairs for all syntaxes with a first line regex
    /// that compiled successfully, in syntax order.
    regexes: Vec<(Regex, usize)>,
}
641 
642 impl FirstLineCache {
new(syntaxes: &[SyntaxReference]) -> FirstLineCache643     fn new(syntaxes: &[SyntaxReference]) -> FirstLineCache {
644         let mut regexes = Vec::new();
645         for (i, syntax) in syntaxes.iter().enumerate() {
646             if let Some(ref reg_str) = syntax.first_line_match {
647                 if let Ok(reg) = Regex::new(reg_str) {
648                     regexes.push((reg, i));
649                 }
650             }
651         }
652         FirstLineCache {
653             regexes,
654         }
655     }
656 }
657 
658 
659 #[cfg(feature = "yaml-load")]
660 #[cfg(test)]
661 mod tests {
662     use super::*;
663     use parsing::{ParseState, Scope, syntax_definition};
664     use std::collections::HashMap;
665 
    /// Loads the test packages plus a hand-made CMake syntax and the plain
    /// text syntax, then exercises each `find_syntax_*` lookup on the built set.
    #[test]
    fn can_load() {
        let mut builder = SyntaxSetBuilder::new();
        builder.add_from_folder("testdata/Packages", false).unwrap();

        // A syntax without contexts whose "extensions" include a whole file
        // name, for the file-name lookups below.
        let cmake_dummy_syntax = SyntaxDefinition {
            name: "CMake".to_string(),
            file_extensions: vec!["CMakeLists.txt".to_string(), "cmake".to_string()],
            scope: Scope::new("source.cmake").unwrap(),
            first_line_match: None,
            hidden: false,
            variables: HashMap::new(),
            contexts: HashMap::new(),
        };

        builder.add(cmake_dummy_syntax);
        builder.add_plain_text_syntax();

        let ps = builder.build();

        assert_eq!(&ps.find_syntax_by_first_line("#!/usr/bin/env node").unwrap().name,
                   "JavaScript");
        let rails_scope = Scope::new("source.ruby.rails").unwrap();
        let syntax = ps.find_syntax_by_name("Ruby on Rails").unwrap();
        // Would panic if the plain text syntax were missing.
        ps.find_syntax_plain_text();
        assert_eq!(&ps.find_syntax_by_extension("rake").unwrap().name, "Ruby");
        assert_eq!(&ps.find_syntax_by_token("ruby").unwrap().name, "Ruby");
        assert_eq!(&ps.find_syntax_by_first_line("lol -*- Mode: C -*- such line").unwrap().name,
                   "C");
        assert_eq!(&ps.find_syntax_for_file("testdata/parser.rs").unwrap().unwrap().name,
                   "Rust");
        assert_eq!(&ps.find_syntax_for_file("testdata/test_first_line.test")
                       .expect("Error finding syntax for file")
                       .expect("No syntax found for file")
                       .name,
                   "Ruby");
        // The following paths are matched by file name or extension.
        assert_eq!(&ps.find_syntax_for_file(".bashrc").unwrap().unwrap().name,
                   "Bourne Again Shell (bash)");
        assert_eq!(&ps.find_syntax_for_file("CMakeLists.txt").unwrap().unwrap().name,
                   "CMake");
        assert_eq!(&ps.find_syntax_for_file("test.cmake").unwrap().unwrap().name,
                   "CMake");
        assert_eq!(&ps.find_syntax_for_file("Rakefile").unwrap().unwrap().name, "Ruby");
        assert!(&ps.find_syntax_by_first_line("derp derp hi lol").is_none());
        assert_eq!(&ps.find_syntax_by_path("Packages/Rust/Rust.sublime-syntax").unwrap().name,
                   "Rust");
        // println!("{:#?}", syntax);
        assert_eq!(syntax.scope, rails_scope);
        // assert!(false);
        // Number of items `context_iter` yields for the main context of Ruby on Rails.
        let main_context = ps.get_context(&syntax.contexts["main"]);
        let count = syntax_definition::context_iter(&ps, main_context).count();
        assert_eq!(count, 109);
    }
719 
720     #[test]
can_clone()721     fn can_clone() {
722         let cloned_syntax_set = {
723             let mut builder = SyntaxSetBuilder::new();
724             builder.add(syntax_a());
725             builder.add(syntax_b());
726 
727             let syntax_set_original = builder.build();
728             syntax_set_original.clone()
729             // Note: The original syntax set is dropped
730         };
731 
732         let syntax = cloned_syntax_set.find_syntax_by_extension("a").unwrap();
733         let mut parse_state = ParseState::new(syntax);
734         let ops = parse_state.parse_line("a go_b b", &cloned_syntax_set);
735         let expected = (7, ScopeStackOp::Push(Scope::new("b").unwrap()));
736         assert_ops_contain(&ops, &expected);
737     }
738 
739     #[test]
can_add_more_syntaxes_with_builder()740     fn can_add_more_syntaxes_with_builder() {
741         let syntax_set_original = {
742             let mut builder = SyntaxSetBuilder::new();
743             builder.add(syntax_a());
744             builder.add(syntax_b());
745             builder.build()
746         };
747 
748         let mut builder = syntax_set_original.into_builder();
749 
750         let syntax_c = SyntaxDefinition::load_from_str(r#"
751         name: C
752         scope: source.c
753         file_extensions: [c]
754         contexts:
755           main:
756             - match: 'c'
757               scope: c
758             - match: 'go_a'
759               push: scope:source.a#main
760         "#, true, None).unwrap();
761 
762         builder.add(syntax_c);
763 
764         let syntax_set = builder.build();
765 
766         let syntax = syntax_set.find_syntax_by_extension("c").unwrap();
767         let mut parse_state = ParseState::new(syntax);
768         let ops = parse_state.parse_line("c go_a a go_b b", &syntax_set);
769         let expected = (14, ScopeStackOp::Push(Scope::new("b").unwrap()));
770         assert_ops_contain(&ops, &expected);
771     }
772 
773     #[test]
can_use_in_multiple_threads()774     fn can_use_in_multiple_threads() {
775         use rayon::prelude::*;
776 
777         let syntax_set = {
778             let mut builder = SyntaxSetBuilder::new();
779             builder.add(syntax_a());
780             builder.add(syntax_b());
781             builder.build()
782         };
783 
784         let lines = vec![
785             "a a a",
786             "a go_b b",
787             "go_b b",
788             "go_b b  b",
789         ];
790 
791         let results: Vec<Vec<(usize, ScopeStackOp)>> = lines
792             .par_iter()
793             .map(|line| {
794                 let syntax = syntax_set.find_syntax_by_extension("a").unwrap();
795                 let mut parse_state = ParseState::new(syntax);
796                 parse_state.parse_line(line, &syntax_set)
797             })
798             .collect();
799 
800         assert_ops_contain(&results[0], &(4, ScopeStackOp::Push(Scope::new("a").unwrap())));
801         assert_ops_contain(&results[1], &(7, ScopeStackOp::Push(Scope::new("b").unwrap())));
802         assert_ops_contain(&results[2], &(5, ScopeStackOp::Push(Scope::new("b").unwrap())));
803         assert_ops_contain(&results[3], &(8, ScopeStackOp::Push(Scope::new("b").unwrap())));
804     }
805 
    #[test]
    fn is_sync() {
        // Compile-time check: this only builds if `SyntaxSet` implements `Sync`.
        check_sync::<SyntaxSet>();
    }
810 
    #[test]
    fn is_send() {
        // Compile-time check: this only builds if `SyntaxSet` implements `Send`.
        check_send::<SyntaxSet>();
    }
815 
816     #[test]
can_override_syntaxes()817     fn can_override_syntaxes() {
818         let syntax_set = {
819             let mut builder = SyntaxSetBuilder::new();
820             builder.add(syntax_a());
821             builder.add(syntax_b());
822 
823             let syntax_a2 = SyntaxDefinition::load_from_str(r#"
824                 name: A improved
825                 scope: source.a
826                 file_extensions: [a]
827                 first_line_match: syntax\s+a
828                 contexts:
829                   main:
830                     - match: a
831                       scope: a2
832                     - match: go_b
833                       push: scope:source.b#main
834                 "#, true, None).unwrap();
835 
836             builder.add(syntax_a2);
837 
838             let syntax_c = SyntaxDefinition::load_from_str(r#"
839                 name: C
840                 scope: source.c
841                 file_extensions: [c]
842                 first_line_match: syntax\s+.*
843                 contexts:
844                   main:
845                     - match: c
846                       scope: c
847                     - match: go_a
848                       push: scope:source.a#main
849                 "#, true, None).unwrap();
850 
851             builder.add(syntax_c);
852 
853             builder.build()
854         };
855 
856         let mut syntax = syntax_set.find_syntax_by_extension("a").unwrap();
857         assert_eq!(syntax.name, "A improved");
858         syntax = syntax_set.find_syntax_by_scope(Scope::new(&"source.a").unwrap()).unwrap();
859         assert_eq!(syntax.name, "A improved");
860         syntax = syntax_set.find_syntax_by_first_line(&"syntax a").unwrap();
861         assert_eq!(syntax.name, "C");
862 
863         let mut parse_state = ParseState::new(syntax);
864         let ops = parse_state.parse_line("c go_a a", &syntax_set);
865         let expected = (7, ScopeStackOp::Push(Scope::new("a2").unwrap()));
866         assert_ops_contain(&ops, &expected);
867     }
868 
869     #[test]
can_parse_issue219()870     fn can_parse_issue219() {
871         // Go to builder and back after loading so that build() gets Direct references instead of
872         // Named ones. The bug was that Direct references were not handled when marking as
873         // "no prototype", so prototype contexts accidentally had the prototype set, which made
874         // the parser loop forever.
875         let syntax_set = SyntaxSet::load_defaults_newlines().into_builder().build();
876         let syntax = syntax_set.find_syntax_by_extension("yaml").unwrap();
877 
878         let mut parse_state = ParseState::new(syntax);
879         let ops = parse_state.parse_line("# test\n", &syntax_set);
880         let expected = (0, ScopeStackOp::Push(Scope::new("comment.line.number-sign.yaml").unwrap()));
881         assert_ops_contain(&ops, &expected);
882     }
883 
884     #[test]
no_prototype_for_contexts_included_from_prototype()885     fn no_prototype_for_contexts_included_from_prototype() {
886         let mut builder = SyntaxSetBuilder::new();
887         let syntax = SyntaxDefinition::load_from_str(r#"
888                 name: Test Prototype
889                 scope: source.test
890                 file_extensions: [test]
891                 contexts:
892                   prototype:
893                     - include: included_from_prototype
894                   main:
895                     - match: main
896                     - match: other
897                       push: other
898                   other:
899                     - match: o
900                   included_from_prototype:
901                     - match: p
902                       scope: p
903                 "#, true, None).unwrap();
904         builder.add(syntax);
905         let ss = builder.build();
906 
907         // "main" and "other" should have context set, "prototype" and "included_from_prototype"
908         // must not have a prototype set.
909         assert_prototype_only_on(&["main", "other"], &ss, &ss.syntaxes()[0]);
910 
911         // Building again should have the same result. The difference is that after the first
912         // build(), the references have been replaced with Direct references, so the code needs to
913         // handle that correctly.
914         let rebuilt = ss.into_builder().build();
915         assert_prototype_only_on(&["main", "other"], &rebuilt, &rebuilt.syntaxes()[0]);
916     }
917 
918     #[test]
no_prototype_for_contexts_inline_in_prototype()919     fn no_prototype_for_contexts_inline_in_prototype() {
920         let mut builder = SyntaxSetBuilder::new();
921         let syntax = SyntaxDefinition::load_from_str(r#"
922                 name: Test Prototype
923                 scope: source.test
924                 file_extensions: [test]
925                 contexts:
926                   prototype:
927                     - match: p
928                       push:
929                         - match: p2
930                   main:
931                     - match: main
932                 "#, true, None).unwrap();
933         builder.add(syntax);
934         let ss = builder.build();
935 
936         assert_prototype_only_on(&["main"], &ss, &ss.syntaxes()[0]);
937 
938         let rebuilt = ss.into_builder().build();
939         assert_prototype_only_on(&["main"], &rebuilt, &rebuilt.syntaxes()[0]);
940     }
941 
assert_ops_contain( ops: &[(usize, ScopeStackOp)], expected: &(usize, ScopeStackOp) )942     fn assert_ops_contain(
943         ops: &[(usize, ScopeStackOp)],
944         expected: &(usize, ScopeStackOp)
945     ) {
946         assert!(ops.contains(expected),
947                 "expected operations to contain {:?}: {:?}", expected, ops);
948     }
949 
assert_prototype_only_on(expected: &[&str], syntax_set: &SyntaxSet, syntax: &SyntaxReference)950     fn assert_prototype_only_on(expected: &[&str], syntax_set: &SyntaxSet, syntax: &SyntaxReference) {
951         for (name, id) in &syntax.contexts {
952             if name == "__main" || name == "__start" {
953                 // Skip special contexts
954                 continue;
955             }
956             let context = syntax_set.get_context(id);
957             if expected.contains(&name.as_str()) {
958                 assert!(context.prototype.is_some(), "Expected context {} to have prototype", name);
959             } else {
960                 assert!(context.prototype.is_none(), "Expected context {} to not have prototype", name);
961             }
962         }
963     }
964 
    /// Compiles only when `T` is `Send`; the body is intentionally empty.
    fn check_send<T: Send>() {}
966 
    /// Compiles only when `T` is `Sync`; the body is intentionally empty.
    fn check_sync<T: Sync>() {}
968 
syntax_a() -> SyntaxDefinition969     fn syntax_a() -> SyntaxDefinition {
970         SyntaxDefinition::load_from_str(
971             r#"
972             name: A
973             scope: source.a
974             file_extensions: [a]
975             contexts:
976               main:
977                 - match: 'a'
978                   scope: a
979                 - match: 'go_b'
980                   push: scope:source.b#main
981             "#,
982             true,
983             None,
984         ).unwrap()
985     }
986 
syntax_b() -> SyntaxDefinition987     fn syntax_b() -> SyntaxDefinition {
988         SyntaxDefinition::load_from_str(
989             r#"
990             name: B
991             scope: source.b
992             file_extensions: [b]
993             contexts:
994               main:
995                 - match: 'b'
996                   scope: b
997             "#,
998             true,
999             None,
1000         ).unwrap()
1001     }
1002 }
1003