1 use super::syntax_definition::*;
2 use super::scope::*;
3
4 #[cfg(feature = "metadata")]
5 use super::metadata::{LoadMetadata, Metadata, RawMetadataEntry};
6
7 #[cfg(feature = "yaml-load")]
8 use super::super::LoadingError;
9
10 use std::collections::{HashMap, HashSet, BTreeSet};
11 use std::path::Path;
12 #[cfg(feature = "yaml-load")]
13 use walkdir::WalkDir;
14 #[cfg(feature = "yaml-load")]
15 use std::io::Read;
16 use std::io::{self, BufRead, BufReader};
17 use std::fs::File;
18 use std::mem;
19
20 use lazycell::AtomicLazyCell;
21 use super::regex::Regex;
22 use crate::parsing::syntax_definition::ContextId;
23
/// A syntax set holds multiple syntaxes that have been linked together.
///
/// Use a [`SyntaxSetBuilder`] to load syntax definitions and build a syntax set.
///
/// After building, the syntax set is immutable and can no longer be modified, but you can convert
/// it back into a builder by using the [`into_builder`] method.
///
/// [`SyntaxSetBuilder`]: struct.SyntaxSetBuilder.html
/// [`into_builder`]: #method.into_builder
#[derive(Debug, Serialize, Deserialize)]
pub struct SyntaxSet {
    /// The linked syntaxes, in the order they were added to the builder.
    syntaxes: Vec<SyntaxReference>,
    /// The contexts of all syntaxes in one flat list; a `ContextId` is an index into it.
    contexts: Vec<Context>,
    /// Stores the syntax index for every path that was loaded
    path_syntaxes: Vec<(String, usize)>,

    /// Lazily built cache of the first-line regexes. It is never serialized; a deserialized
    /// set starts with an empty cell.
    #[serde(skip_serializing, skip_deserializing, default = "AtomicLazyCell::new")]
    first_line_cache: AtomicLazyCell<FirstLineCache>,
    /// Metadata, e.g. indent and commenting information.
    ///
    /// NOTE: if serializing, you should handle metadata manually; that is, you should serialize and
    /// deserialize it separately. See `examples/gendata.rs` for an example.
    #[cfg(feature = "metadata")]
    #[serde(skip, default)]
    pub(crate) metadata: Metadata,
}
50
/// A linked version of a [`SyntaxDefinition`] that is only useful as part of the
/// [`SyntaxSet`] that contains it. See docs for [`SyntaxSetBuilder::build`] for
/// more info.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SyntaxReference {
    /// Name of the syntax; used by the name and token lookups on the set.
    pub name: String,
    /// Strings compared against a file's extension and against its whole file name when
    /// looking a syntax up for a file.
    pub file_extensions: Vec<String>,
    /// Default scope of the syntax; used by the scope lookup on the set.
    pub scope: Scope,
    /// Source of the regex that is searched in a file's first line, if the syntax has one.
    pub first_line_match: Option<String>,
    /// Carried over unchanged from the syntax definition.
    pub hidden: bool,
    /// Carried over unchanged from the syntax definition.
    #[serde(serialize_with = "ordered_map")]
    pub variables: HashMap<String, String>,
    /// Maps each context name of this syntax to the id of the context stored in the
    /// owning set.
    #[serde(serialize_with = "ordered_map")]
    pub(crate) contexts: HashMap<String, ContextId>,
}
66
/// A syntax set builder is used for loading syntax definitions from the file
/// system or by adding [`SyntaxDefinition`] objects.
///
/// Once all the syntaxes have been added, call [`build`] to turn the builder into
/// a [`SyntaxSet`] that can be used for parsing or highlighting.
///
/// [`SyntaxDefinition`]: syntax_definition/struct.SyntaxDefinition.html
/// [`build`]: #method.build
/// [`SyntaxSet`]: struct.SyntaxSet.html
#[derive(Clone, Default)]
pub struct SyntaxSetBuilder {
    /// The syntax definitions added so far, in insertion order.
    syntaxes: Vec<SyntaxDefinition>,
    /// `(slash-joined path, index into syntaxes)` for every syntax loaded from disk.
    path_syntaxes: Vec<(String, usize)>,
    /// Raw metadata entries collected from `.tmPreferences` files while walking folders.
    #[cfg(feature = "metadata")]
    raw_metadata: LoadMetadata,

    /// If this `SyntaxSetBuilder` is created with `SyntaxSet::into_builder`
    /// from a `SyntaxSet` that already had metadata, we keep that metadata,
    /// merging it with newly loaded metadata.
    #[cfg(feature = "metadata")]
    existing_metadata: Option<Metadata>,
}
89
/// Reads the file at `p` completely and parses its text as a syntax definition.
///
/// The file stem of `p` (when it is valid UTF-8) is handed to
/// `SyntaxDefinition::load_from_str` as its third argument.
///
/// # Errors
///
/// I/O failures are converted by `?`; a parse failure is wrapped in
/// `LoadingError::ParseSyntax` together with the displayed path.
#[cfg(feature = "yaml-load")]
fn load_syntax_file(p: &Path,
                    lines_include_newline: bool)
                    -> Result<SyntaxDefinition, LoadingError> {
    let mut contents = String::new();
    File::open(p)?.read_to_string(&mut contents)?;

    let stem = p.file_stem().and_then(|stem| stem.to_str());
    SyntaxDefinition::load_from_str(&contents, lines_include_newline, stem)
        .map_err(|err| LoadingError::ParseSyntax(err, Some(format!("{}", p.display()))))
}
106
107 impl Clone for SyntaxSet {
clone(&self) -> SyntaxSet108 fn clone(&self) -> SyntaxSet {
109 SyntaxSet {
110 syntaxes: self.syntaxes.clone(),
111 contexts: self.contexts.clone(),
112 path_syntaxes: self.path_syntaxes.clone(),
113 // Will need to be re-initialized
114 first_line_cache: AtomicLazyCell::new(),
115 #[cfg(feature = "metadata")]
116 metadata: self.metadata.clone(),
117 }
118 }
119 }
120
121 impl Default for SyntaxSet {
default() -> Self122 fn default() -> Self {
123 SyntaxSet {
124 syntaxes: Vec::new(),
125 contexts: Vec::new(),
126 path_syntaxes: Vec::new(),
127 first_line_cache: AtomicLazyCell::new(),
128 #[cfg(feature = "metadata")]
129 metadata: Metadata::default(),
130 }
131 }
132 }
133
134
135 impl SyntaxSet {
new() -> SyntaxSet136 pub fn new() -> SyntaxSet {
137 SyntaxSet::default()
138 }
139
/// Convenience constructor that creates a builder, loads the syntax
/// definitions found in `folder` and builds the syntax set in one go.
///
/// Note that this uses `lines_include_newline` set to `false`, see the
/// [`add_from_folder`] method docs on [`SyntaxSetBuilder`] for an explanation
/// as to why this might not be the best.
///
/// [`add_from_folder`]: struct.SyntaxSetBuilder.html#method.add_from_folder
/// [`SyntaxSetBuilder`]: struct.SyntaxSetBuilder.html
#[cfg(feature = "yaml-load")]
pub fn load_from_folder<P: AsRef<Path>>(folder: P) -> Result<SyntaxSet, LoadingError> {
    let lines_include_newline = false;
    let mut loader = SyntaxSetBuilder::new();
    match loader.add_from_folder(folder, lines_include_newline) {
        Ok(()) => Ok(loader.build()),
        Err(err) => Err(err),
    }
}
155
156 /// The list of syntaxes in the set
syntaxes(&self) -> &[SyntaxReference]157 pub fn syntaxes(&self) -> &[SyntaxReference] {
158 &self.syntaxes[..]
159 }
160
/// Replaces the metadata stored in this set with `metadata`.
///
/// The previously stored metadata is dropped.
#[cfg(feature = "metadata")]
pub fn set_metadata(&mut self, metadata: Metadata) {
    self.metadata = metadata;
}
165
/// Borrows the metadata currently stored in this set.
#[cfg(feature = "metadata")]
pub fn metadata(&self) -> &Metadata {
    &self.metadata
}
171
172 /// Finds a syntax by its default scope, for example `source.regexp` finds the regex syntax.
173 ///
174 /// This and all similar methods below do a linear search of syntaxes, this should be fast
175 /// because there aren't many syntaxes, but don't think you can call it a bajillion times per
176 /// second.
find_syntax_by_scope(&self, scope: Scope) -> Option<&SyntaxReference>177 pub fn find_syntax_by_scope(&self, scope: Scope) -> Option<&SyntaxReference> {
178 self.syntaxes.iter().rev().find(|&s| s.scope == scope)
179 }
180
find_syntax_by_name<'a>(&'a self, name: &str) -> Option<&'a SyntaxReference>181 pub fn find_syntax_by_name<'a>(&'a self, name: &str) -> Option<&'a SyntaxReference> {
182 self.syntaxes.iter().rev().find(|&s| name == s.name)
183 }
184
find_syntax_by_extension<'a>(&'a self, extension: &str) -> Option<&'a SyntaxReference>185 pub fn find_syntax_by_extension<'a>(&'a self, extension: &str) -> Option<&'a SyntaxReference> {
186 self.syntaxes.iter().rev().find(|&s| s.file_extensions.iter().any(|e| e == extension))
187 }
188
189 /// Searches for a syntax first by extension and then by case-insensitive name
190 ///
191 /// This is useful for things like Github-flavoured-markdown code block highlighting where all
192 /// you have to go on is a short token given by the user
find_syntax_by_token<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference>193 pub fn find_syntax_by_token<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference> {
194 {
195 let ext_res = self.find_syntax_by_extension(s);
196 if ext_res.is_some() {
197 return ext_res;
198 }
199 }
200 self.syntaxes.iter().rev().find(|&syntax| syntax.name.eq_ignore_ascii_case(s))
201 }
202
203 /// Try to find the syntax for a file based on its first line
204 ///
205 /// This uses regexes that come with some sublime syntax grammars for matching things like
206 /// shebangs and mode lines like `-*- Mode: C -*-`
find_syntax_by_first_line<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference>207 pub fn find_syntax_by_first_line<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference> {
208 let cache = self.first_line_cache();
209 for &(ref reg, i) in cache.regexes.iter().rev() {
210 if reg.search(s, 0, s.len(), None) {
211 return Some(&self.syntaxes[i]);
212 }
213 }
214 None
215 }
216
217 /// Searches for a syntax by it's original file path when it was first loaded from disk
218 ///
219 /// This is primarily useful for syntax tests. Some may specify a
220 /// `Packages/PackageName/SyntaxName.sublime-syntax` path, and others may just have
221 /// `SyntaxName.sublime-syntax`. This caters for these by matching the end of the path of the
222 /// loaded syntax definition files
223 // however, if a syntax name is provided without a folder, make sure we don't accidentally match the end of a different syntax definition's name - by checking a / comes before it or it is the full path
find_syntax_by_path<'a>(&'a self, path: &str) -> Option<&'a SyntaxReference>224 pub fn find_syntax_by_path<'a>(&'a self, path: &str) -> Option<&'a SyntaxReference> {
225 let mut slash_path = "/".to_string();
226 slash_path.push_str(&path);
227 self.path_syntaxes.iter().rev().find(|t| t.0.ends_with(&slash_path) || t.0 == path).map(|&(_,i)| &self.syntaxes[i])
228 }
229
230 /// Convenience method that tries to find the syntax for a file path, first by extension/name
231 /// and then by first line of the file if that doesn't work.
232 ///
233 /// May IO Error because it sometimes tries to read the first line of the file.
234 ///
235 /// # Examples
236 ///
237 /// When determining how to highlight a file, use this in combination with a fallback to plain
238 /// text:
239 ///
240 /// ```
241 /// use syntect::parsing::SyntaxSet;
242 /// let ss = SyntaxSet::load_defaults_newlines();
243 /// let syntax = ss.find_syntax_for_file("testdata/highlight_test.erb")
244 /// .unwrap() // for IO errors, you may want to use try!() or another plain text fallback
245 /// .unwrap_or_else(|| ss.find_syntax_plain_text());
246 /// assert_eq!(syntax.name, "HTML (Rails)");
247 /// ```
find_syntax_for_file<P: AsRef<Path>>(&self, path_obj: P) -> io::Result<Option<&SyntaxReference>>248 pub fn find_syntax_for_file<P: AsRef<Path>>(&self,
249 path_obj: P)
250 -> io::Result<Option<&SyntaxReference>> {
251 let path: &Path = path_obj.as_ref();
252 let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
253 let extension = path.extension().and_then(|x| x.to_str()).unwrap_or("");
254 let ext_syntax = self.find_syntax_by_extension(file_name).or_else(
255 || self.find_syntax_by_extension(extension));
256 let line_syntax = if ext_syntax.is_none() {
257 let mut line = String::new();
258 let f = File::open(path)?;
259 let mut line_reader = BufReader::new(&f);
260 line_reader.read_line(&mut line)?;
261 self.find_syntax_by_first_line(&line)
262 } else {
263 None
264 };
265 let syntax = ext_syntax.or(line_syntax);
266 Ok(syntax)
267 }
268
269 /// Finds a syntax for plain text, which usually has no highlighting rules.
270 ///
271 /// This is good as a fallback when you can't find another syntax but you still want to use the
272 /// same highlighting pipeline code.
273 ///
274 /// This syntax should always be present, if not this method will panic. If the way you load
275 /// syntaxes doesn't create one, use [`add_plain_text_syntax`].
276 ///
277 /// # Examples
278 /// ```
279 /// use syntect::parsing::SyntaxSetBuilder;
280 /// let mut builder = SyntaxSetBuilder::new();
281 /// builder.add_plain_text_syntax();
282 /// let ss = builder.build();
283 /// let syntax = ss.find_syntax_by_token("rs").unwrap_or_else(|| ss.find_syntax_plain_text());
284 /// assert_eq!(syntax.name, "Plain Text");
285 /// ```
286 ///
287 /// [`add_plain_text_syntax`]: struct.SyntaxSetBuilder.html#method.add_plain_text_syntax
find_syntax_plain_text(&self) -> &SyntaxReference288 pub fn find_syntax_plain_text(&self) -> &SyntaxReference {
289 self.find_syntax_by_name("Plain Text")
290 .expect("All syntax sets ought to have a plain text syntax")
291 }
292
293 /// Converts this syntax set into a builder so that more syntaxes can be
294 /// added to it.
295 ///
296 /// Note that newly added syntaxes can have references to existing syntaxes
297 /// in the set, but not the other way around.
into_builder(self) -> SyntaxSetBuilder298 pub fn into_builder(self) -> SyntaxSetBuilder {
299 #[cfg(feature = "metadata")]
300 let SyntaxSet { syntaxes, contexts, path_syntaxes, metadata, .. } = self;
301 #[cfg(not(feature = "metadata"))]
302 let SyntaxSet { syntaxes, contexts, path_syntaxes, .. } = self;
303
304 let mut context_map = HashMap::with_capacity(contexts.len());
305 for (i, context) in contexts.into_iter().enumerate() {
306 context_map.insert(i, context);
307 }
308
309 let mut builder_syntaxes = Vec::with_capacity(syntaxes.len());
310
311 for syntax in syntaxes {
312 let SyntaxReference {
313 name,
314 file_extensions,
315 scope,
316 first_line_match,
317 hidden,
318 variables,
319 contexts,
320 } = syntax;
321
322 let mut builder_contexts = HashMap::with_capacity(contexts.len());
323 for (name, context_id) in contexts {
324 if let Some(context) = context_map.remove(&context_id.index()) {
325 builder_contexts.insert(name, context);
326 }
327 }
328
329 let syntax_definition = SyntaxDefinition {
330 name,
331 file_extensions,
332 scope,
333 first_line_match,
334 hidden,
335 variables,
336 contexts: builder_contexts,
337 };
338 builder_syntaxes.push(syntax_definition);
339 }
340
341 SyntaxSetBuilder {
342 syntaxes: builder_syntaxes,
343 path_syntaxes,
344 #[cfg(feature = "metadata")]
345 existing_metadata: Some(metadata),
346 #[cfg(feature = "metadata")]
347 raw_metadata: LoadMetadata::default(),
348 }
349 }
350
351 #[inline(always)]
get_context(&self, context_id: &ContextId) -> &Context352 pub(crate) fn get_context(&self, context_id: &ContextId) -> &Context {
353 &self.contexts[context_id.index()]
354 }
355
first_line_cache(&self) -> &FirstLineCache356 fn first_line_cache(&self) -> &FirstLineCache {
357 if let Some(cache) = self.first_line_cache.borrow() {
358 cache
359 } else {
360 let cache = FirstLineCache::new(self.syntaxes());
361 self.first_line_cache.fill(cache).ok();
362 self.first_line_cache.borrow().unwrap()
363 }
364 }
365
find_unlinked_contexts(&self) -> BTreeSet<String>366 pub fn find_unlinked_contexts(&self) -> BTreeSet<String> {
367 let SyntaxSet { syntaxes, contexts, .. } = self;
368
369 let mut context_map = HashMap::with_capacity(contexts.len());
370 for (i, context) in contexts.into_iter().enumerate() {
371 context_map.insert(i, context);
372 }
373
374 let mut unlinked_contexts = BTreeSet::new();
375
376 for syntax in syntaxes {
377 let SyntaxReference {
378 name,
379 scope,
380 contexts,
381 ..
382 } = syntax;
383
384 for (_, context_id) in contexts {
385 if let Some(context) = context_map.remove(&context_id.index()) {
386 for pattern in context.patterns.iter() {
387 let maybe_refs_to_check = match pattern {
388 Pattern::Match(match_pat) => {
389 match &match_pat.operation {
390 MatchOperation::Push(context_refs) => {
391 Some(context_refs)
392 },
393 MatchOperation::Set(context_refs) => {
394 Some(context_refs)
395 },
396 _ => None,
397 }
398 },
399 _ => None,
400 };
401
402 for context_ref in maybe_refs_to_check.into_iter().flatten() {
403 match context_ref {
404 ContextReference::Direct(_) => {},
405 _ => {
406 unlinked_contexts.insert(
407 format!(
408 "Syntax '{}' with scope '{}' has unresolved context reference {:?}",
409 name, scope, &context_ref
410 )
411 );
412 },
413 }
414 }
415 }
416 }
417 }
418 }
419 unlinked_contexts
420 }
421 }
422
423
424 impl SyntaxSetBuilder {
new() -> SyntaxSetBuilder425 pub fn new() -> SyntaxSetBuilder {
426 SyntaxSetBuilder::default()
427 }
428
429 /// Add a syntax to the set.
add(&mut self, syntax: SyntaxDefinition)430 pub fn add(&mut self, syntax: SyntaxDefinition) {
431 self.syntaxes.push(syntax);
432 }
433
434 /// The list of syntaxes added so far.
syntaxes(&self) -> &[SyntaxDefinition]435 pub fn syntaxes(&self) -> &[SyntaxDefinition] {
436 &self.syntaxes[..]
437 }
438
/// A rarely useful method that loads in a syntax with no highlighting rules for plain text
///
/// Exists mainly for adding the plain text syntax to syntax set dumps, because for some reason
/// the default Sublime plain text syntax is still in `.tmLanguage` format.
///
/// The added syntax is named `Plain Text`, has the scope `text.plain`, the extension `txt`
/// and a single empty `main` context.
#[cfg(feature = "yaml-load")]
pub fn add_plain_text_syntax(&mut self) {
    let yaml = "---\nname: Plain Text\nfile_extensions: [txt]\nscope: text.plain\ncontexts: {main: []}";
    let plain_text = SyntaxDefinition::load_from_str(yaml, false, None).unwrap();
    self.syntaxes.push(plain_text);
}
450
/// Loads all the `.sublime-syntax` files in a folder into this builder.
///
/// The folder is walked recursively with entries sorted by file name. When the `metadata`
/// feature is enabled, `.tmPreferences` files are loaded as metadata along the way; a
/// metadata file that fails to load is skipped.
///
/// The `lines_include_newline` parameter is used to work around the fact that Sublime Text
/// normally passes line strings including newline characters (`\n`) to its regex engine. This
/// results in many syntaxes having regexes matching `\n`, which doesn't work if you don't pass
/// in newlines. It is recommended that if you can you pass in lines with newlines if you can
/// and pass `true` for this parameter. If that is inconvenient pass `false` and the loader
/// will do some hacky find and replaces on the match regexes that seem to work for the default
/// syntax set, but may not work for any other syntaxes.
///
/// In the future I might include a "slow mode" that copies the lines passed in and appends a
/// newline if there isn't one, but in the interest of performance currently this hacky fix will
/// have to do.
///
/// # Errors
///
/// Fails on the first directory entry that cannot be read and on the first syntax file that
/// cannot be loaded.
#[cfg(feature = "yaml-load")]
pub fn add_from_folder<P: AsRef<Path>>(
    &mut self,
    folder: P,
    lines_include_newline: bool
) -> Result<(), LoadingError> {
    let walker = WalkDir::new(folder).sort_by(|a, b| a.file_name().cmp(b.file_name()));

    for entry in walker {
        let entry = entry.map_err(LoadingError::WalkDir)?;
        let file_path = entry.path();
        let extension = file_path.extension();

        if extension.map_or(false, |ext| ext == "sublime-syntax") {
            let syntax = load_syntax_file(file_path, lines_include_newline)?;
            if let Some(path_str) = file_path.to_str() {
                // Store the path with `/` separators regardless of platform, so that
                // syntaxes loaded on Windows can be found the same way.
                let normalized = Path::new(path_str)
                    .iter()
                    .map(|part| part.to_str().unwrap())
                    .collect::<Vec<_>>()
                    .join("/");
                self.path_syntaxes.push((normalized, self.syntaxes.len()));
            }
            self.syntaxes.push(syntax);
        }

        #[cfg(feature = "metadata")]
        {
            if extension == Some("tmPreferences".as_ref()) {
                // Best effort: metadata that fails to load is ignored.
                if let Ok(meta) = RawMetadataEntry::load(file_path) {
                    self.raw_metadata.add_raw(meta);
                }
            }
        }
    }

    Ok(())
}
497
498 /// Build a [`SyntaxSet`] from the syntaxes that have been added to this
499 /// builder.
500 ///
501 /// ### Linking
502 ///
503 /// The contexts in syntaxes can reference other contexts in the same syntax
504 /// or even other syntaxes. For example, a HTML syntax can reference a CSS
505 /// syntax so that CSS blocks in HTML work as expected.
506 ///
507 /// Those references work in various ways and involve one or two lookups.
508 /// To avoid having to do these lookups during parsing/highlighting, the
509 /// references are changed to directly reference contexts via index. That's
510 /// called linking.
511 ///
512 /// Linking is done in this build step. So in order to get the best
513 /// performance, you should try to avoid calling this too much. Ideally,
514 /// create a [`SyntaxSet`] once and then use it many times. If you can,
515 /// serialize a [`SyntaxSet`] for your program and when you run the program,
516 /// directly load the [`SyntaxSet`].
517 ///
518 /// [`SyntaxSet`]: struct.SyntaxSet.html
build(self) -> SyntaxSet519 pub fn build(self) -> SyntaxSet {
520
521 #[cfg(not(feature = "metadata"))]
522 let SyntaxSetBuilder { syntaxes: syntax_definitions, path_syntaxes } = self;
523 #[cfg(feature = "metadata")]
524 let SyntaxSetBuilder {
525 syntaxes: syntax_definitions,
526 path_syntaxes,
527 raw_metadata,
528 existing_metadata,
529 } = self;
530
531 let mut syntaxes = Vec::with_capacity(syntax_definitions.len());
532 let mut all_contexts = Vec::new();
533
534 for syntax_definition in syntax_definitions {
535 let SyntaxDefinition {
536 name,
537 file_extensions,
538 scope,
539 first_line_match,
540 hidden,
541 variables,
542 contexts,
543 } = syntax_definition;
544
545 let mut map = HashMap::new();
546
547 let mut contexts: Vec<(String, Context)> = contexts.into_iter().collect();
548 // Sort the values of the HashMap so that the contexts in the
549 // resulting SyntaxSet have a deterministic order for serializing.
550 // Because we're sorting by the keys which are unique, we can use
551 // an unstable sort.
552 contexts.sort_unstable_by(|(name_a, _), (name_b, _)| name_a.cmp(&name_b));
553 for (name, context) in contexts {
554 let index = all_contexts.len();
555 map.insert(name, ContextId::new(index));
556 all_contexts.push(context);
557 }
558
559 let syntax = SyntaxReference {
560 name,
561 file_extensions,
562 scope,
563 first_line_match,
564 hidden,
565 variables,
566 contexts: map,
567 };
568 syntaxes.push(syntax);
569 }
570
571 let mut found_more_backref_includes = true;
572 for syntax in &syntaxes {
573 let mut no_prototype = HashSet::new();
574 let prototype = syntax.contexts.get("prototype");
575 if let Some(prototype_id) = prototype {
576 // TODO: We could do this after parsing YAML, instead of here?
577 Self::recursively_mark_no_prototype(syntax, prototype_id.index(), &all_contexts, &mut no_prototype);
578 }
579
580 for context_id in syntax.contexts.values() {
581 let index = context_id.index();
582 let mut context = &mut all_contexts[index];
583 if let Some(prototype_id) = prototype {
584 if context.meta_include_prototype && !no_prototype.contains(&index) {
585 context.prototype = Some(*prototype_id);
586 }
587 }
588 Self::link_context(&mut context, syntax, &syntaxes);
589
590 if context.uses_backrefs {
591 found_more_backref_includes = true;
592 }
593 }
594 }
595
596 // We need to recursively mark contexts that include contexts which
597 // use backreferences as using backreferences. In theory we could use
598 // a more efficient method here like doing a toposort or constructing
599 // a representation with reversed edges and then tracing in the
600 // opposite direction, but I benchmarked this and it adds <2% to link
601 // time on the default syntax set, and linking doesn't even happen
602 // when loading from a binary dump.
603 while found_more_backref_includes {
604 found_more_backref_includes = false;
605 // find any contexts which include a context which uses backrefs
606 // and mark those as using backrefs - to support nested includes
607 for context_index in 0..all_contexts.len() {
608 let context = &all_contexts[context_index];
609 if !context.uses_backrefs && context.patterns.iter().any(|pattern| {
610 match pattern {
611 Pattern::Include(ContextReference::Direct(id))
612 if all_contexts[id.index()].uses_backrefs => true,
613 _ => false,
614 }
615 }) {
616 let mut context = &mut all_contexts[context_index];
617 context.uses_backrefs = true;
618 // look for contexts including this context
619 found_more_backref_includes = true;
620 }
621 }
622 }
623
624 #[cfg(feature = "metadata")]
625 let metadata = match existing_metadata {
626 Some(existing) => existing.merged_with_raw(raw_metadata),
627 None => raw_metadata.into(),
628 };
629
630 SyntaxSet {
631 syntaxes,
632 contexts: all_contexts,
633 path_syntaxes,
634 first_line_cache: AtomicLazyCell::new(),
635 #[cfg(feature = "metadata")]
636 metadata,
637 }
638 }
639
640 /// Anything recursively included by the prototype shouldn't include the prototype.
641 /// This marks them as such.
recursively_mark_no_prototype( syntax: &SyntaxReference, context_id: usize, contexts: &[Context], no_prototype: &mut HashSet<usize>, )642 fn recursively_mark_no_prototype(
643 syntax: &SyntaxReference,
644 context_id: usize,
645 contexts: &[Context],
646 no_prototype: &mut HashSet<usize>,
647 ) {
648 let first_time = no_prototype.insert(context_id);
649 if !first_time {
650 return;
651 }
652
653 for pattern in &contexts[context_id].patterns {
654 match *pattern {
655 // Apparently inline blocks also don't include the prototype when within the prototype.
656 // This is really weird, but necessary to run the YAML syntax.
657 Pattern::Match(ref match_pat) => {
658 let maybe_context_refs = match match_pat.operation {
659 MatchOperation::Push(ref context_refs) |
660 MatchOperation::Set(ref context_refs) => Some(context_refs),
661 MatchOperation::Pop | MatchOperation::None => None,
662 };
663 if let Some(context_refs) = maybe_context_refs {
664 for context_ref in context_refs.iter() {
665 match context_ref {
666 ContextReference::Inline(ref s) | ContextReference::Named(ref s) => {
667 if let Some(i) = syntax.contexts.get(s) {
668 Self::recursively_mark_no_prototype(syntax, i.index(), contexts, no_prototype);
669 }
670 },
671 ContextReference::Direct(ref id) => {
672 Self::recursively_mark_no_prototype(syntax, id.index(), contexts, no_prototype);
673 },
674 _ => (),
675 }
676 }
677 }
678 }
679 Pattern::Include(ref reference) => {
680 match reference {
681 ContextReference::Named(ref s) => {
682 if let Some(id) = syntax.contexts.get(s) {
683 Self::recursively_mark_no_prototype(syntax, id.index(), contexts, no_prototype);
684 }
685 },
686 ContextReference::Direct(ref id) => {
687 Self::recursively_mark_no_prototype(syntax, id.index(), contexts, no_prototype);
688 },
689 _ => (),
690 }
691 }
692 }
693 }
694 }
695
link_context(context: &mut Context, syntax: &SyntaxReference, syntaxes: &[SyntaxReference])696 fn link_context(context: &mut Context, syntax: &SyntaxReference, syntaxes: &[SyntaxReference]) {
697 for pattern in &mut context.patterns {
698 match *pattern {
699 Pattern::Match(ref mut match_pat) => Self::link_match_pat(match_pat, syntax, syntaxes),
700 Pattern::Include(ref mut context_ref) => Self::link_ref(context_ref, syntax, syntaxes),
701 }
702 }
703 }
704
link_ref(context_ref: &mut ContextReference, syntax: &SyntaxReference, syntaxes: &[SyntaxReference])705 fn link_ref(context_ref: &mut ContextReference, syntax: &SyntaxReference, syntaxes: &[SyntaxReference]) {
706 // println!("{:?}", context_ref);
707 use super::syntax_definition::ContextReference::*;
708 let linked_context_id = match *context_ref {
709 Named(ref s) | Inline(ref s) => {
710 // This isn't actually correct, but it is better than nothing/crashing.
711 // This is being phased out anyhow, see https://github.com/sublimehq/Packages/issues/73
712 // Fixes issue #30
713 if s == "$top_level_main" {
714 syntax.contexts.get("main")
715 } else {
716 syntax.contexts.get(s)
717 }
718 }
719 ByScope { scope, ref sub_context } => {
720 let context_name = sub_context.as_ref().map_or("main", |x| &**x);
721 syntaxes
722 .iter()
723 .rev()
724 .find(|s| s.scope == scope)
725 .and_then(|s| s.contexts.get(context_name))
726 }
727 File { ref name, ref sub_context } => {
728 let context_name = sub_context.as_ref().map_or("main", |x| &**x);
729 syntaxes
730 .iter()
731 .rev()
732 .find(|s| &s.name == name)
733 .and_then(|s| s.contexts.get(context_name))
734 }
735 Direct(_) => None,
736 };
737 if let Some(context_id) = linked_context_id {
738 let mut new_ref = Direct(*context_id);
739 mem::swap(context_ref, &mut new_ref);
740 }
741 }
742
link_match_pat(match_pat: &mut MatchPattern, syntax: &SyntaxReference, syntaxes: &[SyntaxReference])743 fn link_match_pat(match_pat: &mut MatchPattern, syntax: &SyntaxReference, syntaxes: &[SyntaxReference]) {
744 let maybe_context_refs = match match_pat.operation {
745 MatchOperation::Push(ref mut context_refs) |
746 MatchOperation::Set(ref mut context_refs) => Some(context_refs),
747 MatchOperation::Pop | MatchOperation::None => None,
748 };
749 if let Some(context_refs) = maybe_context_refs {
750 for context_ref in context_refs.iter_mut() {
751 Self::link_ref(context_ref, syntax, syntaxes);
752 }
753 }
754 if let Some(ref mut context_ref) = match_pat.with_prototype {
755 Self::link_ref(context_ref, syntax, syntaxes);
756 }
757 }
758 }
759
/// The first-line regexes of a syntax list, collected once so that a first-line lookup
/// only has to walk this list.
#[derive(Debug)]
struct FirstLineCache {
    /// (first line regex, syntax index) pairs for all syntaxes with a first line regex
    regexes: Vec<(Regex, usize)>,
}
765
766 impl FirstLineCache {
new(syntaxes: &[SyntaxReference]) -> FirstLineCache767 fn new(syntaxes: &[SyntaxReference]) -> FirstLineCache {
768 let mut regexes = Vec::new();
769 for (i, syntax) in syntaxes.iter().enumerate() {
770 if let Some(ref reg_str) = syntax.first_line_match {
771 let reg = Regex::new(reg_str.into());
772 regexes.push((reg, i));
773 }
774 }
775 FirstLineCache {
776 regexes,
777 }
778 }
779 }
780
781
782 #[cfg(feature = "yaml-load")]
783 #[cfg(test)]
784 mod tests {
785 use super::*;
786 use crate::parsing::{ParseState, Scope, syntax_definition};
787 use std::collections::HashMap;
788
/// Loads the syntaxes below `testdata/Packages`, adds a dummy CMake syntax plus the plain
/// text syntax, and exercises every lookup method of the built set.
#[test]
fn can_load() {
    let mut builder = SyntaxSetBuilder::new();
    builder.add_from_folder("testdata/Packages", false).unwrap();

    // A syntax that lists a whole file name ("CMakeLists.txt") as one of its extensions.
    let cmake_dummy_syntax = SyntaxDefinition {
        name: "CMake".to_string(),
        file_extensions: vec!["CMakeLists.txt".to_string(), "cmake".to_string()],
        scope: Scope::new("source.cmake").unwrap(),
        first_line_match: None,
        hidden: false,
        variables: HashMap::new(),
        contexts: HashMap::new(),
    };

    builder.add(cmake_dummy_syntax);
    builder.add_plain_text_syntax();

    let ps = builder.build();

    assert_eq!(&ps.find_syntax_by_first_line("#!/usr/bin/env node").unwrap().name,
               "JavaScript");
    let rails_scope = Scope::new("source.ruby.rails").unwrap();
    let syntax = ps.find_syntax_by_name("Ruby on Rails").unwrap();
    // Panics if the plain text syntax is missing.
    ps.find_syntax_plain_text();
    assert_eq!(&ps.find_syntax_by_extension("rake").unwrap().name, "Ruby");
    assert_eq!(&ps.find_syntax_by_token("ruby").unwrap().name, "Ruby");
    assert_eq!(&ps.find_syntax_by_first_line("lol -*- Mode: C -*- such line").unwrap().name,
               "C");
    assert_eq!(&ps.find_syntax_for_file("testdata/parser.rs").unwrap().unwrap().name,
               "Rust");
    // The `.test` extension is expected to match nothing, so the first line decides.
    assert_eq!(&ps.find_syntax_for_file("testdata/test_first_line.test")
                   .expect("Error finding syntax for file")
                   .expect("No syntax found for file")
                   .name,
               "Ruby");
    // The following paths are resolved from their names alone.
    assert_eq!(&ps.find_syntax_for_file(".bashrc").unwrap().unwrap().name,
               "Bourne Again Shell (bash)");
    assert_eq!(&ps.find_syntax_for_file("CMakeLists.txt").unwrap().unwrap().name,
               "CMake");
    assert_eq!(&ps.find_syntax_for_file("test.cmake").unwrap().unwrap().name,
               "CMake");
    assert_eq!(&ps.find_syntax_for_file("Rakefile").unwrap().unwrap().name, "Ruby");
    assert!(&ps.find_syntax_by_first_line("derp derp hi lol").is_none());
    assert_eq!(&ps.find_syntax_by_path("Packages/Rust/Rust.sublime-syntax").unwrap().name,
               "Rust");
    assert_eq!(syntax.scope, rails_scope);
    let main_context = ps.get_context(&syntax.contexts["main"]);
    let count = syntax_definition::context_iter(&ps, main_context).count();
    assert_eq!(count, 109);
}
842
/// A cloned syntax set must keep working after the set it was cloned from is gone.
#[test]
fn can_clone() {
    let original = {
        let mut builder = SyntaxSetBuilder::new();
        builder.add(syntax_a());
        builder.add(syntax_b());
        builder.build()
    };
    let cloned_syntax_set = original.clone();
    // Only the clone is used from here on.
    drop(original);

    let syntax = cloned_syntax_set.find_syntax_by_extension("a").unwrap();
    let mut parse_state = ParseState::new(syntax);
    let ops = parse_state.parse_line("a go_b b", &cloned_syntax_set);

    let expected = (7, ScopeStackOp::Push(Scope::new("b").unwrap()));
    assert_ops_contain(&ops, &expected);
}
861
862 #[test]
can_list_added_syntaxes()863 fn can_list_added_syntaxes() {
864 let mut builder = SyntaxSetBuilder::new();
865 builder.add(syntax_a());
866 builder.add(syntax_b());
867 let syntaxes = builder.syntaxes();
868
869 assert_eq!(syntaxes.len(), 2);
870 assert_eq!(syntaxes[0].name, "A");
871 assert_eq!(syntaxes[1].name, "B");
872 }
873
874 #[test]
can_add_more_syntaxes_with_builder()875 fn can_add_more_syntaxes_with_builder() {
876 let syntax_set_original = {
877 let mut builder = SyntaxSetBuilder::new();
878 builder.add(syntax_a());
879 builder.add(syntax_b());
880 builder.build()
881 };
882
883 let mut builder = syntax_set_original.into_builder();
884
885 let syntax_c = SyntaxDefinition::load_from_str(r#"
886 name: C
887 scope: source.c
888 file_extensions: [c]
889 contexts:
890 main:
891 - match: 'c'
892 scope: c
893 - match: 'go_a'
894 push: scope:source.a#main
895 "#, true, None).unwrap();
896
897 builder.add(syntax_c);
898
899 let syntax_set = builder.build();
900
901 let syntax = syntax_set.find_syntax_by_extension("c").unwrap();
902 let mut parse_state = ParseState::new(syntax);
903 let ops = parse_state.parse_line("c go_a a go_b b", &syntax_set);
904 let expected = (14, ScopeStackOp::Push(Scope::new("b").unwrap()));
905 assert_ops_contain(&ops, &expected);
906 }
907
908 #[test]
can_find_unlinked_contexts()909 fn can_find_unlinked_contexts() {
910 let syntax_set = {
911 let mut builder = SyntaxSetBuilder::new();
912 builder.add(syntax_a());
913 builder.add(syntax_b());
914 builder.build()
915 };
916
917 let unlinked_contexts = syntax_set.find_unlinked_contexts();
918 assert_eq!(unlinked_contexts.len(), 0);
919
920 let syntax_set = {
921 let mut builder = SyntaxSetBuilder::new();
922 builder.add(syntax_a());
923 builder.build()
924 };
925
926 let unlinked_contexts : Vec<String> = syntax_set.find_unlinked_contexts().into_iter().collect();
927 assert_eq!(unlinked_contexts.len(), 1);
928 assert_eq!(unlinked_contexts[0], "Syntax 'A' with scope 'source.a' has unresolved context reference ByScope { scope: <source.b>, sub_context: Some(\"main\") }");
929 }
930
931 #[test]
can_use_in_multiple_threads()932 fn can_use_in_multiple_threads() {
933 use rayon::prelude::*;
934
935 let syntax_set = {
936 let mut builder = SyntaxSetBuilder::new();
937 builder.add(syntax_a());
938 builder.add(syntax_b());
939 builder.build()
940 };
941
942 let lines = vec![
943 "a a a",
944 "a go_b b",
945 "go_b b",
946 "go_b b b",
947 ];
948
949 let results: Vec<Vec<(usize, ScopeStackOp)>> = lines
950 .par_iter()
951 .map(|line| {
952 let syntax = syntax_set.find_syntax_by_extension("a").unwrap();
953 let mut parse_state = ParseState::new(syntax);
954 parse_state.parse_line(line, &syntax_set)
955 })
956 .collect();
957
958 assert_ops_contain(&results[0], &(4, ScopeStackOp::Push(Scope::new("a").unwrap())));
959 assert_ops_contain(&results[1], &(7, ScopeStackOp::Push(Scope::new("b").unwrap())));
960 assert_ops_contain(&results[2], &(5, ScopeStackOp::Push(Scope::new("b").unwrap())));
961 assert_ops_contain(&results[3], &(8, ScopeStackOp::Push(Scope::new("b").unwrap())));
962 }
963
964 #[test]
is_sync()965 fn is_sync() {
966 check_sync::<SyntaxSet>();
967 }
968
969 #[test]
is_send()970 fn is_send() {
971 check_send::<SyntaxSet>();
972 }
973
974 #[test]
can_override_syntaxes()975 fn can_override_syntaxes() {
976 let syntax_set = {
977 let mut builder = SyntaxSetBuilder::new();
978 builder.add(syntax_a());
979 builder.add(syntax_b());
980
981 let syntax_a2 = SyntaxDefinition::load_from_str(r#"
982 name: A improved
983 scope: source.a
984 file_extensions: [a]
985 first_line_match: syntax\s+a
986 contexts:
987 main:
988 - match: a
989 scope: a2
990 - match: go_b
991 push: scope:source.b#main
992 "#, true, None).unwrap();
993
994 builder.add(syntax_a2);
995
996 let syntax_c = SyntaxDefinition::load_from_str(r#"
997 name: C
998 scope: source.c
999 file_extensions: [c]
1000 first_line_match: syntax\s+.*
1001 contexts:
1002 main:
1003 - match: c
1004 scope: c
1005 - match: go_a
1006 push: scope:source.a#main
1007 "#, true, None).unwrap();
1008
1009 builder.add(syntax_c);
1010
1011 builder.build()
1012 };
1013
1014 let mut syntax = syntax_set.find_syntax_by_extension("a").unwrap();
1015 assert_eq!(syntax.name, "A improved");
1016 syntax = syntax_set.find_syntax_by_scope(Scope::new(&"source.a").unwrap()).unwrap();
1017 assert_eq!(syntax.name, "A improved");
1018 syntax = syntax_set.find_syntax_by_first_line(&"syntax a").unwrap();
1019 assert_eq!(syntax.name, "C");
1020
1021 let mut parse_state = ParseState::new(syntax);
1022 let ops = parse_state.parse_line("c go_a a", &syntax_set);
1023 let expected = (7, ScopeStackOp::Push(Scope::new("a2").unwrap()));
1024 assert_ops_contain(&ops, &expected);
1025 }
1026
1027 #[test]
can_parse_issue219()1028 fn can_parse_issue219() {
1029 // Go to builder and back after loading so that build() gets Direct references instead of
1030 // Named ones. The bug was that Direct references were not handled when marking as
1031 // "no prototype", so prototype contexts accidentally had the prototype set, which made
1032 // the parser loop forever.
1033 let syntax_set = SyntaxSet::load_defaults_newlines().into_builder().build();
1034 let syntax = syntax_set.find_syntax_by_extension("yaml").unwrap();
1035
1036 let mut parse_state = ParseState::new(syntax);
1037 let ops = parse_state.parse_line("# test\n", &syntax_set);
1038 let expected = (0, ScopeStackOp::Push(Scope::new("comment.line.number-sign.yaml").unwrap()));
1039 assert_ops_contain(&ops, &expected);
1040 }
1041
1042 #[test]
no_prototype_for_contexts_included_from_prototype()1043 fn no_prototype_for_contexts_included_from_prototype() {
1044 let mut builder = SyntaxSetBuilder::new();
1045 let syntax = SyntaxDefinition::load_from_str(r#"
1046 name: Test Prototype
1047 scope: source.test
1048 file_extensions: [test]
1049 contexts:
1050 prototype:
1051 - include: included_from_prototype
1052 main:
1053 - match: main
1054 - match: other
1055 push: other
1056 other:
1057 - match: o
1058 included_from_prototype:
1059 - match: p
1060 scope: p
1061 "#, true, None).unwrap();
1062 builder.add(syntax);
1063 let ss = builder.build();
1064
1065 // "main" and "other" should have context set, "prototype" and "included_from_prototype"
1066 // must not have a prototype set.
1067 assert_prototype_only_on(&["main", "other"], &ss, &ss.syntaxes()[0]);
1068
1069 // Building again should have the same result. The difference is that after the first
1070 // build(), the references have been replaced with Direct references, so the code needs to
1071 // handle that correctly.
1072 let rebuilt = ss.into_builder().build();
1073 assert_prototype_only_on(&["main", "other"], &rebuilt, &rebuilt.syntaxes()[0]);
1074 }
1075
1076 #[test]
no_prototype_for_contexts_inline_in_prototype()1077 fn no_prototype_for_contexts_inline_in_prototype() {
1078 let mut builder = SyntaxSetBuilder::new();
1079 let syntax = SyntaxDefinition::load_from_str(r#"
1080 name: Test Prototype
1081 scope: source.test
1082 file_extensions: [test]
1083 contexts:
1084 prototype:
1085 - match: p
1086 push:
1087 - match: p2
1088 main:
1089 - match: main
1090 "#, true, None).unwrap();
1091 builder.add(syntax);
1092 let ss = builder.build();
1093
1094 assert_prototype_only_on(&["main"], &ss, &ss.syntaxes()[0]);
1095
1096 let rebuilt = ss.into_builder().build();
1097 assert_prototype_only_on(&["main"], &rebuilt, &rebuilt.syntaxes()[0]);
1098 }
1099
assert_ops_contain( ops: &[(usize, ScopeStackOp)], expected: &(usize, ScopeStackOp) )1100 fn assert_ops_contain(
1101 ops: &[(usize, ScopeStackOp)],
1102 expected: &(usize, ScopeStackOp)
1103 ) {
1104 assert!(ops.contains(expected),
1105 "expected operations to contain {:?}: {:?}", expected, ops);
1106 }
1107
assert_prototype_only_on(expected: &[&str], syntax_set: &SyntaxSet, syntax: &SyntaxReference)1108 fn assert_prototype_only_on(expected: &[&str], syntax_set: &SyntaxSet, syntax: &SyntaxReference) {
1109 for (name, id) in &syntax.contexts {
1110 if name == "__main" || name == "__start" {
1111 // Skip special contexts
1112 continue;
1113 }
1114 let context = syntax_set.get_context(id);
1115 if expected.contains(&name.as_str()) {
1116 assert!(context.prototype.is_some(), "Expected context {} to have prototype", name);
1117 } else {
1118 assert!(context.prototype.is_none(), "Expected context {} to not have prototype", name);
1119 }
1120 }
1121 }
1122
/// Compiles only when `T` is `Send`; does nothing at runtime.
fn check_send<T>()
where
    T: Send,
{
}
1124
/// Compiles only when `T` is `Sync`; does nothing at runtime.
fn check_sync<T>()
where
    T: Sync,
{
}
1126
syntax_a() -> SyntaxDefinition1127 fn syntax_a() -> SyntaxDefinition {
1128 SyntaxDefinition::load_from_str(
1129 r#"
1130 name: A
1131 scope: source.a
1132 file_extensions: [a]
1133 contexts:
1134 main:
1135 - match: 'a'
1136 scope: a
1137 - match: 'go_b'
1138 push: scope:source.b#main
1139 "#,
1140 true,
1141 None,
1142 ).unwrap()
1143 }
1144
syntax_b() -> SyntaxDefinition1145 fn syntax_b() -> SyntaxDefinition {
1146 SyntaxDefinition::load_from_str(
1147 r#"
1148 name: B
1149 scope: source.b
1150 file_extensions: [b]
1151 contexts:
1152 main:
1153 - match: 'b'
1154 scope: b
1155 "#,
1156 true,
1157 None,
1158 ).unwrap()
1159 }
1160 }
1161