1 use crate::{
2     chars::{char_is_line_ending, char_is_whitespace},
3     find_first_non_whitespace_char,
4     syntax::{IndentQuery, LanguageConfiguration, Syntax},
5     tree_sitter::Node,
6     Rope, RopeSlice,
7 };
8 
/// The unit of indentation used by a document.
///
/// `Spaces` carries the number of space characters per indent level; only
/// the values 1-8 are valid for it.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum IndentStyle {
    /// One tab character per indent level.
    Tabs,
    /// The given number of space characters per indent level (1-8).
    Spaces(u8),
}

impl IndentStyle {
    /// Builds an `IndentStyle` from a literal indentation string.
    ///
    /// A string that begins with a space yields `Spaces` whose width is the
    /// string's byte length, so `"    "` (four spaces) becomes
    /// `IndentStyle::Spaces(4)`. Any other string yields `Tabs`.
    ///
    /// Debug builds assert that `indent` is non-empty and at most 8 bytes
    /// long; no further validation is performed.
    // The name collides with `FromStr::from_str`, but this is an infallible
    // inherent method, so the clippy lint is silenced.
    #[allow(clippy::should_implement_trait)]
    #[inline]
    pub fn from_str(indent: &str) -> Self {
        debug_assert!(!indent.is_empty() && indent.len() <= 8);

        // A space is a single ASCII byte, so checking the first byte is the
        // same as checking the first character.
        match indent.as_bytes().first() {
            Some(b' ') => IndentStyle::Spaces(indent.len() as u8),
            _ => IndentStyle::Tabs,
        }
    }

    /// Returns the text of a single indent level for this style.
    ///
    /// `Tabs` gives `"\t"` and `Spaces(n)` gives `n` spaces for `n` in 1-8.
    /// Any other width is unsupported: debug builds panic on the assertion,
    /// release builds fall back to two spaces.
    #[inline]
    pub fn as_str(&self) -> &'static str {
        // Eight spaces; every valid width is a prefix of this string.
        const SPACES: &str = "        ";

        match *self {
            IndentStyle::Tabs => "\t",
            IndentStyle::Spaces(n @ 1..=8) => &SPACES[..usize::from(n)],

            // Unsupported indentation style. This should never happen, but
            // just in case fall back to two spaces.
            IndentStyle::Spaces(n) => {
                debug_assert!(n > 0 && n <= 8); // Always triggers here.
                "  "
            }
        }
    }
}
57 
58 /// Attempts to detect the indentation style used in a document.
59 ///
60 /// Returns the indentation style if the auto-detect confidence is
61 /// reasonably high, otherwise returns `None`.
auto_detect_indent_style(document_text: &Rope) -> Option<IndentStyle>62 pub fn auto_detect_indent_style(document_text: &Rope) -> Option<IndentStyle> {
63     // Build a histogram of the indentation *increases* between
64     // subsequent lines, ignoring lines that are all whitespace.
65     //
66     // Index 0 is for tabs, the rest are 1-8 spaces.
67     let histogram: [usize; 9] = {
68         let mut histogram = [0; 9];
69         let mut prev_line_is_tabs = false;
70         let mut prev_line_leading_count = 0usize;
71 
72         // Loop through the lines, checking for and recording indentation
73         // increases as we go.
74         'outer: for line in document_text.lines().take(1000) {
75             let mut c_iter = line.chars();
76 
77             // Is first character a tab or space?
78             let is_tabs = match c_iter.next() {
79                 Some('\t') => true,
80                 Some(' ') => false,
81 
82                 // Ignore blank lines.
83                 Some(c) if char_is_line_ending(c) => continue,
84 
85                 _ => {
86                     prev_line_is_tabs = false;
87                     prev_line_leading_count = 0;
88                     continue;
89                 }
90             };
91 
92             // Count the line's total leading tab/space characters.
93             let mut leading_count = 1;
94             let mut count_is_done = false;
95             for c in c_iter {
96                 match c {
97                     '\t' if is_tabs && !count_is_done => leading_count += 1,
98                     ' ' if !is_tabs && !count_is_done => leading_count += 1,
99 
100                     // We stop counting if we hit whitespace that doesn't
101                     // qualify as indent or doesn't match the leading
102                     // whitespace, but we don't exit the loop yet because
103                     // we still want to determine if the line is blank.
104                     c if char_is_whitespace(c) => count_is_done = true,
105 
106                     // Ignore blank lines.
107                     c if char_is_line_ending(c) => continue 'outer,
108 
109                     _ => break,
110                 }
111 
112                 // Bound the worst-case execution time for weird text files.
113                 if leading_count > 256 {
114                     continue 'outer;
115                 }
116             }
117 
118             // If there was an increase in indentation over the previous
119             // line, update the histogram with that increase.
120             if (prev_line_is_tabs == is_tabs || prev_line_leading_count == 0)
121                 && prev_line_leading_count < leading_count
122             {
123                 if is_tabs {
124                     histogram[0] += 1;
125                 } else {
126                     let amount = leading_count - prev_line_leading_count;
127                     if amount <= 8 {
128                         histogram[amount] += 1;
129                     }
130                 }
131             }
132 
133             // Store this line's leading whitespace info for use with
134             // the next line.
135             prev_line_is_tabs = is_tabs;
136             prev_line_leading_count = leading_count;
137         }
138 
139         // Give more weight to tabs, because their presence is a very
140         // strong indicator.
141         histogram[0] *= 2;
142 
143         histogram
144     };
145 
146     // Find the most frequent indent, its frequency, and the frequency of
147     // the next-most frequent indent.
148     let indent = histogram
149         .iter()
150         .enumerate()
151         .max_by_key(|kv| kv.1)
152         .unwrap()
153         .0;
154     let indent_freq = histogram[indent];
155     let indent_freq_2 = *histogram
156         .iter()
157         .enumerate()
158         .filter(|kv| kv.0 != indent)
159         .map(|kv| kv.1)
160         .max()
161         .unwrap();
162 
163     // Return the the auto-detected result if we're confident enough in its
164     // accuracy, based on some heuristics.
165     if indent_freq >= 1 && (indent_freq_2 as f64 / indent_freq as f64) < 0.66 {
166         Some(match indent {
167             0 => IndentStyle::Tabs,
168             _ => IndentStyle::Spaces(indent as u8),
169         })
170     } else {
171         None
172     }
173 }
174 
175 /// To determine indentation of a newly inserted line, figure out the indentation at the last col
176 /// of the previous line.
177 #[allow(dead_code)]
indent_level_for_line(line: RopeSlice, tab_width: usize) -> usize178 fn indent_level_for_line(line: RopeSlice, tab_width: usize) -> usize {
179     let mut len = 0;
180     for ch in line.chars() {
181         match ch {
182             '\t' => len += tab_width,
183             ' ' => len += 1,
184             _ => break,
185         }
186     }
187 
188     len / tab_width
189 }
190 
191 /// Find the highest syntax node at position.
192 /// This is to identify the column where this node (e.g., an HTML closing tag) ends.
get_highest_syntax_node_at_bytepos(syntax: &Syntax, pos: usize) -> Option<Node>193 fn get_highest_syntax_node_at_bytepos(syntax: &Syntax, pos: usize) -> Option<Node> {
194     let tree = syntax.tree();
195 
196     // named_descendant
197     let mut node = match tree.root_node().descendant_for_byte_range(pos, pos) {
198         Some(node) => node,
199         None => return None,
200     };
201 
202     while let Some(parent) = node.parent() {
203         if parent.start_byte() == node.start_byte() {
204             node = parent
205         } else {
206             break;
207         }
208     }
209 
210     Some(node)
211 }
212 
calculate_indentation(query: &IndentQuery, node: Option<Node>, newline: bool) -> usize213 fn calculate_indentation(query: &IndentQuery, node: Option<Node>, newline: bool) -> usize {
214     // NOTE: can't use contains() on query because of comparing Vec<String> and &str
215     // https://doc.rust-lang.org/std/vec/struct.Vec.html#method.contains
216 
217     let mut increment: isize = 0;
218 
219     let mut node = match node {
220         Some(node) => node,
221         None => return 0,
222     };
223 
224     let mut prev_start = node.start_position().row;
225 
226     // if we're calculating indentation for a brand new line then the current node will become the
227     // parent node. We need to take it's indentation level into account too.
228     let node_kind = node.kind();
229     if newline && query.indent.contains(node_kind) {
230         increment += 1;
231     }
232 
233     while let Some(parent) = node.parent() {
234         let parent_kind = parent.kind();
235         let start = parent.start_position().row;
236 
237         // detect deeply nested indents in the same line
238         // .map(|a| {       <-- ({ is two scopes
239         //     let len = 1; <-- indents one level
240         // })               <-- }) is two scopes
241         let starts_same_line = start == prev_start;
242 
243         if query.outdent.contains(node.kind()) && !starts_same_line {
244             // we outdent by skipping the rules for the current level and jumping up
245             // node = parent;
246             increment -= 1;
247             // continue;
248         }
249 
250         if query.indent.contains(parent_kind) // && not_first_or_last_sibling
251             && !starts_same_line
252         {
253             // println!("is_scope {}", parent_kind);
254             prev_start = start;
255             increment += 1
256         }
257 
258         // if last_scope && increment > 0 && ...{ ignore }
259 
260         node = parent;
261     }
262 
263     increment.max(0) as usize
264 }
265 
266 #[allow(dead_code)]
suggested_indent_for_line( language_config: &LanguageConfiguration, syntax: Option<&Syntax>, text: RopeSlice, line_num: usize, _tab_width: usize, ) -> usize267 fn suggested_indent_for_line(
268     language_config: &LanguageConfiguration,
269     syntax: Option<&Syntax>,
270     text: RopeSlice,
271     line_num: usize,
272     _tab_width: usize,
273 ) -> usize {
274     if let Some(start) = find_first_non_whitespace_char(text.line(line_num)) {
275         return suggested_indent_for_pos(
276             Some(language_config),
277             syntax,
278             text,
279             start + text.line_to_char(line_num),
280             false,
281         );
282     };
283 
284     // if the line is blank, indent should be zero
285     0
286 }
287 
288 // TODO: two usecases: if we are triggering this for a new, blank line:
289 // - it should return 0 when mass indenting stuff
290 // - it should look up the wrapper node and count it too when we press o/O
suggested_indent_for_pos( language_config: Option<&LanguageConfiguration>, syntax: Option<&Syntax>, text: RopeSlice, pos: usize, new_line: bool, ) -> usize291 pub fn suggested_indent_for_pos(
292     language_config: Option<&LanguageConfiguration>,
293     syntax: Option<&Syntax>,
294     text: RopeSlice,
295     pos: usize,
296     new_line: bool,
297 ) -> usize {
298     if let (Some(query), Some(syntax)) = (
299         language_config.and_then(|config| config.indent_query()),
300         syntax,
301     ) {
302         let byte_start = text.char_to_byte(pos);
303         let node = get_highest_syntax_node_at_bytepos(syntax, byte_start);
304 
305         // let config = load indentation query config from Syntax(should contain language_config)
306 
307         // TODO: special case for comments
308         // TODO: if preserve_leading_whitespace
309         calculate_indentation(query, node, new_line)
310     } else {
311         // TODO: heuristics for non-tree sitter grammars
312         0
313     }
314 }
315 
get_scopes(syntax: Option<&Syntax>, text: RopeSlice, pos: usize) -> Vec<&'static str>316 pub fn get_scopes(syntax: Option<&Syntax>, text: RopeSlice, pos: usize) -> Vec<&'static str> {
317     let mut scopes = Vec::new();
318     if let Some(syntax) = syntax {
319         let pos = text.char_to_byte(pos);
320         let mut node = match syntax
321             .tree()
322             .root_node()
323             .descendant_for_byte_range(pos, pos)
324         {
325             Some(node) => node,
326             None => return scopes,
327         };
328 
329         scopes.push(node.kind());
330 
331         while let Some(parent) = node.parent() {
332             scopes.push(parent.kind());
333             node = parent;
334         }
335     }
336 
337     scopes.reverse();
338     scopes
339 }
340 
#[cfg(test)]
mod test {
    use super::*;
    use crate::Rope;

    /// Checks `indent_level_for_line` on space-only, tab-only and mixed
    /// leading whitespace with a tab width of 4.
    #[test]
    fn test_indent_level() {
        let tab_width = 4;
        let line = Rope::from("        fn new"); // 8 spaces
        assert_eq!(indent_level_for_line(line.slice(..), tab_width), 2);
        let line = Rope::from("\t\t\tfn new"); // 3 tabs
        assert_eq!(indent_level_for_line(line.slice(..), tab_width), 3);
        // mixed indentation
        let line = Rope::from("\t    \tfn new"); // 1 tab, 4 spaces, tab
        assert_eq!(indent_level_for_line(line.slice(..), tab_width), 3);
    }

    /// Parses a hand-indented Rust fixture and asserts that, for every line,
    /// the suggested indent equals the indent the line actually has.
    #[test]
    fn test_suggested_indent_for_line() {
        // The fixture's own leading whitespace is the expected result, so it
        // must not be reformatted.
        let doc = Rope::from(
            "
use std::{
    io::{self, stdout, Stdout, Write},
    path::PathBuf,
    sync::Arc,
    time::Duration,
}
mod test {
    fn hello_world() {
        1 + 1;

        let does_indentation_work = 1;

        let test_function = function_with_param(this_param,
            that_param
        );

        let test_function = function_with_param(
            this_param,
            that_param
        );

        let test_function = function_with_proper_indent(param1,
            param2,
        );

        let selection = Selection::new(
            changes
                .clone()
                .map(|(start, end, text): (usize, usize, Option<Tendril>)| {
                    let len = text.map(|text| text.len()).unwrap() - 1; // minus newline
                    let pos = start + len;
                    Range::new(pos, pos)
                })
                .collect(),
            0,
        );

        return;
    }
}

impl<A, D> MyTrait<A, D> for YourType
where
    A: TraitB + TraitC,
    D: TraitE + TraitF,
{

}
#[test]
//
match test {
    Some(a) => 1,
    None => {
        unimplemented!()
    }
}
std::panic::set_hook(Box::new(move |info| {
    hook(info);
}));

{ { {
    1
}}}

pub fn change<I>(document: &Document, changes: I) -> Self
where
    I: IntoIterator<Item = Change> + ExactSizeIterator,
{
    [
        1,
        2,
        3,
    ];
    (
        1,
        2
    );
    true
}
",
        );

        use crate::syntax::{
            Configuration, IndentationConfiguration, LanguageConfiguration, Loader,
        };
        use once_cell::sync::OnceCell;
        let loader = Loader::new(Configuration {
            language: vec![LanguageConfiguration {
                scope: "source.rust".to_string(),
                file_types: vec!["rs".to_string()],
                language_id: "Rust".to_string(),
                highlight_config: OnceCell::new(),
                config: None,
                injection_regex: None,
                roots: vec![],
                comment_token: None,
                auto_format: false,
                language_server: None,
                indent: Some(IndentationConfiguration {
                    tab_width: 4,
                    unit: String::from("    "),
                }),
                indent_query: OnceCell::new(),
                textobject_query: OnceCell::new(),
            }],
        });

        // set runtime path so we can find the queries
        let mut runtime = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        runtime.push("../runtime");
        std::env::set_var("HELIX_RUNTIME", runtime.to_str().unwrap());

        let language_config = loader.language_config_for_scope("source.rust").unwrap();
        let highlight_config = language_config.highlight_config(&[]).unwrap();
        let syntax = Syntax::new(&doc, highlight_config.clone());
        let text = doc.slice(..);
        let tab_width = 4;

        // The measured indent of each line is the expected suggestion.
        for i in 0..doc.len_lines() {
            let line = text.line(i);
            let indent = indent_level_for_line(line, tab_width);
            assert_eq!(
                suggested_indent_for_line(&language_config, Some(&syntax), text, i, tab_width),
                indent,
                "line {}: {}",
                i,
                line
            );
        }
    }
}
495