1 use crate::{
2 chars::{char_is_line_ending, char_is_whitespace},
3 find_first_non_whitespace_char,
4 syntax::{IndentQuery, LanguageConfiguration, Syntax},
5 tree_sitter::Node,
6 Rope, RopeSlice,
7 };
8
/// Enum representing indentation style.
///
/// Only values 1-8 are valid for the `Spaces` variant.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum IndentStyle {
    /// One tab character per indentation level.
    Tabs,
    /// The given number of spaces (1-8) per indentation level.
    Spaces(u8),
}

impl IndentStyle {
    /// Creates an `IndentStyle` from an indentation string.
    ///
    /// For example, passing `"    "` (four spaces) will create `IndentStyle::Spaces(4)`.
    /// Any string that does not start with a space yields `IndentStyle::Tabs`.
    ///
    /// In debug builds this asserts that `indent` is between 1 and 8 bytes long.
    #[allow(clippy::should_implement_trait)]
    #[inline]
    pub fn from_str(indent: &str) -> Self {
        // XXX: do we care about validating the input more than this?  Probably not...?
        debug_assert!(!indent.is_empty() && indent.len() <= 8);

        if indent.starts_with(' ') {
            IndentStyle::Spaces(indent.len() as u8)
        } else {
            IndentStyle::Tabs
        }
    }

    /// Returns the text that makes up one indentation level in this style:
    /// a single tab for `Tabs`, or `n` spaces for `Spaces(n)`.
    ///
    /// For a `Spaces` width outside 1-8 this asserts in debug builds and
    /// falls back to two spaces otherwise.
    #[inline]
    pub fn as_str(&self) -> &'static str {
        match *self {
            IndentStyle::Tabs => "\t",
            // Each literal must contain exactly as many spaces as the width.
            IndentStyle::Spaces(1) => " ",
            IndentStyle::Spaces(2) => "  ",
            IndentStyle::Spaces(3) => "   ",
            IndentStyle::Spaces(4) => "    ",
            IndentStyle::Spaces(5) => "     ",
            IndentStyle::Spaces(6) => "      ",
            IndentStyle::Spaces(7) => "       ",
            IndentStyle::Spaces(8) => "        ",

            // Unsupported indentation style.  This should never happen,
            // but just in case fall back to two spaces.
            IndentStyle::Spaces(n) => {
                debug_assert!(n > 0 && n <= 8); // Always triggers. `debug_panic!()` wanted.
                "  "
            }
        }
    }
}
57
58 /// Attempts to detect the indentation style used in a document.
59 ///
60 /// Returns the indentation style if the auto-detect confidence is
61 /// reasonably high, otherwise returns `None`.
auto_detect_indent_style(document_text: &Rope) -> Option<IndentStyle>62 pub fn auto_detect_indent_style(document_text: &Rope) -> Option<IndentStyle> {
63 // Build a histogram of the indentation *increases* between
64 // subsequent lines, ignoring lines that are all whitespace.
65 //
66 // Index 0 is for tabs, the rest are 1-8 spaces.
67 let histogram: [usize; 9] = {
68 let mut histogram = [0; 9];
69 let mut prev_line_is_tabs = false;
70 let mut prev_line_leading_count = 0usize;
71
72 // Loop through the lines, checking for and recording indentation
73 // increases as we go.
74 'outer: for line in document_text.lines().take(1000) {
75 let mut c_iter = line.chars();
76
77 // Is first character a tab or space?
78 let is_tabs = match c_iter.next() {
79 Some('\t') => true,
80 Some(' ') => false,
81
82 // Ignore blank lines.
83 Some(c) if char_is_line_ending(c) => continue,
84
85 _ => {
86 prev_line_is_tabs = false;
87 prev_line_leading_count = 0;
88 continue;
89 }
90 };
91
92 // Count the line's total leading tab/space characters.
93 let mut leading_count = 1;
94 let mut count_is_done = false;
95 for c in c_iter {
96 match c {
97 '\t' if is_tabs && !count_is_done => leading_count += 1,
98 ' ' if !is_tabs && !count_is_done => leading_count += 1,
99
100 // We stop counting if we hit whitespace that doesn't
101 // qualify as indent or doesn't match the leading
102 // whitespace, but we don't exit the loop yet because
103 // we still want to determine if the line is blank.
104 c if char_is_whitespace(c) => count_is_done = true,
105
106 // Ignore blank lines.
107 c if char_is_line_ending(c) => continue 'outer,
108
109 _ => break,
110 }
111
112 // Bound the worst-case execution time for weird text files.
113 if leading_count > 256 {
114 continue 'outer;
115 }
116 }
117
118 // If there was an increase in indentation over the previous
119 // line, update the histogram with that increase.
120 if (prev_line_is_tabs == is_tabs || prev_line_leading_count == 0)
121 && prev_line_leading_count < leading_count
122 {
123 if is_tabs {
124 histogram[0] += 1;
125 } else {
126 let amount = leading_count - prev_line_leading_count;
127 if amount <= 8 {
128 histogram[amount] += 1;
129 }
130 }
131 }
132
133 // Store this line's leading whitespace info for use with
134 // the next line.
135 prev_line_is_tabs = is_tabs;
136 prev_line_leading_count = leading_count;
137 }
138
139 // Give more weight to tabs, because their presence is a very
140 // strong indicator.
141 histogram[0] *= 2;
142
143 histogram
144 };
145
146 // Find the most frequent indent, its frequency, and the frequency of
147 // the next-most frequent indent.
148 let indent = histogram
149 .iter()
150 .enumerate()
151 .max_by_key(|kv| kv.1)
152 .unwrap()
153 .0;
154 let indent_freq = histogram[indent];
155 let indent_freq_2 = *histogram
156 .iter()
157 .enumerate()
158 .filter(|kv| kv.0 != indent)
159 .map(|kv| kv.1)
160 .max()
161 .unwrap();
162
163 // Return the the auto-detected result if we're confident enough in its
164 // accuracy, based on some heuristics.
165 if indent_freq >= 1 && (indent_freq_2 as f64 / indent_freq as f64) < 0.66 {
166 Some(match indent {
167 0 => IndentStyle::Tabs,
168 _ => IndentStyle::Spaces(indent as u8),
169 })
170 } else {
171 None
172 }
173 }
174
175 /// To determine indentation of a newly inserted line, figure out the indentation at the last col
176 /// of the previous line.
177 #[allow(dead_code)]
indent_level_for_line(line: RopeSlice, tab_width: usize) -> usize178 fn indent_level_for_line(line: RopeSlice, tab_width: usize) -> usize {
179 let mut len = 0;
180 for ch in line.chars() {
181 match ch {
182 '\t' => len += tab_width,
183 ' ' => len += 1,
184 _ => break,
185 }
186 }
187
188 len / tab_width
189 }
190
191 /// Find the highest syntax node at position.
192 /// This is to identify the column where this node (e.g., an HTML closing tag) ends.
get_highest_syntax_node_at_bytepos(syntax: &Syntax, pos: usize) -> Option<Node>193 fn get_highest_syntax_node_at_bytepos(syntax: &Syntax, pos: usize) -> Option<Node> {
194 let tree = syntax.tree();
195
196 // named_descendant
197 let mut node = match tree.root_node().descendant_for_byte_range(pos, pos) {
198 Some(node) => node,
199 None => return None,
200 };
201
202 while let Some(parent) = node.parent() {
203 if parent.start_byte() == node.start_byte() {
204 node = parent
205 } else {
206 break;
207 }
208 }
209
210 Some(node)
211 }
212
calculate_indentation(query: &IndentQuery, node: Option<Node>, newline: bool) -> usize213 fn calculate_indentation(query: &IndentQuery, node: Option<Node>, newline: bool) -> usize {
214 // NOTE: can't use contains() on query because of comparing Vec<String> and &str
215 // https://doc.rust-lang.org/std/vec/struct.Vec.html#method.contains
216
217 let mut increment: isize = 0;
218
219 let mut node = match node {
220 Some(node) => node,
221 None => return 0,
222 };
223
224 let mut prev_start = node.start_position().row;
225
226 // if we're calculating indentation for a brand new line then the current node will become the
227 // parent node. We need to take it's indentation level into account too.
228 let node_kind = node.kind();
229 if newline && query.indent.contains(node_kind) {
230 increment += 1;
231 }
232
233 while let Some(parent) = node.parent() {
234 let parent_kind = parent.kind();
235 let start = parent.start_position().row;
236
237 // detect deeply nested indents in the same line
238 // .map(|a| { <-- ({ is two scopes
239 // let len = 1; <-- indents one level
240 // }) <-- }) is two scopes
241 let starts_same_line = start == prev_start;
242
243 if query.outdent.contains(node.kind()) && !starts_same_line {
244 // we outdent by skipping the rules for the current level and jumping up
245 // node = parent;
246 increment -= 1;
247 // continue;
248 }
249
250 if query.indent.contains(parent_kind) // && not_first_or_last_sibling
251 && !starts_same_line
252 {
253 // println!("is_scope {}", parent_kind);
254 prev_start = start;
255 increment += 1
256 }
257
258 // if last_scope && increment > 0 && ...{ ignore }
259
260 node = parent;
261 }
262
263 increment.max(0) as usize
264 }
265
266 #[allow(dead_code)]
suggested_indent_for_line( language_config: &LanguageConfiguration, syntax: Option<&Syntax>, text: RopeSlice, line_num: usize, _tab_width: usize, ) -> usize267 fn suggested_indent_for_line(
268 language_config: &LanguageConfiguration,
269 syntax: Option<&Syntax>,
270 text: RopeSlice,
271 line_num: usize,
272 _tab_width: usize,
273 ) -> usize {
274 if let Some(start) = find_first_non_whitespace_char(text.line(line_num)) {
275 return suggested_indent_for_pos(
276 Some(language_config),
277 syntax,
278 text,
279 start + text.line_to_char(line_num),
280 false,
281 );
282 };
283
284 // if the line is blank, indent should be zero
285 0
286 }
287
288 // TODO: two usecases: if we are triggering this for a new, blank line:
289 // - it should return 0 when mass indenting stuff
290 // - it should look up the wrapper node and count it too when we press o/O
suggested_indent_for_pos( language_config: Option<&LanguageConfiguration>, syntax: Option<&Syntax>, text: RopeSlice, pos: usize, new_line: bool, ) -> usize291 pub fn suggested_indent_for_pos(
292 language_config: Option<&LanguageConfiguration>,
293 syntax: Option<&Syntax>,
294 text: RopeSlice,
295 pos: usize,
296 new_line: bool,
297 ) -> usize {
298 if let (Some(query), Some(syntax)) = (
299 language_config.and_then(|config| config.indent_query()),
300 syntax,
301 ) {
302 let byte_start = text.char_to_byte(pos);
303 let node = get_highest_syntax_node_at_bytepos(syntax, byte_start);
304
305 // let config = load indentation query config from Syntax(should contain language_config)
306
307 // TODO: special case for comments
308 // TODO: if preserve_leading_whitespace
309 calculate_indentation(query, node, new_line)
310 } else {
311 // TODO: heuristics for non-tree sitter grammars
312 0
313 }
314 }
315
get_scopes(syntax: Option<&Syntax>, text: RopeSlice, pos: usize) -> Vec<&'static str>316 pub fn get_scopes(syntax: Option<&Syntax>, text: RopeSlice, pos: usize) -> Vec<&'static str> {
317 let mut scopes = Vec::new();
318 if let Some(syntax) = syntax {
319 let pos = text.char_to_byte(pos);
320 let mut node = match syntax
321 .tree()
322 .root_node()
323 .descendant_for_byte_range(pos, pos)
324 {
325 Some(node) => node,
326 None => return scopes,
327 };
328
329 scopes.push(node.kind());
330
331 while let Some(parent) = node.parent() {
332 scopes.push(parent.kind());
333 node = parent;
334 }
335 }
336
337 scopes.reverse();
338 scopes
339 }
340
#[cfg(test)]
mod test {
    use super::*;
    use crate::Rope;

    /// Checks `indent_level_for_line` on space, tab and mixed indentation.
    #[test]
    fn test_indent_level() {
        let tab_width = 4;
        let line = Rope::from("        fn new"); // 8 spaces
        assert_eq!(indent_level_for_line(line.slice(..), tab_width), 2);
        let line = Rope::from("\t\t\tfn new"); // 3 tabs
        assert_eq!(indent_level_for_line(line.slice(..), tab_width), 3);
        // mixed indentation
        let line = Rope::from("\t    \tfn new"); // 1 tab, 4 spaces, tab
        assert_eq!(indent_level_for_line(line.slice(..), tab_width), 3);
    }

    /// Checks that, for every line of a Rust fixture, the suggested
    /// indentation equals the indentation the line actually has.
    #[test]
    fn test_suggested_indent_for_line() {
        // NOTE: the leading whitespace inside this literal is the expected
        // result of the test (4 spaces per level), so it must be preserved
        // exactly.  Blank lines must stay completely empty.
        let doc = Rope::from(
            "
use std::{
    io::{self, stdout, Stdout, Write},
    path::PathBuf,
    sync::Arc,
    time::Duration,
}
mod test {
    fn hello_world() {
        1 + 1;

        let does_indentation_work = 1;

        let test_function = function_with_param(this_param,
            that_param
        );

        let test_function = function_with_param(
            this_param,
            that_param
        );

        let test_function = function_with_proper_indent(param1,
            param2,
        );

        let selection = Selection::new(
            changes
                .clone()
                .map(|(start, end, text): (usize, usize, Option<Tendril>)| {
                    let len = text.map(|text| text.len()).unwrap() - 1; // minus newline
                    let pos = start + len;
                    Range::new(pos, pos)
                })
                .collect(),
            0,
        );

        return;
    }
}

impl<A, D> MyTrait<A, D> for YourType
where
    A: TraitB + TraitC,
    D: TraitE + TraitF,
{

}
#[test]
//
match test {
    Some(a) => 1,
    None => {
        unimplemented!()
    }
}
std::panic::set_hook(Box::new(move |info| {
    hook(info);
}));

{ { {
    1
}}}

pub fn change<I>(document: &Document, changes: I) -> Self
where
    I: IntoIterator<Item = Change> + ExactSizeIterator,
{
    [
        1,
        2,
        3,
    ];
    (
        1,
        2
    );
    true
}
",
        );

        let doc = Rope::from(doc);
        use crate::syntax::{
            Configuration, IndentationConfiguration, LanguageConfiguration, Loader,
        };
        use once_cell::sync::OnceCell;
        let loader = Loader::new(Configuration {
            language: vec![LanguageConfiguration {
                scope: "source.rust".to_string(),
                file_types: vec!["rs".to_string()],
                language_id: "Rust".to_string(),
                highlight_config: OnceCell::new(),
                config: None,
                //
                injection_regex: None,
                roots: vec![],
                comment_token: None,
                auto_format: false,
                language_server: None,
                indent: Some(IndentationConfiguration {
                    tab_width: 4,
                    // One indentation unit: four spaces, matching `tab_width`.
                    unit: String::from("    "),
                }),
                indent_query: OnceCell::new(),
                textobject_query: OnceCell::new(),
            }],
        });

        // set runtime path so we can find the queries
        let mut runtime = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        runtime.push("../runtime");
        std::env::set_var("HELIX_RUNTIME", runtime.to_str().unwrap());

        let language_config = loader.language_config_for_scope("source.rust").unwrap();
        let highlight_config = language_config.highlight_config(&[]).unwrap();
        let syntax = Syntax::new(&doc, highlight_config.clone());
        let text = doc.slice(..);
        let tab_width = 4;

        // Every line's actual indentation must match the suggested one.
        for i in 0..doc.len_lines() {
            let line = text.line(i);
            let indent = indent_level_for_line(line, tab_width);
            assert_eq!(
                suggested_indent_for_line(&language_config, Some(&syntax), text, i, tab_width),
                indent,
                "line {}: {}",
                i,
                line
            );
        }
    }
}
495