1 //! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)` 2 //! representation. 3 use std::iter; 4 5 use rustc_hash::FxHashMap; 6 use syntax::{TextRange, TextSize}; 7 8 #[derive(Clone, Debug, PartialEq, Eq)] 9 pub struct LineIndex { 10 /// Offset the the beginning of each line, zero-based 11 pub(crate) newlines: Vec<TextSize>, 12 /// List of non-ASCII characters on each line 13 pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>, 14 } 15 16 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] 17 pub struct LineColUtf16 { 18 /// Zero-based 19 pub line: u32, 20 /// Zero-based 21 pub col: u32, 22 } 23 24 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] 25 pub struct LineCol { 26 /// Zero-based 27 pub line: u32, 28 /// Zero-based utf8 offset 29 pub col: u32, 30 } 31 32 #[derive(Clone, Debug, Hash, PartialEq, Eq)] 33 pub(crate) struct Utf16Char { 34 /// Start offset of a character inside a line, zero-based 35 pub(crate) start: TextSize, 36 /// End offset of a character inside a line, zero-based 37 pub(crate) end: TextSize, 38 } 39 40 impl Utf16Char { 41 /// Returns the length in 8-bit UTF-8 code units. len(&self) -> TextSize42 fn len(&self) -> TextSize { 43 self.end - self.start 44 } 45 46 /// Returns the length in 16-bit UTF-16 code units. len_utf16(&self) -> usize47 fn len_utf16(&self) -> usize { 48 if self.len() == TextSize::from(4) { 49 2 50 } else { 51 1 52 } 53 } 54 } 55 56 impl LineIndex { new(text: &str) -> LineIndex57 pub fn new(text: &str) -> LineIndex { 58 let mut utf16_lines = FxHashMap::default(); 59 let mut utf16_chars = Vec::new(); 60 61 let mut newlines = vec![0.into()]; 62 let mut curr_row = 0.into(); 63 let mut curr_col = 0.into(); 64 let mut line = 0; 65 for c in text.chars() { 66 let c_len = TextSize::of(c); 67 curr_row += c_len; 68 if c == '\n' { 69 newlines.push(curr_row); 70 71 // Save any utf-16 characters seen in the previous line 72 if !utf16_chars.is_empty() { 73 utf16_lines.insert(line, utf16_chars); 74 utf16_chars = Vec::new(); 75 } 76 77 // Prepare for processing the next line 78 curr_col = 0.into(); 79 line += 1; 80 continue; 81 } 82 83 if !c.is_ascii() { 84 utf16_chars.push(Utf16Char { start: curr_col, end: curr_col + c_len }); 85 } 86 87 curr_col += c_len; 88 } 89 90 // Save any utf-16 characters seen in the last line 91 if !utf16_chars.is_empty() { 92 utf16_lines.insert(line, utf16_chars); 93 } 94 95 LineIndex { newlines, utf16_lines } 96 } 97 line_col(&self, offset: TextSize) -> LineCol98 pub fn line_col(&self, offset: TextSize) -> LineCol { 99 let line = self.newlines.partition_point(|&it| it <= offset) - 1; 100 let line_start_offset = self.newlines[line]; 101 let col = offset - line_start_offset; 102 LineCol { line: line as u32, col: col.into() } 103 } 104 offset(&self, line_col: LineCol) -> TextSize105 pub fn offset(&self, line_col: LineCol) -> TextSize { 106 self.newlines[line_col.line as usize] + TextSize::from(line_col.col) 107 } 108 to_utf16(&self, line_col: LineCol) -> LineColUtf16109 pub fn to_utf16(&self, line_col: LineCol) -> LineColUtf16 { 110 let col = self.utf8_to_utf16_col(line_col.line, line_col.col.into()); 111 LineColUtf16 { line: line_col.line, col: col as u32 } 112 } 113 to_utf8(&self, line_col: LineColUtf16) -> LineCol114 pub fn to_utf8(&self, line_col: LineColUtf16) -> LineCol { 115 let col = self.utf16_to_utf8_col(line_col.line, line_col.col); 116 LineCol { line: line_col.line, col: col.into() } 117 } 118 lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_119 pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ { 120 let lo = self.newlines.partition_point(|&it| it < range.start()); 121 let hi = self.newlines.partition_point(|&it| it <= range.end()); 122 let all = iter::once(range.start()) 123 .chain(self.newlines[lo..hi].iter().copied()) 124 .chain(iter::once(range.end())); 125 126 all.clone() 127 .zip(all.skip(1)) 128 .map(|(lo, hi)| TextRange::new(lo, hi)) 129 .filter(|it| !it.is_empty()) 130 } 131 utf8_to_utf16_col(&self, line: u32, col: TextSize) -> usize132 fn utf8_to_utf16_col(&self, line: u32, col: TextSize) -> usize { 133 let mut res: usize = col.into(); 134 if let Some(utf16_chars) = self.utf16_lines.get(&line) { 135 for c in utf16_chars { 136 if c.end <= col { 137 res -= usize::from(c.len()) - c.len_utf16(); 138 } else { 139 // From here on, all utf16 characters come *after* the character we are mapping, 140 // so we don't need to take them into account 141 break; 142 } 143 } 144 } 145 res 146 } 147 utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize148 fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize { 149 if let Some(utf16_chars) = self.utf16_lines.get(&line) { 150 for c in utf16_chars { 151 if col > u32::from(c.start) { 152 col += u32::from(c.len()) - c.len_utf16() as u32; 153 } else { 154 // From here on, all utf16 characters come *after* the character we are mapping, 155 // so we don't need to take them into account 156 break; 157 } 158 } 159 } 160 161 col.into() 162 } 163 } 164 165 #[cfg(test)] 166 mod tests; 167