1 //! Convert LSP Range to Kakoune's range-spec, and other position-related utilities.
2 //! Easy part:
3 //! * LSP ranges are 0-based, but Kakoune's are 1-based.
4 //! * LSP ranges are exclusive, but Kakoune's are inclusive.
//! This could be solved by applying a proper offset. A bit more tricky is that, to include
//! the line ending character, an LSP range uses an end position denoting the start of the
//! next line. This could be solved by keeping the current line, but setting the character
//! offset to an arbitrarily large value, and Kakoune will clamp it to the end of line. The
//! hard part is that LSP uses UTF-16 code units to count character offset, but Kakoune
//! expects bytes. It requires analysis of the buffer content for proper translation.
//! The hardest part is that language servers mostly don't respect the spec, and in an
//! inconsistent way. See https://github.com/Microsoft/language-server-protocol/issues/376 and
//! https://www.reddit.com/r/vim/comments/b3yzq4/a_lsp_client_maintainers_view_of_the_lsp_protocol/
//! for a bit more details.
//! The temporary resolution for this problem in kak-lsp is as follows: treat the LSP character
//! offset as a count of Unicode scalar values in UTF-8 encoded text (and then convert it into a
//! byte offset for Kakoune) by default, and treat the offset as a byte offset if specified in
//! the config. It's a horrible violation of both the spec and the most obvious spec alternative
//! (UTF-8 code units aka just bytes), but it seems like a viable pragmatic solution before we
//! start to dig deep into the proper support.
20 //! Pros of this solution for UTF-8 encoded text (and kak-lsp doesn't support other encodings yet):
21 //! * It's relatively easy to implement in a performant way (thanks to ropey).
22 //! * It works for entire Basic Multilingual Plane when language server adheres to spec.
//! * It just works when language server sends offset in Unicode scalar values (e.g. RLS).
//! * It works for at least Basic Latin when language server sends offset in UTF-8 bytes
//!   (e.g. pyls, clangd with offsetEncoding: utf-8).
//!   And just works when `offset_encoding: utf-8` is provided in the config.
27 use crate::types::*;
28 use crate::{context::Context, util::read_document};
29 use lsp_types::*;
30 use ropey::{Rope, RopeSlice};
31 use std::cmp::min;
32 
/// Arbitrarily large column used to mean "end of line".
///
/// Per the module documentation, Kakoune clamps an over-long column to the end of the line,
/// so a range that must include the line ending can use this value as its end column.
pub const EOL_OFFSET: u32 = 1_000_000;
34 
35 /// Convert LSP Range to Kakoune's range-spec.
lsp_range_to_kakoune( range: &Range, text: &Rope, offset_encoding: OffsetEncoding, ) -> KakouneRange36 pub fn lsp_range_to_kakoune(
37     range: &Range,
38     text: &Rope,
39     offset_encoding: OffsetEncoding,
40 ) -> KakouneRange {
41     match offset_encoding {
42         OffsetEncoding::Utf8 => lsp_range_to_kakoune_utf_8_code_units(range),
43         // Not a proper UTF-16 code units handling, but works within BMP
44         OffsetEncoding::Utf16 => lsp_range_to_kakoune_utf_8_code_points(range, text),
45     }
46 }
47 
lsp_position_to_kakoune( position: &Position, text: &Rope, offset_encoding: OffsetEncoding, ) -> KakounePosition48 pub fn lsp_position_to_kakoune(
49     position: &Position,
50     text: &Rope,
51     offset_encoding: OffsetEncoding,
52 ) -> KakounePosition {
53     match offset_encoding {
54         OffsetEncoding::Utf8 => lsp_position_to_kakoune_utf_8_code_units(position),
55         // Not a proper UTF-16 code units handling, but works within BMP
56         OffsetEncoding::Utf16 => lsp_position_to_kakoune_utf_8_code_points(position, text),
57     }
58 }
59 
kakoune_position_to_lsp( position: &KakounePosition, text: &Rope, offset_encoding: OffsetEncoding, ) -> Position60 pub fn kakoune_position_to_lsp(
61     position: &KakounePosition,
62     text: &Rope,
63     offset_encoding: OffsetEncoding,
64 ) -> Position {
65     match offset_encoding {
66         OffsetEncoding::Utf8 => kakoune_position_to_lsp_utf_8_code_units(position),
67         // Not a proper UTF-16 code units handling, but works within BMP
68         OffsetEncoding::Utf16 => kakoune_position_to_lsp_utf_8_code_points(position, text),
69     }
70 }
71 
72 /// Wrapper for kakoune_position_to_lsp which uses context to get buffer content and offset encoding.
get_lsp_position( filename: &str, position: &KakounePosition, ctx: &Context, ) -> Option<Position>73 pub fn get_lsp_position(
74     filename: &str,
75     position: &KakounePosition,
76     ctx: &Context,
77 ) -> Option<Position> {
78     ctx.documents
79         .get(filename)
80         .map(|document| kakoune_position_to_lsp(position, &document.text, ctx.offset_encoding))
81 }
82 
83 /// Wrapper for lsp_position_to_kakoune which uses context to get buffer content and offset encoding.
84 /// Reads the file directly if it is not present in context (is not open in editor).
get_kakoune_position( filename: &str, position: &Position, ctx: &Context, ) -> Option<KakounePosition>85 pub fn get_kakoune_position(
86     filename: &str,
87     position: &Position,
88     ctx: &Context,
89 ) -> Option<KakounePosition> {
90     get_file_contents(filename, ctx)
91         .map(|text| lsp_position_to_kakoune(position, &text, ctx.offset_encoding))
92 }
93 
94 /// Like get_kakoune_position but default to an approximate position if something goes wrong.
get_kakoune_position_with_fallback( filename_str: &str, position: Position, ctx: &Context, ) -> KakounePosition95 pub fn get_kakoune_position_with_fallback(
96     filename_str: &str,
97     position: Position,
98     ctx: &Context,
99 ) -> KakounePosition {
100     get_kakoune_position(filename_str, &position, ctx).unwrap_or_else(|| KakounePosition {
101         line: position.line + 1,
102         column: position.character + 1,
103     })
104 }
105 
106 /// Get the contents of a file.
107 /// Searches ctx.documents first and falls back to reading the file directly.
get_file_contents(filename: &str, ctx: &Context) -> Option<Rope>108 pub fn get_file_contents(filename: &str, ctx: &Context) -> Option<Rope> {
109     if let Some(doc) = ctx.documents.get(filename) {
110         return Some(doc.text.clone());
111     }
112 
113     match read_document(filename) {
114         Ok(text) => Some(Rope::from_str(&text)),
115         Err(err) => {
116             error!("Failed to read file {}: {}", filename, err);
117             None
118         }
119     }
120 }
121 
122 /// Get a line from a Rope
123 ///
124 /// If the line number is out-of-bounds, this will return the
125 /// last line. This is useful because the language server might
126 /// use a large value to convey "end of file".
get_line(line_number: usize, text: &Rope) -> RopeSlice127 pub fn get_line(line_number: usize, text: &Rope) -> RopeSlice {
128     text.line(min(line_number, text.len_lines() - 1))
129 }
130 
131 /// Get the byte index of a character in a Rope slice
132 ///
133 /// If the char number is out-of-bounds, this will return one past
134 /// the last character. This is useful because the language
135 /// server might use a large value to convey "end of file".
get_byte_index(char_index: usize, text: RopeSlice) -> usize136 fn get_byte_index(char_index: usize, text: RopeSlice) -> usize {
137     text.char_to_byte(min(char_index, text.len_chars()))
138 }
139 
lsp_range_to_kakoune_utf_8_code_points(range: &Range, text: &Rope) -> KakouneRange140 fn lsp_range_to_kakoune_utf_8_code_points(range: &Range, text: &Rope) -> KakouneRange {
141     let Range { start, end } = range;
142 
143     let start_line = get_line(start.line as _, text);
144     let start_byte = get_byte_index(start.character as _, start_line) as u32;
145     let end_line = get_line(end.line as _, text);
146     let end_byte = get_byte_index(end.character as _, end_line) as u32;
147 
148     lsp_range_to_kakoune_utf_8_code_units(&Range {
149         start: Position {
150             line: start.line,
151             character: start_byte,
152         },
153         end: Position {
154             line: end.line,
155             character: end_byte,
156         },
157     })
158 }
159 
lsp_range_to_kakoune_utf_8_code_units(range: &Range) -> KakouneRange160 fn lsp_range_to_kakoune_utf_8_code_units(range: &Range) -> KakouneRange {
161     let Range { start, end } = range;
162     let insert = start.line == end.line && start.character == end.character;
163     // Beginning of line is a very special case as we need to produce selection on the line
164     // to insert, and then insert before that selection. Selecting end of the previous line
165     // and inserting after selection doesn't work well for delete+insert cases like this:
166     /*
167         [
168           {
169             "range": {
170               "start": {
171                 "line": 5,
172                 "character": 0
173               },
174               "end": {
175                 "line": 6,
176                 "character": 0
177               }
178             },
179             "newText": ""
180           },
181           {
182             "range": {
183               "start": {
184                 "line": 6,
185                 "character": 0
186               },
187               "end": {
188                 "line": 6,
189                 "character": 0
190               }
191             },
192             "newText": "	fmt.Println(\"Hello, world!\")\n"
193           }
194         ]
195     */
196     let bol_insert = insert && end.character == 0;
197     let start_byte = start.character;
198 
199     // Exclusive->inclusive range.end conversion will make 0-length LSP range into the reversed
200     // 2-length Kakoune range, but we want 1-length (the closest to 0 it can get in Kakoune ;-)).
201     let end_byte = if insert {
202         start_byte
203     } else if end.character > 0 {
204         // -1 because LSP ranges are exclusive, but Kakoune's are inclusive.
205         end.character - 1
206     } else {
207         EOL_OFFSET - 1
208     };
209 
210     let end_line = if bol_insert || end.character > 0 {
211         end.line
212     } else {
213         end.line - 1
214     };
215 
216     // +1 because LSP ranges are 0-based, but Kakoune's are 1-based.
217     KakouneRange {
218         start: KakounePosition {
219             line: start.line + 1,
220             column: start_byte + 1,
221         },
222         end: KakounePosition {
223             line: end_line + 1,
224             column: end_byte + 1,
225         },
226     }
227 }
228 
kakoune_position_to_lsp_utf_8_code_points(position: &KakounePosition, text: &Rope) -> Position229 fn kakoune_position_to_lsp_utf_8_code_points(position: &KakounePosition, text: &Rope) -> Position {
230     // -1 because LSP & Rope ranges are 0-based, but Kakoune's are 1-based.
231     let line_idx = position.line - 1;
232     let col_idx = position.column - 1;
233     if line_idx as usize >= text.len_lines() {
234         return Position {
235             line: line_idx,
236             character: col_idx,
237         };
238     }
239 
240     let line = text.line(line_idx as _);
241     if col_idx as usize >= line.len_bytes() {
242         return Position {
243             line: line_idx,
244             character: col_idx,
245         };
246     }
247 
248     let character = line.byte_to_char(col_idx as _) as _;
249     Position {
250         line: line_idx,
251         character,
252     }
253 }
254 
kakoune_position_to_lsp_utf_8_code_units(position: &KakounePosition) -> Position255 fn kakoune_position_to_lsp_utf_8_code_units(position: &KakounePosition) -> Position {
256     // -1 because LSP ranges are 0-based, but Kakoune's are 1-based.
257     Position {
258         line: position.line - 1,
259         character: position.column - 1,
260     }
261 }
262 
lsp_position_to_kakoune_utf_8_code_points(position: &Position, text: &Rope) -> KakounePosition263 fn lsp_position_to_kakoune_utf_8_code_points(position: &Position, text: &Rope) -> KakounePosition {
264     if position.line as usize >= text.len_lines() {
265         return KakounePosition {
266             line: position.line + 1,
267             column: 999999999,
268         };
269     }
270 
271     let line = text.line(position.line as _);
272     if position.character as usize >= line.len_chars() {
273         return KakounePosition {
274             line: position.line + 1,
275             column: 999999999,
276         };
277     }
278 
279     let byte = line.char_to_byte(position.character as _) as u32;
280     // +1 because LSP ranges are 0-based, but Kakoune's are 1-based.
281     KakounePosition {
282         line: position.line + 1,
283         column: byte + 1,
284     }
285 }
286 
lsp_position_to_kakoune_utf_8_code_units(position: &Position) -> KakounePosition287 fn lsp_position_to_kakoune_utf_8_code_units(position: &Position) -> KakounePosition {
288     // +1 because LSP ranges are 0-based, but Kakoune's are 1-based.
289     KakounePosition {
290         line: position.line + 1,
291         column: position.character + 1,
292     }
293 }
294 
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an LSP range from 0-based `(line, character)` endpoints.
    fn lsp_range(start: (u32, u32), end: (u32, u32)) -> Range {
        Range {
            start: Position {
                line: start.0,
                character: start.1,
            },
            end: Position {
                line: end.0,
                character: end.1,
            },
        }
    }

    /// Build a Kakoune range from 1-based `(line, column)` endpoints.
    fn kakoune_range(start: (u32, u32), end: (u32, u32)) -> KakouneRange {
        KakouneRange {
            start: KakounePosition {
                line: start.0,
                column: start.1,
            },
            end: KakounePosition {
                line: end.0,
                column: end.1,
            },
        }
    }

    /// An empty range at the beginning of a line stays on that line.
    #[test]
    fn lsp_range_to_kakoune_utf_8_code_units_bol_insert() {
        let converted = lsp_range_to_kakoune_utf_8_code_units(&lsp_range((10, 0), (10, 0)));
        assert_eq!(converted, kakoune_range((11, 1), (11, 1)));
    }

    /// An empty range at the very beginning of the file maps to the first cell.
    #[test]
    fn lsp_range_to_kakoune_utf_8_code_units_bof_insert() {
        let converted = lsp_range_to_kakoune_utf_8_code_units(&lsp_range((0, 0), (0, 0)));
        assert_eq!(converted, kakoune_range((1, 1), (1, 1)));
    }

    /// A range ending at the start of the next line covers the whole line, ending included.
    #[test]
    fn lsp_range_to_kakoune_utf_8_code_units_eol() {
        let converted = lsp_range_to_kakoune_utf_8_code_units(&lsp_range((10, 0), (11, 0)));
        assert_eq!(converted, kakoune_range((11, 1), (11, EOL_OFFSET)));
    }
}
371