1 //! Convert LSP Range to Kakoune's range-spec, and other position-related utilities.
2 //! Easy part:
3 //! * LSP ranges are 0-based, but Kakoune's are 1-based.
4 //! * LSP ranges are exclusive, but Kakoune's are inclusive.
//! Both are solved by applying a proper offset. A bit trickier is that, to include the line
//! ending character, an LSP range uses an end position denoting the start of the next line.
//! This is solved by staying on the current line but setting the character offset to an
//! arbitrarily large value, which Kakoune clamps to the end of the line. The hard part is
//! that LSP counts character offsets in UTF-16 code units, but Kakoune expects bytes, so a
//! proper translation requires analysis of the buffer content.
//! The hardest part is that language servers mostly don't respect the spec, and they do so in
//! an inconsistent way. See https://github.com/Microsoft/language-server-protocol/issues/376 and
//! https://www.reddit.com/r/vim/comments/b3yzq4/a_lsp_client_maintainers_view_of_the_lsp_protocol/
//! for a few more details.
//! The temporary resolution of this problem in kak-lsp is as follows: treat the LSP character
//! offset as a count of Unicode scalar values (and then convert it into a byte offset for Kakoune)
//! by default, and treat the offset as a byte offset if so specified in the config. It's a
//! horrible violation of both the spec and the most obvious spec alternative (UTF-8 code units,
//! aka just bytes), but it seems like a viable pragmatic solution before we start to dig deep
//! into proper support.
//! Pros of this solution for UTF-8 encoded text (and kak-lsp doesn't support other encodings yet):
//! * It's relatively easy to implement in a performant way (thanks to ropey).
//! * It works for the entire Basic Multilingual Plane when the language server adheres to the spec.
//! * It just works when the language server sends offsets in Unicode scalar values (e.g. RLS).
//! * It works for at least Basic Latin when the language server sends offsets in UTF-8 bytes
//!   (e.g. pyls, clangd with offsetEncoding: utf-8).
//!
//! And it just works when `offset_encoding: utf-8` is provided in the config.
27 use crate::types::*;
28 use crate::{context::Context, util::read_document};
29 use lsp_types::*;
30 use ropey::{Rope, RopeSlice};
31 use std::cmp::min;
32
/// Arbitrarily large column used to mean "end of line".
///
/// Range conversion emits it as the end column when an LSP range ends at the start of the
/// next line; Kakoune is expected to clamp it to the actual end of the line.
pub const EOL_OFFSET: u32 = 1_000_000;
34
35 /// Convert LSP Range to Kakoune's range-spec.
lsp_range_to_kakoune( range: &Range, text: &Rope, offset_encoding: OffsetEncoding, ) -> KakouneRange36 pub fn lsp_range_to_kakoune(
37 range: &Range,
38 text: &Rope,
39 offset_encoding: OffsetEncoding,
40 ) -> KakouneRange {
41 match offset_encoding {
42 OffsetEncoding::Utf8 => lsp_range_to_kakoune_utf_8_code_units(range),
43 // Not a proper UTF-16 code units handling, but works within BMP
44 OffsetEncoding::Utf16 => lsp_range_to_kakoune_utf_8_code_points(range, text),
45 }
46 }
47
lsp_position_to_kakoune( position: &Position, text: &Rope, offset_encoding: OffsetEncoding, ) -> KakounePosition48 pub fn lsp_position_to_kakoune(
49 position: &Position,
50 text: &Rope,
51 offset_encoding: OffsetEncoding,
52 ) -> KakounePosition {
53 match offset_encoding {
54 OffsetEncoding::Utf8 => lsp_position_to_kakoune_utf_8_code_units(position),
55 // Not a proper UTF-16 code units handling, but works within BMP
56 OffsetEncoding::Utf16 => lsp_position_to_kakoune_utf_8_code_points(position, text),
57 }
58 }
59
kakoune_position_to_lsp( position: &KakounePosition, text: &Rope, offset_encoding: OffsetEncoding, ) -> Position60 pub fn kakoune_position_to_lsp(
61 position: &KakounePosition,
62 text: &Rope,
63 offset_encoding: OffsetEncoding,
64 ) -> Position {
65 match offset_encoding {
66 OffsetEncoding::Utf8 => kakoune_position_to_lsp_utf_8_code_units(position),
67 // Not a proper UTF-16 code units handling, but works within BMP
68 OffsetEncoding::Utf16 => kakoune_position_to_lsp_utf_8_code_points(position, text),
69 }
70 }
71
72 /// Wrapper for kakoune_position_to_lsp which uses context to get buffer content and offset encoding.
get_lsp_position( filename: &str, position: &KakounePosition, ctx: &Context, ) -> Option<Position>73 pub fn get_lsp_position(
74 filename: &str,
75 position: &KakounePosition,
76 ctx: &Context,
77 ) -> Option<Position> {
78 ctx.documents
79 .get(filename)
80 .map(|document| kakoune_position_to_lsp(position, &document.text, ctx.offset_encoding))
81 }
82
83 /// Wrapper for lsp_position_to_kakoune which uses context to get buffer content and offset encoding.
84 /// Reads the file directly if it is not present in context (is not open in editor).
get_kakoune_position( filename: &str, position: &Position, ctx: &Context, ) -> Option<KakounePosition>85 pub fn get_kakoune_position(
86 filename: &str,
87 position: &Position,
88 ctx: &Context,
89 ) -> Option<KakounePosition> {
90 get_file_contents(filename, ctx)
91 .map(|text| lsp_position_to_kakoune(position, &text, ctx.offset_encoding))
92 }
93
94 /// Like get_kakoune_position but default to an approximate position if something goes wrong.
get_kakoune_position_with_fallback( filename_str: &str, position: Position, ctx: &Context, ) -> KakounePosition95 pub fn get_kakoune_position_with_fallback(
96 filename_str: &str,
97 position: Position,
98 ctx: &Context,
99 ) -> KakounePosition {
100 get_kakoune_position(filename_str, &position, ctx).unwrap_or_else(|| KakounePosition {
101 line: position.line + 1,
102 column: position.character + 1,
103 })
104 }
105
106 /// Get the contents of a file.
107 /// Searches ctx.documents first and falls back to reading the file directly.
get_file_contents(filename: &str, ctx: &Context) -> Option<Rope>108 pub fn get_file_contents(filename: &str, ctx: &Context) -> Option<Rope> {
109 if let Some(doc) = ctx.documents.get(filename) {
110 return Some(doc.text.clone());
111 }
112
113 match read_document(filename) {
114 Ok(text) => Some(Rope::from_str(&text)),
115 Err(err) => {
116 error!("Failed to read file {}: {}", filename, err);
117 None
118 }
119 }
120 }
121
122 /// Get a line from a Rope
123 ///
124 /// If the line number is out-of-bounds, this will return the
125 /// last line. This is useful because the language server might
126 /// use a large value to convey "end of file".
get_line(line_number: usize, text: &Rope) -> RopeSlice127 pub fn get_line(line_number: usize, text: &Rope) -> RopeSlice {
128 text.line(min(line_number, text.len_lines() - 1))
129 }
130
131 /// Get the byte index of a character in a Rope slice
132 ///
133 /// If the char number is out-of-bounds, this will return one past
134 /// the last character. This is useful because the language
135 /// server might use a large value to convey "end of file".
get_byte_index(char_index: usize, text: RopeSlice) -> usize136 fn get_byte_index(char_index: usize, text: RopeSlice) -> usize {
137 text.char_to_byte(min(char_index, text.len_chars()))
138 }
139
lsp_range_to_kakoune_utf_8_code_points(range: &Range, text: &Rope) -> KakouneRange140 fn lsp_range_to_kakoune_utf_8_code_points(range: &Range, text: &Rope) -> KakouneRange {
141 let Range { start, end } = range;
142
143 let start_line = get_line(start.line as _, text);
144 let start_byte = get_byte_index(start.character as _, start_line) as u32;
145 let end_line = get_line(end.line as _, text);
146 let end_byte = get_byte_index(end.character as _, end_line) as u32;
147
148 lsp_range_to_kakoune_utf_8_code_units(&Range {
149 start: Position {
150 line: start.line,
151 character: start_byte,
152 },
153 end: Position {
154 line: end.line,
155 character: end_byte,
156 },
157 })
158 }
159
lsp_range_to_kakoune_utf_8_code_units(range: &Range) -> KakouneRange160 fn lsp_range_to_kakoune_utf_8_code_units(range: &Range) -> KakouneRange {
161 let Range { start, end } = range;
162 let insert = start.line == end.line && start.character == end.character;
163 // Beginning of line is a very special case as we need to produce selection on the line
164 // to insert, and then insert before that selection. Selecting end of the previous line
165 // and inserting after selection doesn't work well for delete+insert cases like this:
166 /*
167 [
168 {
169 "range": {
170 "start": {
171 "line": 5,
172 "character": 0
173 },
174 "end": {
175 "line": 6,
176 "character": 0
177 }
178 },
179 "newText": ""
180 },
181 {
182 "range": {
183 "start": {
184 "line": 6,
185 "character": 0
186 },
187 "end": {
188 "line": 6,
189 "character": 0
190 }
191 },
192 "newText": " fmt.Println(\"Hello, world!\")\n"
193 }
194 ]
195 */
196 let bol_insert = insert && end.character == 0;
197 let start_byte = start.character;
198
199 // Exclusive->inclusive range.end conversion will make 0-length LSP range into the reversed
200 // 2-length Kakoune range, but we want 1-length (the closest to 0 it can get in Kakoune ;-)).
201 let end_byte = if insert {
202 start_byte
203 } else if end.character > 0 {
204 // -1 because LSP ranges are exclusive, but Kakoune's are inclusive.
205 end.character - 1
206 } else {
207 EOL_OFFSET - 1
208 };
209
210 let end_line = if bol_insert || end.character > 0 {
211 end.line
212 } else {
213 end.line - 1
214 };
215
216 // +1 because LSP ranges are 0-based, but Kakoune's are 1-based.
217 KakouneRange {
218 start: KakounePosition {
219 line: start.line + 1,
220 column: start_byte + 1,
221 },
222 end: KakounePosition {
223 line: end_line + 1,
224 column: end_byte + 1,
225 },
226 }
227 }
228
kakoune_position_to_lsp_utf_8_code_points(position: &KakounePosition, text: &Rope) -> Position229 fn kakoune_position_to_lsp_utf_8_code_points(position: &KakounePosition, text: &Rope) -> Position {
230 // -1 because LSP & Rope ranges are 0-based, but Kakoune's are 1-based.
231 let line_idx = position.line - 1;
232 let col_idx = position.column - 1;
233 if line_idx as usize >= text.len_lines() {
234 return Position {
235 line: line_idx,
236 character: col_idx,
237 };
238 }
239
240 let line = text.line(line_idx as _);
241 if col_idx as usize >= line.len_bytes() {
242 return Position {
243 line: line_idx,
244 character: col_idx,
245 };
246 }
247
248 let character = line.byte_to_char(col_idx as _) as _;
249 Position {
250 line: line_idx,
251 character,
252 }
253 }
254
kakoune_position_to_lsp_utf_8_code_units(position: &KakounePosition) -> Position255 fn kakoune_position_to_lsp_utf_8_code_units(position: &KakounePosition) -> Position {
256 // -1 because LSP ranges are 0-based, but Kakoune's are 1-based.
257 Position {
258 line: position.line - 1,
259 character: position.column - 1,
260 }
261 }
262
lsp_position_to_kakoune_utf_8_code_points(position: &Position, text: &Rope) -> KakounePosition263 fn lsp_position_to_kakoune_utf_8_code_points(position: &Position, text: &Rope) -> KakounePosition {
264 if position.line as usize >= text.len_lines() {
265 return KakounePosition {
266 line: position.line + 1,
267 column: 999999999,
268 };
269 }
270
271 let line = text.line(position.line as _);
272 if position.character as usize >= line.len_chars() {
273 return KakounePosition {
274 line: position.line + 1,
275 column: 999999999,
276 };
277 }
278
279 let byte = line.char_to_byte(position.character as _) as u32;
280 // +1 because LSP ranges are 0-based, but Kakoune's are 1-based.
281 KakounePosition {
282 line: position.line + 1,
283 column: byte + 1,
284 }
285 }
286
lsp_position_to_kakoune_utf_8_code_units(position: &Position) -> KakounePosition287 fn lsp_position_to_kakoune_utf_8_code_units(position: &Position) -> KakounePosition {
288 // +1 because LSP ranges are 0-based, but Kakoune's are 1-based.
289 KakounePosition {
290 line: position.line + 1,
291 column: position.character + 1,
292 }
293 }
294
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an LSP range from `(line, character)` pairs.
    fn lsp_range(start: (u32, u32), end: (u32, u32)) -> Range {
        Range {
            start: Position {
                line: start.0,
                character: start.1,
            },
            end: Position {
                line: end.0,
                character: end.1,
            },
        }
    }

    /// Builds a Kakoune range from `(line, column)` pairs.
    fn kakoune_range(start: (u32, u32), end: (u32, u32)) -> KakouneRange {
        KakouneRange {
            start: KakounePosition {
                line: start.0,
                column: start.1,
            },
            end: KakounePosition {
                line: end.0,
                column: end.1,
            },
        }
    }

    /// An empty range at the beginning of a line selects the first column of that line.
    #[test]
    fn lsp_range_to_kakoune_utf_8_code_units_bol_insert() {
        let converted = lsp_range_to_kakoune_utf_8_code_units(&lsp_range((10, 0), (10, 0)));
        assert_eq!(converted, kakoune_range((11, 1), (11, 1)));
    }

    /// An empty range at the very beginning of the file selects line 1, column 1.
    #[test]
    fn lsp_range_to_kakoune_utf_8_code_units_bof_insert() {
        let converted = lsp_range_to_kakoune_utf_8_code_units(&lsp_range((0, 0), (0, 0)));
        assert_eq!(converted, kakoune_range((1, 1), (1, 1)));
    }

    /// A range ending at the start of the next line ends at the EOL column of its own line.
    #[test]
    fn lsp_range_to_kakoune_utf_8_code_units_eol() {
        let converted = lsp_range_to_kakoune_utf_8_code_units(&lsp_range((10, 0), (11, 0)));
        assert_eq!(converted, kakoune_range((11, 1), (11, EOL_OFFSET)));
    }
}
371