1 #include "lex_utils.h"
2
3 #include <doctest/doctest.h>
4
5 #include <algorithm>
6 #include <iostream>
7
8 // VSCode (UTF-16) disagrees with Emacs lsp-mode (UTF-8) on how to represent
9 // text documents.
10 // We use a UTF-8 iterator to approximate UTF-16 in the specification (weird).
11 // This is good enough and fails only for UTF-16 surrogate pairs.
GetOffsetForPosition(lsPosition position,std::string_view content)12 int GetOffsetForPosition(lsPosition position, std::string_view content) {
13 size_t i = 0;
14 // Iterate lines until we have found the correct line.
15 while (position.line > 0 && i < content.size()) {
16 if (content[i] == '\n')
17 position.line--;
18 i++;
19 }
20 // Iterate characters on the target line.
21 while (position.character > 0 && i < content.size()) {
22 if (uint8_t(content[i++]) >= 128) {
23 // Skip 0b10xxxxxx
24 while (i < content.size() && uint8_t(content[i]) >= 128 &&
25 uint8_t(content[i]) < 192)
26 i++;
27 }
28 position.character--;
29 }
30 return int(i);
31 }
32
GetPositionForOffset(int offset,std::string_view content)33 lsPosition GetPositionForOffset(int offset, std::string_view content) {
34 lsPosition result;
35 for (int i = 0; i < offset && i < content.length(); ++i) {
36 if (content[i] == '\n') {
37 result.line++;
38 result.character = 0;
39 } else {
40 result.character++;
41 }
42 }
43 return result;
44 }
45
CharPos(std::string_view search,char character,int character_offset)46 lsPosition CharPos(std::string_view search,
47 char character,
48 int character_offset) {
49 lsPosition result;
50 size_t index = 0;
51 while (index < search.size()) {
52 char c = search[index];
53 if (c == character)
54 break;
55 if (c == '\n') {
56 result.line += 1;
57 result.character = 0;
58 } else {
59 result.character += 1;
60 }
61 ++index;
62 }
63 assert(index < search.size());
64 result.character += character_offset;
65 return result;
66 }
67
68 // TODO: eliminate |line_number| param.
ExtractQuotedRange(int line_number,const std::string & line)69 optional<lsRange> ExtractQuotedRange(int line_number, const std::string& line) {
70 // Find starting and ending quote.
71 int start = 0;
72 while (start < (int)line.size()) {
73 char c = line[start];
74 ++start;
75 if (c == '"' || c == '<')
76 break;
77 }
78 if (start == (int)line.size())
79 return nullopt;
80
81 int end = (int)line.size();
82 while (end > 0) {
83 char c = line[end];
84 if (c == '"' || c == '>')
85 break;
86 --end;
87 }
88
89 if (start >= end)
90 return nullopt;
91
92 return lsRange(lsPosition(line_number, start), lsPosition(line_number, end));
93 }
94
LexFunctionDeclaration(const std::string & buffer_content,lsPosition declaration_spelling,optional<std::string> type_name,std::string * insert_text,int * newlines_after_name)95 void LexFunctionDeclaration(const std::string& buffer_content,
96 lsPosition declaration_spelling,
97 optional<std::string> type_name,
98 std::string* insert_text,
99 int* newlines_after_name) {
100 int name_start = GetOffsetForPosition(declaration_spelling, buffer_content);
101
102 bool parse_return_type = true;
103 // We need to check if we have a return type (ctors and dtors do not).
104 if (type_name) {
105 int name_end = name_start;
106 while (name_end < buffer_content.size()) {
107 char c = buffer_content[name_end];
108 if (isspace(c) || c == '(')
109 break;
110 ++name_end;
111 }
112
113 std::string func_name =
114 buffer_content.substr(name_start, name_end - name_start);
115 if (func_name == *type_name || func_name == ("~" + *type_name))
116 parse_return_type = false;
117 }
118
119 // We need to fetch the return type. This can get complex, ie,
120 //
121 // std::vector <int> foo();
122 //
123 int return_start = name_start;
124 if (parse_return_type) {
125 int paren_balance = 0;
126 int angle_balance = 0;
127 bool expect_token = true;
128 while (return_start > 0) {
129 char c = buffer_content[return_start - 1];
130 if (paren_balance == 0 && angle_balance == 0) {
131 if (isspace(c) && !expect_token) {
132 break;
133 }
134 if (!isspace(c))
135 expect_token = false;
136 }
137
138 if (c == ')')
139 ++paren_balance;
140 if (c == '(') {
141 --paren_balance;
142 expect_token = true;
143 }
144
145 if (c == '>')
146 ++angle_balance;
147 if (c == '<') {
148 --angle_balance;
149 expect_token = true;
150 }
151
152 return_start -= 1;
153 }
154 }
155
156 // We need to fetch the arguments. Just scan for the next ';'.
157 *newlines_after_name = 0;
158 int end = name_start;
159 while (end < buffer_content.size()) {
160 char c = buffer_content[end];
161 if (c == ';')
162 break;
163 if (c == '\n')
164 *newlines_after_name += 1;
165 ++end;
166 }
167
168 std::string result;
169 result += buffer_content.substr(return_start, name_start - return_start);
170 if (type_name && !type_name->empty())
171 result += *type_name + "::";
172 result += buffer_content.substr(name_start, end - name_start);
173 TrimEndInPlace(result);
174 result += " {\n}";
175 *insert_text = result;
176 }
177
LexIdentifierAroundPos(lsPosition position,std::string_view content)178 std::string_view LexIdentifierAroundPos(lsPosition position,
179 std::string_view content) {
180 int start = GetOffsetForPosition(position, content);
181 int end = start + 1;
182 char c;
183
184 // We search for :: before the cursor but not after to get the qualifier.
185 for (; start > 0; start--) {
186 c = content[start - 1];
187 if (isalnum(c) || c == '_')
188 ;
189 else if (c == ':' && start > 1 && content[start - 2] == ':')
190 start--;
191 else
192 break;
193 }
194
195 for (; end < (int)content.size(); end++)
196 if (c = content[end], !(isalnum(c) || c == '_'))
197 break;
198
199 return content.substr(start, end - start);
200 }
201
// Finds |search| as a (possibly discontinuous) subsequence of |content|.
//
// Matching is case-sensitive if |search| contains at least one uppercase
// letter and case-insensitive otherwise.
//
// Returns {found, skipped}: |found| tells whether every character of |search|
// was matched in order, and |skipped| is the number of characters of
// |content| that were passed over before the scan finished (characters after
// the final match are not counted).
std::pair<bool, int> CaseFoldingSubsequenceMatch(std::string_view search,
                                                 std::string_view content) {
  // The <cctype> functions have undefined behavior for negative char values
  // (non-ASCII bytes where char is signed), so convert via unsigned char.
  const auto is_upper = [](char ch) {
    return isupper(static_cast<unsigned char>(ch)) != 0;
  };
  const auto fold = [](char ch) {
    return tolower(static_cast<unsigned char>(ch));
  };

  const bool case_sensitive =
      std::any_of(search.begin(), search.end(), is_upper);
  int skip = 0;
  size_t j = 0;
  for (char c : search) {
    while (j < content.size() &&
           (case_sensitive ? content[j] != c : fold(content[j]) != fold(c))) {
      ++j;
      ++skip;
    }
    if (j == content.size())
      return {false, skip};
    ++j;  // Consume the matched character.
  }
  return {true, skip};
}
220
221 TEST_SUITE("Offset") {
222 TEST_CASE("past end") {
223 std::string content = "foo";
224 int offset = GetOffsetForPosition(lsPosition(10, 10), content);
225 REQUIRE(offset <= content.size());
226 }
227
228 TEST_CASE("in middle of content") {
229 std::string content = "abcdefghijk";
230 for (int i = 0; i < content.size(); ++i) {
231 int offset = GetOffsetForPosition(lsPosition(0, i), content);
232 REQUIRE(i == offset);
233 }
234 }
235
236 TEST_CASE("at end of content") {
237 REQUIRE(GetOffsetForPosition(lsPosition(0, 0), "") == 0);
238 REQUIRE(GetOffsetForPosition(lsPosition(0, 1), "a") == 1);
239 }
240 }
241
242 TEST_SUITE("offset") {
243 TEST_CASE("some") {
244 REQUIRE(GetPositionForOffset(0, "012345\n012345") == lsPosition(0, 0));
245 REQUIRE(GetPositionForOffset(1, "012345\n012345") == lsPosition(0, 1));
246 REQUIRE(GetPositionForOffset(2, "012345\n012345") == lsPosition(0, 2));
247 REQUIRE(GetPositionForOffset(3, "012345\n012345") == lsPosition(0, 3));
248 REQUIRE(GetPositionForOffset(4, "012345\n012345") == lsPosition(0, 4));
249 REQUIRE(GetPositionForOffset(5, "012345\n012345") == lsPosition(0, 5));
250 REQUIRE(GetPositionForOffset(6, "012345\n012345") == lsPosition(0, 6));
251 REQUIRE(GetPositionForOffset(7, "012345\n012345") == lsPosition(1, 0));
252 REQUIRE(GetPositionForOffset(8, "012345\n012345") == lsPosition(1, 1));
253 REQUIRE(GetPositionForOffset(9, "012345\n012345") == lsPosition(1, 2));
254 REQUIRE(GetPositionForOffset(10, "012345\n012345") == lsPosition(1, 3));
255 REQUIRE(GetPositionForOffset(11, "012345\n012345") == lsPosition(1, 4));
256 REQUIRE(GetPositionForOffset(12, "012345\n012345") == lsPosition(1, 5));
257
258 // Overflow
259 REQUIRE(GetPositionForOffset(13, "012345\n012345") == lsPosition(1, 6));
260 REQUIRE(GetPositionForOffset(100, "012345\n012345") == lsPosition(1, 6));
261 }
262
263 TEST_CASE("overflow") {
264 REQUIRE(GetOffsetForPosition(lsPosition(0, 0), "a") == 0);
265 REQUIRE(GetOffsetForPosition(lsPosition(0, 1), "a") == 1);
266 REQUIRE(GetPositionForOffset(0, "0") == lsPosition(0, 0));
267 REQUIRE(GetPositionForOffset(1, "0") == lsPosition(0, 1));
268 REQUIRE(GetPositionForOffset(5, "0") == lsPosition(0, 1));
269 }
270 }
271
272 TEST_SUITE("Substring") {
273 TEST_CASE("skip") {
274 REQUIRE(CaseFoldingSubsequenceMatch("a", "a") == std::make_pair(true, 0));
275 REQUIRE(CaseFoldingSubsequenceMatch("b", "a") == std::make_pair(false, 1));
276 REQUIRE(CaseFoldingSubsequenceMatch("", "") == std::make_pair(true, 0));
277 REQUIRE(CaseFoldingSubsequenceMatch("a", "ba") == std::make_pair(true, 1));
278 REQUIRE(CaseFoldingSubsequenceMatch("aa", "aba") ==
279 std::make_pair(true, 1));
280 REQUIRE(CaseFoldingSubsequenceMatch("aa", "baa") ==
281 std::make_pair(true, 1));
282 REQUIRE(CaseFoldingSubsequenceMatch("aA", "aA") == std::make_pair(true, 0));
283 REQUIRE(CaseFoldingSubsequenceMatch("aA", "aa") ==
284 std::make_pair(false, 1));
285 REQUIRE(CaseFoldingSubsequenceMatch("incstdioh", "include <stdio.h>") ==
286 std::make_pair(true, 7));
287 }
288 }
289
290 TEST_SUITE("LexFunctionDeclaration") {
291 TEST_CASE("simple") {
292 std::string buffer_content = " void Foo(); ";
293 lsPosition declaration = CharPos(buffer_content, 'F');
294 std::string insert_text;
295 int newlines_after_name = 0;
296
297 LexFunctionDeclaration(buffer_content, declaration, nullopt, &insert_text,
298 &newlines_after_name);
299 REQUIRE(insert_text == "void Foo() {\n}");
300 REQUIRE(newlines_after_name == 0);
301
302 LexFunctionDeclaration(buffer_content, declaration, std::string("Type"),
303 &insert_text, &newlines_after_name);
304 REQUIRE(insert_text == "void Type::Foo() {\n}");
305 REQUIRE(newlines_after_name == 0);
306 }
307
308 TEST_CASE("ctor") {
309 std::string buffer_content = " Foo(); ";
310 lsPosition declaration = CharPos(buffer_content, 'F');
311 std::string insert_text;
312 int newlines_after_name = 0;
313
314 LexFunctionDeclaration(buffer_content, declaration, std::string("Foo"),
315 &insert_text, &newlines_after_name);
316 REQUIRE(insert_text == "Foo::Foo() {\n}");
317 REQUIRE(newlines_after_name == 0);
318 }
319
320 TEST_CASE("dtor") {
321 std::string buffer_content = " ~Foo(); ";
322 lsPosition declaration = CharPos(buffer_content, '~');
323 std::string insert_text;
324 int newlines_after_name = 0;
325
326 LexFunctionDeclaration(buffer_content, declaration, std::string("Foo"),
327 &insert_text, &newlines_after_name);
328 REQUIRE(insert_text == "Foo::~Foo() {\n}");
329 REQUIRE(newlines_after_name == 0);
330 }
331
332 TEST_CASE("complex return type") {
333 std::string buffer_content = " std::vector<int> Foo(); ";
334 lsPosition declaration = CharPos(buffer_content, 'F');
335 std::string insert_text;
336 int newlines_after_name = 0;
337
338 LexFunctionDeclaration(buffer_content, declaration, nullopt, &insert_text,
339 &newlines_after_name);
340 REQUIRE(insert_text == "std::vector<int> Foo() {\n}");
341 REQUIRE(newlines_after_name == 0);
342
343 LexFunctionDeclaration(buffer_content, declaration, std::string("Type"),
344 &insert_text, &newlines_after_name);
345 REQUIRE(insert_text == "std::vector<int> Type::Foo() {\n}");
346 REQUIRE(newlines_after_name == 0);
347 }
348
349 TEST_CASE("extra complex return type") {
350 std::string buffer_content = " std::function < int() > \n Foo(); ";
351 lsPosition declaration = CharPos(buffer_content, 'F');
352 std::string insert_text;
353 int newlines_after_name = 0;
354
355 LexFunctionDeclaration(buffer_content, declaration, nullopt, &insert_text,
356 &newlines_after_name);
357 REQUIRE(insert_text == "std::function < int() > \n Foo() {\n}");
358 REQUIRE(newlines_after_name == 0);
359
360 LexFunctionDeclaration(buffer_content, declaration, std::string("Type"),
361 &insert_text, &newlines_after_name);
362 REQUIRE(insert_text == "std::function < int() > \n Type::Foo() {\n}");
363 REQUIRE(newlines_after_name == 0);
364 }
365
366 TEST_CASE("parameters") {
367 std::string buffer_content = "void Foo(int a,\n\n int b); ";
368 lsPosition declaration = CharPos(buffer_content, 'F');
369 std::string insert_text;
370 int newlines_after_name = 0;
371
372 LexFunctionDeclaration(buffer_content, declaration, nullopt, &insert_text,
373 &newlines_after_name);
374 REQUIRE(insert_text == "void Foo(int a,\n\n int b) {\n}");
375 REQUIRE(newlines_after_name == 2);
376
377 LexFunctionDeclaration(buffer_content, declaration, std::string("Type"),
378 &insert_text, &newlines_after_name);
379 REQUIRE(insert_text == "void Type::Foo(int a,\n\n int b) {\n}");
380 REQUIRE(newlines_after_name == 2);
381 }
382 }
383
384 TEST_SUITE("LexWordAroundPos") {
385 TEST_CASE("edges") {
386 std::string content = "Foobar";
387 REQUIRE(LexIdentifierAroundPos(CharPos(content, 'F'), content) == "Foobar");
388 REQUIRE(LexIdentifierAroundPos(CharPos(content, 'o'), content) == "Foobar");
389 REQUIRE(LexIdentifierAroundPos(CharPos(content, 'b'), content) == "Foobar");
390 REQUIRE(LexIdentifierAroundPos(CharPos(content, 'a'), content) == "Foobar");
391 REQUIRE(LexIdentifierAroundPos(CharPos(content, 'r'), content) == "Foobar");
392 }
393
394 TEST_CASE("simple") {
395 std::string content = " Foobar ";
396 REQUIRE(LexIdentifierAroundPos(CharPos(content, 'F'), content) == "Foobar");
397 REQUIRE(LexIdentifierAroundPos(CharPos(content, 'o'), content) == "Foobar");
398 REQUIRE(LexIdentifierAroundPos(CharPos(content, 'b'), content) == "Foobar");
399 REQUIRE(LexIdentifierAroundPos(CharPos(content, 'a'), content) == "Foobar");
400 REQUIRE(LexIdentifierAroundPos(CharPos(content, 'r'), content) == "Foobar");
401 }
402
403 TEST_CASE("underscores, numbers and ::") {
404 std::string content = " file:ns::_my_t5ype7 ";
405 REQUIRE(LexIdentifierAroundPos(CharPos(content, 'f'), content) == "file");
406 REQUIRE(LexIdentifierAroundPos(CharPos(content, 's'), content) == "ns");
407 REQUIRE(LexIdentifierAroundPos(CharPos(content, 'y'), content) ==
408 "ns::_my_t5ype7");
409 }
410
411 TEST_CASE("dot, dash, colon are skipped") {
412 std::string content = "1. 2- 3:";
413 REQUIRE(LexIdentifierAroundPos(CharPos(content, '1'), content) == "1");
414 REQUIRE(LexIdentifierAroundPos(CharPos(content, '2'), content) == "2");
415 REQUIRE(LexIdentifierAroundPos(CharPos(content, '3'), content) == "3");
416 }
417 }
418