1 #include "lex_utils.h"
2 
3 #include <doctest/doctest.h>
4 
5 #include <algorithm>
6 #include <iostream>
7 
8 // VSCode (UTF-16) disagrees with Emacs lsp-mode (UTF-8) on how to represent
9 // text documents.
10 // We use a UTF-8 iterator to approximate UTF-16 in the specification (weird).
11 // This is good enough and fails only for UTF-16 surrogate pairs.
GetOffsetForPosition(lsPosition position,std::string_view content)12 int GetOffsetForPosition(lsPosition position, std::string_view content) {
13   size_t i = 0;
14   // Iterate lines until we have found the correct line.
15   while (position.line > 0 && i < content.size()) {
16     if (content[i] == '\n')
17       position.line--;
18     i++;
19   }
20   // Iterate characters on the target line.
21   while (position.character > 0 && i < content.size()) {
22     if (uint8_t(content[i++]) >= 128) {
23       // Skip 0b10xxxxxx
24       while (i < content.size() && uint8_t(content[i]) >= 128 &&
25              uint8_t(content[i]) < 192)
26         i++;
27     }
28     position.character--;
29   }
30   return int(i);
31 }
32 
GetPositionForOffset(int offset,std::string_view content)33 lsPosition GetPositionForOffset(int offset, std::string_view content) {
34   lsPosition result;
35   for (int i = 0; i < offset && i < content.length(); ++i) {
36     if (content[i] == '\n') {
37       result.line++;
38       result.character = 0;
39     } else {
40       result.character++;
41     }
42   }
43   return result;
44 }
45 
CharPos(std::string_view search,char character,int character_offset)46 lsPosition CharPos(std::string_view search,
47                    char character,
48                    int character_offset) {
49   lsPosition result;
50   size_t index = 0;
51   while (index < search.size()) {
52     char c = search[index];
53     if (c == character)
54       break;
55     if (c == '\n') {
56       result.line += 1;
57       result.character = 0;
58     } else {
59       result.character += 1;
60     }
61     ++index;
62   }
63   assert(index < search.size());
64   result.character += character_offset;
65   return result;
66 }
67 
68 // TODO: eliminate |line_number| param.
ExtractQuotedRange(int line_number,const std::string & line)69 optional<lsRange> ExtractQuotedRange(int line_number, const std::string& line) {
70   // Find starting and ending quote.
71   int start = 0;
72   while (start < (int)line.size()) {
73     char c = line[start];
74     ++start;
75     if (c == '"' || c == '<')
76       break;
77   }
78   if (start == (int)line.size())
79     return nullopt;
80 
81   int end = (int)line.size();
82   while (end > 0) {
83     char c = line[end];
84     if (c == '"' || c == '>')
85       break;
86     --end;
87   }
88 
89   if (start >= end)
90     return nullopt;
91 
92   return lsRange(lsPosition(line_number, start), lsPosition(line_number, end));
93 }
94 
LexFunctionDeclaration(const std::string & buffer_content,lsPosition declaration_spelling,optional<std::string> type_name,std::string * insert_text,int * newlines_after_name)95 void LexFunctionDeclaration(const std::string& buffer_content,
96                             lsPosition declaration_spelling,
97                             optional<std::string> type_name,
98                             std::string* insert_text,
99                             int* newlines_after_name) {
100   int name_start = GetOffsetForPosition(declaration_spelling, buffer_content);
101 
102   bool parse_return_type = true;
103   // We need to check if we have a return type (ctors and dtors do not).
104   if (type_name) {
105     int name_end = name_start;
106     while (name_end < buffer_content.size()) {
107       char c = buffer_content[name_end];
108       if (isspace(c) || c == '(')
109         break;
110       ++name_end;
111     }
112 
113     std::string func_name =
114         buffer_content.substr(name_start, name_end - name_start);
115     if (func_name == *type_name || func_name == ("~" + *type_name))
116       parse_return_type = false;
117   }
118 
119   // We need to fetch the return type. This can get complex, ie,
120   //
121   //  std::vector <int> foo();
122   //
123   int return_start = name_start;
124   if (parse_return_type) {
125     int paren_balance = 0;
126     int angle_balance = 0;
127     bool expect_token = true;
128     while (return_start > 0) {
129       char c = buffer_content[return_start - 1];
130       if (paren_balance == 0 && angle_balance == 0) {
131         if (isspace(c) && !expect_token) {
132           break;
133         }
134         if (!isspace(c))
135           expect_token = false;
136       }
137 
138       if (c == ')')
139         ++paren_balance;
140       if (c == '(') {
141         --paren_balance;
142         expect_token = true;
143       }
144 
145       if (c == '>')
146         ++angle_balance;
147       if (c == '<') {
148         --angle_balance;
149         expect_token = true;
150       }
151 
152       return_start -= 1;
153     }
154   }
155 
156   // We need to fetch the arguments. Just scan for the next ';'.
157   *newlines_after_name = 0;
158   int end = name_start;
159   while (end < buffer_content.size()) {
160     char c = buffer_content[end];
161     if (c == ';')
162       break;
163     if (c == '\n')
164       *newlines_after_name += 1;
165     ++end;
166   }
167 
168   std::string result;
169   result += buffer_content.substr(return_start, name_start - return_start);
170   if (type_name && !type_name->empty())
171     result += *type_name + "::";
172   result += buffer_content.substr(name_start, end - name_start);
173   TrimEndInPlace(result);
174   result += " {\n}";
175   *insert_text = result;
176 }
177 
LexIdentifierAroundPos(lsPosition position,std::string_view content)178 std::string_view LexIdentifierAroundPos(lsPosition position,
179                                         std::string_view content) {
180   int start = GetOffsetForPosition(position, content);
181   int end = start + 1;
182   char c;
183 
184   // We search for :: before the cursor but not after to get the qualifier.
185   for (; start > 0; start--) {
186     c = content[start - 1];
187     if (isalnum(c) || c == '_')
188       ;
189     else if (c == ':' && start > 1 && content[start - 2] == ':')
190       start--;
191     else
192       break;
193   }
194 
195   for (; end < (int)content.size(); end++)
196     if (c = content[end], !(isalnum(c) || c == '_'))
197       break;
198 
199   return content.substr(start, end - start);
200 }
201 
// Finds |search| as a (possibly discontinuous) subsequence of |content|.
//
// Matching is case-sensitive when |search| contains at least one uppercase
// letter and case-insensitive otherwise.
//
// Returns {found, skipped}, where |skipped| is the number of characters of
// |content| that were stepped over while looking for matches. It is also
// reported when the match fails.
std::pair<bool, int> CaseFoldingSubsequenceMatch(std::string_view search,
                                                 std::string_view content) {
  // The <cctype> functions are undefined for negative arguments, so bytes are
  // converted to unsigned char first. Wrapping them in lambdas also avoids
  // handing an overloadable function name to std::any_of.
  const auto is_upper = [](unsigned char ch) { return isupper(ch) != 0; };
  const auto fold = [](unsigned char ch) { return tolower(ch); };

  const bool case_sensitive =
      std::any_of(search.begin(), search.end(), is_upper);
  const auto differs = [&](char lhs, char rhs) {
    return case_sensitive ? lhs != rhs : fold(lhs) != fold(rhs);
  };

  int skip = 0;
  size_t j = 0;
  for (char c : search) {
    // Step over content characters until the next wanted one shows up.
    while (j < content.size() && differs(content[j], c)) {
      ++j;
      ++skip;
    }
    if (j == content.size())
      return {false, skip};
    ++j;
  }
  return {true, skip};
}
220 
221 TEST_SUITE("Offset") {
222   TEST_CASE("past end") {
223     std::string content = "foo";
224     int offset = GetOffsetForPosition(lsPosition(10, 10), content);
225     REQUIRE(offset <= content.size());
226   }
227 
228   TEST_CASE("in middle of content") {
229     std::string content = "abcdefghijk";
230     for (int i = 0; i < content.size(); ++i) {
231       int offset = GetOffsetForPosition(lsPosition(0, i), content);
232       REQUIRE(i == offset);
233     }
234   }
235 
236   TEST_CASE("at end of content") {
237     REQUIRE(GetOffsetForPosition(lsPosition(0, 0), "") == 0);
238     REQUIRE(GetOffsetForPosition(lsPosition(0, 1), "a") == 1);
239   }
240 }
241 
242 TEST_SUITE("offset") {
243   TEST_CASE("some") {
244     REQUIRE(GetPositionForOffset(0, "012345\n012345") == lsPosition(0, 0));
245     REQUIRE(GetPositionForOffset(1, "012345\n012345") == lsPosition(0, 1));
246     REQUIRE(GetPositionForOffset(2, "012345\n012345") == lsPosition(0, 2));
247     REQUIRE(GetPositionForOffset(3, "012345\n012345") == lsPosition(0, 3));
248     REQUIRE(GetPositionForOffset(4, "012345\n012345") == lsPosition(0, 4));
249     REQUIRE(GetPositionForOffset(5, "012345\n012345") == lsPosition(0, 5));
250     REQUIRE(GetPositionForOffset(6, "012345\n012345") == lsPosition(0, 6));
251     REQUIRE(GetPositionForOffset(7, "012345\n012345") == lsPosition(1, 0));
252     REQUIRE(GetPositionForOffset(8, "012345\n012345") == lsPosition(1, 1));
253     REQUIRE(GetPositionForOffset(9, "012345\n012345") == lsPosition(1, 2));
254     REQUIRE(GetPositionForOffset(10, "012345\n012345") == lsPosition(1, 3));
255     REQUIRE(GetPositionForOffset(11, "012345\n012345") == lsPosition(1, 4));
256     REQUIRE(GetPositionForOffset(12, "012345\n012345") == lsPosition(1, 5));
257 
258     // Overflow
259     REQUIRE(GetPositionForOffset(13, "012345\n012345") == lsPosition(1, 6));
260     REQUIRE(GetPositionForOffset(100, "012345\n012345") == lsPosition(1, 6));
261   }
262 
263   TEST_CASE("overflow") {
264     REQUIRE(GetOffsetForPosition(lsPosition(0, 0), "a") == 0);
265     REQUIRE(GetOffsetForPosition(lsPosition(0, 1), "a") == 1);
266     REQUIRE(GetPositionForOffset(0, "0") == lsPosition(0, 0));
267     REQUIRE(GetPositionForOffset(1, "0") == lsPosition(0, 1));
268     REQUIRE(GetPositionForOffset(5, "0") == lsPosition(0, 1));
269   }
270 }
271 
272 TEST_SUITE("Substring") {
273   TEST_CASE("skip") {
274     REQUIRE(CaseFoldingSubsequenceMatch("a", "a") == std::make_pair(true, 0));
275     REQUIRE(CaseFoldingSubsequenceMatch("b", "a") == std::make_pair(false, 1));
276     REQUIRE(CaseFoldingSubsequenceMatch("", "") == std::make_pair(true, 0));
277     REQUIRE(CaseFoldingSubsequenceMatch("a", "ba") == std::make_pair(true, 1));
278     REQUIRE(CaseFoldingSubsequenceMatch("aa", "aba") ==
279             std::make_pair(true, 1));
280     REQUIRE(CaseFoldingSubsequenceMatch("aa", "baa") ==
281             std::make_pair(true, 1));
282     REQUIRE(CaseFoldingSubsequenceMatch("aA", "aA") == std::make_pair(true, 0));
283     REQUIRE(CaseFoldingSubsequenceMatch("aA", "aa") ==
284             std::make_pair(false, 1));
285     REQUIRE(CaseFoldingSubsequenceMatch("incstdioh", "include <stdio.h>") ==
286             std::make_pair(true, 7));
287   }
288 }
289 
290 TEST_SUITE("LexFunctionDeclaration") {
291   TEST_CASE("simple") {
292     std::string buffer_content = " void Foo(); ";
293     lsPosition declaration = CharPos(buffer_content, 'F');
294     std::string insert_text;
295     int newlines_after_name = 0;
296 
297     LexFunctionDeclaration(buffer_content, declaration, nullopt, &insert_text,
298                            &newlines_after_name);
299     REQUIRE(insert_text == "void Foo() {\n}");
300     REQUIRE(newlines_after_name == 0);
301 
302     LexFunctionDeclaration(buffer_content, declaration, std::string("Type"),
303                            &insert_text, &newlines_after_name);
304     REQUIRE(insert_text == "void Type::Foo() {\n}");
305     REQUIRE(newlines_after_name == 0);
306   }
307 
308   TEST_CASE("ctor") {
309     std::string buffer_content = " Foo(); ";
310     lsPosition declaration = CharPos(buffer_content, 'F');
311     std::string insert_text;
312     int newlines_after_name = 0;
313 
314     LexFunctionDeclaration(buffer_content, declaration, std::string("Foo"),
315                            &insert_text, &newlines_after_name);
316     REQUIRE(insert_text == "Foo::Foo() {\n}");
317     REQUIRE(newlines_after_name == 0);
318   }
319 
320   TEST_CASE("dtor") {
321     std::string buffer_content = " ~Foo(); ";
322     lsPosition declaration = CharPos(buffer_content, '~');
323     std::string insert_text;
324     int newlines_after_name = 0;
325 
326     LexFunctionDeclaration(buffer_content, declaration, std::string("Foo"),
327                            &insert_text, &newlines_after_name);
328     REQUIRE(insert_text == "Foo::~Foo() {\n}");
329     REQUIRE(newlines_after_name == 0);
330   }
331 
332   TEST_CASE("complex return type") {
333     std::string buffer_content = " std::vector<int> Foo(); ";
334     lsPosition declaration = CharPos(buffer_content, 'F');
335     std::string insert_text;
336     int newlines_after_name = 0;
337 
338     LexFunctionDeclaration(buffer_content, declaration, nullopt, &insert_text,
339                            &newlines_after_name);
340     REQUIRE(insert_text == "std::vector<int> Foo() {\n}");
341     REQUIRE(newlines_after_name == 0);
342 
343     LexFunctionDeclaration(buffer_content, declaration, std::string("Type"),
344                            &insert_text, &newlines_after_name);
345     REQUIRE(insert_text == "std::vector<int> Type::Foo() {\n}");
346     REQUIRE(newlines_after_name == 0);
347   }
348 
349   TEST_CASE("extra complex return type") {
350     std::string buffer_content = " std::function < int() > \n Foo(); ";
351     lsPosition declaration = CharPos(buffer_content, 'F');
352     std::string insert_text;
353     int newlines_after_name = 0;
354 
355     LexFunctionDeclaration(buffer_content, declaration, nullopt, &insert_text,
356                            &newlines_after_name);
357     REQUIRE(insert_text == "std::function < int() > \n Foo() {\n}");
358     REQUIRE(newlines_after_name == 0);
359 
360     LexFunctionDeclaration(buffer_content, declaration, std::string("Type"),
361                            &insert_text, &newlines_after_name);
362     REQUIRE(insert_text == "std::function < int() > \n Type::Foo() {\n}");
363     REQUIRE(newlines_after_name == 0);
364   }
365 
366   TEST_CASE("parameters") {
367     std::string buffer_content = "void Foo(int a,\n\n    int b); ";
368     lsPosition declaration = CharPos(buffer_content, 'F');
369     std::string insert_text;
370     int newlines_after_name = 0;
371 
372     LexFunctionDeclaration(buffer_content, declaration, nullopt, &insert_text,
373                            &newlines_after_name);
374     REQUIRE(insert_text == "void Foo(int a,\n\n    int b) {\n}");
375     REQUIRE(newlines_after_name == 2);
376 
377     LexFunctionDeclaration(buffer_content, declaration, std::string("Type"),
378                            &insert_text, &newlines_after_name);
379     REQUIRE(insert_text == "void Type::Foo(int a,\n\n    int b) {\n}");
380     REQUIRE(newlines_after_name == 2);
381   }
382 }
383 
384 TEST_SUITE("LexWordAroundPos") {
385   TEST_CASE("edges") {
386     std::string content = "Foobar";
387     REQUIRE(LexIdentifierAroundPos(CharPos(content, 'F'), content) == "Foobar");
388     REQUIRE(LexIdentifierAroundPos(CharPos(content, 'o'), content) == "Foobar");
389     REQUIRE(LexIdentifierAroundPos(CharPos(content, 'b'), content) == "Foobar");
390     REQUIRE(LexIdentifierAroundPos(CharPos(content, 'a'), content) == "Foobar");
391     REQUIRE(LexIdentifierAroundPos(CharPos(content, 'r'), content) == "Foobar");
392   }
393 
394   TEST_CASE("simple") {
395     std::string content = "  Foobar  ";
396     REQUIRE(LexIdentifierAroundPos(CharPos(content, 'F'), content) == "Foobar");
397     REQUIRE(LexIdentifierAroundPos(CharPos(content, 'o'), content) == "Foobar");
398     REQUIRE(LexIdentifierAroundPos(CharPos(content, 'b'), content) == "Foobar");
399     REQUIRE(LexIdentifierAroundPos(CharPos(content, 'a'), content) == "Foobar");
400     REQUIRE(LexIdentifierAroundPos(CharPos(content, 'r'), content) == "Foobar");
401   }
402 
403   TEST_CASE("underscores, numbers and ::") {
404     std::string content = "  file:ns::_my_t5ype7 ";
405     REQUIRE(LexIdentifierAroundPos(CharPos(content, 'f'), content) == "file");
406     REQUIRE(LexIdentifierAroundPos(CharPos(content, 's'), content) == "ns");
407     REQUIRE(LexIdentifierAroundPos(CharPos(content, 'y'), content) ==
408             "ns::_my_t5ype7");
409   }
410 
411   TEST_CASE("dot, dash, colon are skipped") {
412     std::string content = "1. 2- 3:";
413     REQUIRE(LexIdentifierAroundPos(CharPos(content, '1'), content) == "1");
414     REQUIRE(LexIdentifierAroundPos(CharPos(content, '2'), content) == "2");
415     REQUIRE(LexIdentifierAroundPos(CharPos(content, '3'), content) == "3");
416   }
417 }
418