1 //===-- CPlusPlusNameParser.cpp -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "CPlusPlusNameParser.h"
10 
11 #include "clang/Basic/IdentifierTable.h"
12 #include "llvm/ADT/StringMap.h"
13 #include "llvm/Support/Threading.h"
14 
15 using namespace lldb;
16 using namespace lldb_private;
17 using llvm::Optional;
18 using llvm::None;
19 using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
20 using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
21 namespace tok = clang::tok;
22 
23 Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
24   m_next_token_index = 0;
25   Optional<ParsedFunction> result(None);
26 
27   // Try to parse the name as function without a return type specified e.g.
28   // main(int, char*[])
29   {
30     Bookmark start_position = SetBookmark();
31     result = ParseFunctionImpl(false);
32     if (result && !HasMoreTokens())
33       return result;
34   }
35 
36   // Try to parse the name as function with function pointer return type e.g.
37   // void (*get_func(const char*))()
38   result = ParseFuncPtr(true);
39   if (result)
40     return result;
41 
42   // Finally try to parse the name as a function with non-function return type
43   // e.g. int main(int, char*[])
44   result = ParseFunctionImpl(true);
45   if (HasMoreTokens())
46     return None;
47   return result;
48 }
49 
50 Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
51   m_next_token_index = 0;
52   Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
53   if (!name_ranges)
54     return None;
55   if (HasMoreTokens())
56     return None;
57   ParsedName result;
58   result.basename = GetTextForRange(name_ranges.value().basename_range);
59   result.context = GetTextForRange(name_ranges.value().context_range);
60   return result;
61 }
62 
63 bool CPlusPlusNameParser::HasMoreTokens() {
64   return m_next_token_index < m_tokens.size();
65 }
66 
67 void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
68 
69 void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
70 
71 bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
72   if (!HasMoreTokens())
73     return false;
74 
75   if (!Peek().is(kind))
76     return false;
77 
78   Advance();
79   return true;
80 }
81 
82 template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
83   if (!HasMoreTokens())
84     return false;
85 
86   if (!Peek().isOneOf(kinds...))
87     return false;
88 
89   Advance();
90   return true;
91 }
92 
93 CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
94   return Bookmark(m_next_token_index);
95 }
96 
97 size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
98 
99 clang::Token &CPlusPlusNameParser::Peek() {
100   assert(HasMoreTokens());
101   return m_tokens[m_next_token_index];
102 }
103 
104 Optional<ParsedFunction>
105 CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
106   Bookmark start_position = SetBookmark();
107   if (expect_return_type) {
108     // Consume return type if it's expected.
109     if (!ConsumeTypename())
110       return None;
111   }
112 
113   auto maybe_name = ParseFullNameImpl();
114   if (!maybe_name) {
115     return None;
116   }
117 
118   size_t argument_start = GetCurrentPosition();
119   if (!ConsumeArguments()) {
120     return None;
121   }
122 
123   size_t qualifiers_start = GetCurrentPosition();
124   SkipFunctionQualifiers();
125   size_t end_position = GetCurrentPosition();
126 
127   ParsedFunction result;
128   result.name.basename = GetTextForRange(maybe_name.value().basename_range);
129   result.name.context = GetTextForRange(maybe_name.value().context_range);
130   result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
131   result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
132   start_position.Remove();
133   return result;
134 }
135 
136 Optional<ParsedFunction>
137 CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
138   Bookmark start_position = SetBookmark();
139   if (expect_return_type) {
140     // Consume return type.
141     if (!ConsumeTypename())
142       return None;
143   }
144 
145   if (!ConsumeToken(tok::l_paren))
146     return None;
147   if (!ConsumePtrsAndRefs())
148     return None;
149 
150   {
151     Bookmark before_inner_function_pos = SetBookmark();
152     auto maybe_inner_function_name = ParseFunctionImpl(false);
153     if (maybe_inner_function_name)
154       if (ConsumeToken(tok::r_paren))
155         if (ConsumeArguments()) {
156           SkipFunctionQualifiers();
157           start_position.Remove();
158           before_inner_function_pos.Remove();
159           return maybe_inner_function_name;
160         }
161   }
162 
163   auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
164   if (maybe_inner_function_ptr_name)
165     if (ConsumeToken(tok::r_paren))
166       if (ConsumeArguments()) {
167         SkipFunctionQualifiers();
168         start_position.Remove();
169         return maybe_inner_function_ptr_name;
170       }
171   return None;
172 }
173 
174 bool CPlusPlusNameParser::ConsumeArguments() {
175   return ConsumeBrackets(tok::l_paren, tok::r_paren);
176 }
177 
178 bool CPlusPlusNameParser::ConsumeTemplateArgs() {
179   Bookmark start_position = SetBookmark();
180   if (!HasMoreTokens() || Peek().getKind() != tok::less)
181     return false;
182   Advance();
183 
184   // Consuming template arguments is a bit trickier than consuming function
185   // arguments, because '<' '>' brackets are not always trivially balanced. In
186   // some rare cases tokens '<' and '>' can appear inside template arguments as
187   // arithmetic or shift operators not as template brackets. Examples:
188   // std::enable_if<(10u)<(64), bool>
189   //           f<A<operator<(X,Y)::Subclass>>
190   // Good thing that compiler makes sure that really ambiguous cases of '>'
191   // usage should be enclosed within '()' brackets.
192   int template_counter = 1;
193   bool can_open_template = false;
194   while (HasMoreTokens() && template_counter > 0) {
195     tok::TokenKind kind = Peek().getKind();
196     switch (kind) {
197     case tok::greatergreater:
198       template_counter -= 2;
199       can_open_template = false;
200       Advance();
201       break;
202     case tok::greater:
203       --template_counter;
204       can_open_template = false;
205       Advance();
206       break;
207     case tok::less:
208       // '<' is an attempt to open a subteamplte
209       // check if parser is at the point where it's actually possible,
210       // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
211       // need to do the same for '>' because compiler actually makes sure that
212       // '>' always surrounded by brackets to avoid ambiguity.
213       if (can_open_template)
214         ++template_counter;
215       can_open_template = false;
216       Advance();
217       break;
218     case tok::kw_operator: // C++ operator overloading.
219       if (!ConsumeOperator())
220         return false;
221       can_open_template = true;
222       break;
223     case tok::raw_identifier:
224       can_open_template = true;
225       Advance();
226       break;
227     case tok::l_square:
228       if (!ConsumeBrackets(tok::l_square, tok::r_square))
229         return false;
230       can_open_template = false;
231       break;
232     case tok::l_paren:
233       if (!ConsumeArguments())
234         return false;
235       can_open_template = false;
236       break;
237     default:
238       can_open_template = false;
239       Advance();
240       break;
241     }
242   }
243 
244   if (template_counter != 0) {
245     return false;
246   }
247   start_position.Remove();
248   return true;
249 }
250 
251 bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
252   Bookmark start_position = SetBookmark();
253   if (!ConsumeToken(tok::l_paren)) {
254     return false;
255   }
256   constexpr llvm::StringLiteral g_anonymous("anonymous");
257   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
258       Peek().getRawIdentifier() == g_anonymous) {
259     Advance();
260   } else {
261     return false;
262   }
263 
264   if (!ConsumeToken(tok::kw_namespace)) {
265     return false;
266   }
267 
268   if (!ConsumeToken(tok::r_paren)) {
269     return false;
270   }
271   start_position.Remove();
272   return true;
273 }
274 
275 bool CPlusPlusNameParser::ConsumeLambda() {
276   Bookmark start_position = SetBookmark();
277   if (!ConsumeToken(tok::l_brace)) {
278     return false;
279   }
280   constexpr llvm::StringLiteral g_lambda("lambda");
281   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
282       Peek().getRawIdentifier() == g_lambda) {
283     // Put the matched brace back so we can use ConsumeBrackets
284     TakeBack();
285   } else {
286     return false;
287   }
288 
289   if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
290     return false;
291   }
292 
293   start_position.Remove();
294   return true;
295 }
296 
297 bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
298                                           tok::TokenKind right) {
299   Bookmark start_position = SetBookmark();
300   if (!HasMoreTokens() || Peek().getKind() != left)
301     return false;
302   Advance();
303 
304   int counter = 1;
305   while (HasMoreTokens() && counter > 0) {
306     tok::TokenKind kind = Peek().getKind();
307     if (kind == right)
308       --counter;
309     else if (kind == left)
310       ++counter;
311     Advance();
312   }
313 
314   assert(counter >= 0);
315   if (counter > 0) {
316     return false;
317   }
318   start_position.Remove();
319   return true;
320 }
321 
322 bool CPlusPlusNameParser::ConsumeOperator() {
323   Bookmark start_position = SetBookmark();
324   if (!ConsumeToken(tok::kw_operator))
325     return false;
326 
327   if (!HasMoreTokens()) {
328     return false;
329   }
330 
331   const auto &token = Peek();
332 
333   // When clang generates debug info it adds template parameters to names.
334   // Since clang doesn't add a space between the name and the template parameter
335   // in some cases we are not generating valid C++ names e.g.:
336   //
337   //   operator<<A::B>
338   //
339   // In some of these cases we will not parse them correctly. This fixes the
340   // issue by detecting this case and inserting tok::less in place of
341   // tok::lessless and returning successfully that we consumed the operator.
342   if (token.getKind() == tok::lessless) {
343     // Make sure we have more tokens before attempting to look ahead one more.
344     if (m_next_token_index + 1 < m_tokens.size()) {
345       // Look ahead two tokens.
346       clang::Token n_token = m_tokens[m_next_token_index + 1];
347       // If we find ( or < then this is indeed operator<< no need for fix.
348       if (n_token.getKind() != tok::l_paren && n_token.getKind() != tok::less) {
349         clang::Token tmp_tok;
350         tmp_tok.startToken();
351         tmp_tok.setLength(1);
352         tmp_tok.setLocation(token.getLocation().getLocWithOffset(1));
353         tmp_tok.setKind(tok::less);
354 
355         m_tokens[m_next_token_index] = tmp_tok;
356 
357         start_position.Remove();
358         return true;
359       }
360     }
361   }
362 
363   switch (token.getKind()) {
364   case tok::kw_new:
365   case tok::kw_delete:
366     // This is 'new' or 'delete' operators.
367     Advance();
368     // Check for array new/delete.
369     if (HasMoreTokens() && Peek().is(tok::l_square)) {
370       // Consume the '[' and ']'.
371       if (!ConsumeBrackets(tok::l_square, tok::r_square))
372         return false;
373     }
374     break;
375 
376 #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
377   case tok::Token:                                                             \
378     Advance();                                                                 \
379     break;
380 #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
381 #include "clang/Basic/OperatorKinds.def"
382 #undef OVERLOADED_OPERATOR
383 #undef OVERLOADED_OPERATOR_MULTI
384 
385   case tok::l_paren:
386     // Call operator consume '(' ... ')'.
387     if (ConsumeBrackets(tok::l_paren, tok::r_paren))
388       break;
389     return false;
390 
391   case tok::l_square:
392     // This is a [] operator.
393     // Consume the '[' and ']'.
394     if (ConsumeBrackets(tok::l_square, tok::r_square))
395       break;
396     return false;
397 
398   default:
399     // This might be a cast operator.
400     if (ConsumeTypename())
401       break;
402     return false;
403   }
404   start_position.Remove();
405   return true;
406 }
407 
408 void CPlusPlusNameParser::SkipTypeQualifiers() {
409   while (ConsumeToken(tok::kw_const, tok::kw_volatile))
410     ;
411 }
412 
413 void CPlusPlusNameParser::SkipFunctionQualifiers() {
414   while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
415     ;
416 }
417 
418 bool CPlusPlusNameParser::ConsumeBuiltinType() {
419   bool result = false;
420   bool continue_parsing = true;
421   // Built-in types can be made of a few keywords like 'unsigned long long
422   // int'. This function consumes all built-in type keywords without checking
423   // if they make sense like 'unsigned char void'.
424   while (continue_parsing && HasMoreTokens()) {
425     switch (Peek().getKind()) {
426     case tok::kw_short:
427     case tok::kw_long:
428     case tok::kw___int64:
429     case tok::kw___int128:
430     case tok::kw_signed:
431     case tok::kw_unsigned:
432     case tok::kw_void:
433     case tok::kw_char:
434     case tok::kw_int:
435     case tok::kw_half:
436     case tok::kw_float:
437     case tok::kw_double:
438     case tok::kw___float128:
439     case tok::kw_wchar_t:
440     case tok::kw_bool:
441     case tok::kw_char16_t:
442     case tok::kw_char32_t:
443       result = true;
444       Advance();
445       break;
446     default:
447       continue_parsing = false;
448       break;
449     }
450   }
451   return result;
452 }
453 
454 void CPlusPlusNameParser::SkipPtrsAndRefs() {
455   // Ignoring result.
456   ConsumePtrsAndRefs();
457 }
458 
459 bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
460   bool found = false;
461   SkipTypeQualifiers();
462   while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
463                       tok::kw_volatile)) {
464     found = true;
465     SkipTypeQualifiers();
466   }
467   return found;
468 }
469 
470 bool CPlusPlusNameParser::ConsumeDecltype() {
471   Bookmark start_position = SetBookmark();
472   if (!ConsumeToken(tok::kw_decltype))
473     return false;
474 
475   if (!ConsumeArguments())
476     return false;
477 
478   start_position.Remove();
479   return true;
480 }
481 
482 bool CPlusPlusNameParser::ConsumeTypename() {
483   Bookmark start_position = SetBookmark();
484   SkipTypeQualifiers();
485   if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
486     if (!ParseFullNameImpl())
487       return false;
488   }
489   SkipPtrsAndRefs();
490   start_position.Remove();
491   return true;
492 }
493 
494 Optional<CPlusPlusNameParser::ParsedNameRanges>
495 CPlusPlusNameParser::ParseFullNameImpl() {
496   // Name parsing state machine.
497   enum class State {
498     Beginning,       // start of the name
499     AfterTwoColons,  // right after ::
500     AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
501     AfterTemplate,   // right after template brackets (<something>)
502     AfterOperator,   // right after name of C++ operator
503   };
504 
505   Bookmark start_position = SetBookmark();
506   State state = State::Beginning;
507   bool continue_parsing = true;
508   Optional<size_t> last_coloncolon_position = None;
509 
510   while (continue_parsing && HasMoreTokens()) {
511     const auto &token = Peek();
512     switch (token.getKind()) {
513     case tok::raw_identifier: // Just a name.
514       if (state != State::Beginning && state != State::AfterTwoColons) {
515         continue_parsing = false;
516         break;
517       }
518       Advance();
519       state = State::AfterIdentifier;
520       break;
521     case tok::l_paren: {
522       if (state == State::Beginning || state == State::AfterTwoColons) {
523         // (anonymous namespace)
524         if (ConsumeAnonymousNamespace()) {
525           state = State::AfterIdentifier;
526           break;
527         }
528       }
529 
530       // Type declared inside a function 'func()::Type'
531       if (state != State::AfterIdentifier && state != State::AfterTemplate &&
532           state != State::AfterOperator) {
533         continue_parsing = false;
534         break;
535       }
536       Bookmark l_paren_position = SetBookmark();
537       // Consume the '(' ... ') [const]'.
538       if (!ConsumeArguments()) {
539         continue_parsing = false;
540         break;
541       }
542       SkipFunctionQualifiers();
543 
544       // Consume '::'
545       size_t coloncolon_position = GetCurrentPosition();
546       if (!ConsumeToken(tok::coloncolon)) {
547         continue_parsing = false;
548         break;
549       }
550       l_paren_position.Remove();
551       last_coloncolon_position = coloncolon_position;
552       state = State::AfterTwoColons;
553       break;
554     }
555     case tok::l_brace:
556       if (state == State::Beginning || state == State::AfterTwoColons) {
557         if (ConsumeLambda()) {
558           state = State::AfterIdentifier;
559           break;
560         }
561       }
562       continue_parsing = false;
563       break;
564     case tok::coloncolon: // Type nesting delimiter.
565       if (state != State::Beginning && state != State::AfterIdentifier &&
566           state != State::AfterTemplate) {
567         continue_parsing = false;
568         break;
569       }
570       last_coloncolon_position = GetCurrentPosition();
571       Advance();
572       state = State::AfterTwoColons;
573       break;
574     case tok::less: // Template brackets.
575       if (state != State::AfterIdentifier && state != State::AfterOperator) {
576         continue_parsing = false;
577         break;
578       }
579       if (!ConsumeTemplateArgs()) {
580         continue_parsing = false;
581         break;
582       }
583       state = State::AfterTemplate;
584       break;
585     case tok::kw_operator: // C++ operator overloading.
586       if (state != State::Beginning && state != State::AfterTwoColons) {
587         continue_parsing = false;
588         break;
589       }
590       if (!ConsumeOperator()) {
591         continue_parsing = false;
592         break;
593       }
594       state = State::AfterOperator;
595       break;
596     case tok::tilde: // Destructor.
597       if (state != State::Beginning && state != State::AfterTwoColons) {
598         continue_parsing = false;
599         break;
600       }
601       Advance();
602       if (ConsumeToken(tok::raw_identifier)) {
603         state = State::AfterIdentifier;
604       } else {
605         TakeBack();
606         continue_parsing = false;
607       }
608       break;
609     default:
610       continue_parsing = false;
611       break;
612     }
613   }
614 
615   if (state == State::AfterIdentifier || state == State::AfterOperator ||
616       state == State::AfterTemplate) {
617     ParsedNameRanges result;
618     if (last_coloncolon_position) {
619       result.context_range = Range(start_position.GetSavedPosition(),
620                                    last_coloncolon_position.value());
621       result.basename_range =
622           Range(last_coloncolon_position.value() + 1, GetCurrentPosition());
623     } else {
624       result.basename_range =
625           Range(start_position.GetSavedPosition(), GetCurrentPosition());
626     }
627     start_position.Remove();
628     return result;
629   } else {
630     return None;
631   }
632 }
633 
634 llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
635   if (range.empty())
636     return llvm::StringRef();
637   assert(range.begin_index < range.end_index);
638   assert(range.begin_index < m_tokens.size());
639   assert(range.end_index <= m_tokens.size());
640   clang::Token &first_token = m_tokens[range.begin_index];
641   clang::Token &last_token = m_tokens[range.end_index - 1];
642   clang::SourceLocation start_loc = first_token.getLocation();
643   clang::SourceLocation end_loc = last_token.getLocation();
644   unsigned start_pos = start_loc.getRawEncoding();
645   unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
646   return m_text.take_front(end_pos).drop_front(start_pos);
647 }
648 
649 static const clang::LangOptions &GetLangOptions() {
650   static clang::LangOptions g_options;
651   static llvm::once_flag g_once_flag;
652   llvm::call_once(g_once_flag, []() {
653     g_options.LineComment = true;
654     g_options.C99 = true;
655     g_options.C11 = true;
656     g_options.CPlusPlus = true;
657     g_options.CPlusPlus11 = true;
658     g_options.CPlusPlus14 = true;
659     g_options.CPlusPlus17 = true;
660   });
661   return g_options;
662 }
663 
664 static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
665   static llvm::StringMap<tok::TokenKind> g_map{
666 #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
667 #include "clang/Basic/TokenKinds.def"
668 #undef KEYWORD
669   };
670   return g_map;
671 }
672 
673 void CPlusPlusNameParser::ExtractTokens() {
674   if (m_text.empty())
675     return;
676   clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
677                      m_text.data(), m_text.data() + m_text.size());
678   const auto &kw_map = GetKeywordsMap();
679   clang::Token token;
680   for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
681        lexer.LexFromRawLexer(token)) {
682     if (token.is(clang::tok::raw_identifier)) {
683       auto it = kw_map.find(token.getRawIdentifier());
684       if (it != kw_map.end()) {
685         token.setKind(it->getValue());
686       }
687     }
688 
689     m_tokens.push_back(token);
690   }
691 }
692