1 //===-- CPlusPlusNameParser.cpp -------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "CPlusPlusNameParser.h" 10 11 #include "clang/Basic/IdentifierTable.h" 12 #include "llvm/ADT/StringMap.h" 13 #include "llvm/Support/Threading.h" 14 15 using namespace lldb; 16 using namespace lldb_private; 17 using llvm::Optional; 18 using llvm::None; 19 using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction; 20 using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName; 21 namespace tok = clang::tok; 22 23 Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() { 24 m_next_token_index = 0; 25 Optional<ParsedFunction> result(None); 26 27 // Try to parse the name as function without a return type specified e.g. 28 // main(int, char*[]) 29 { 30 Bookmark start_position = SetBookmark(); 31 result = ParseFunctionImpl(false); 32 if (result && !HasMoreTokens()) 33 return result; 34 } 35 36 // Try to parse the name as function with function pointer return type e.g. 37 // void (*get_func(const char*))() 38 result = ParseFuncPtr(true); 39 if (result) 40 return result; 41 42 // Finally try to parse the name as a function with non-function return type 43 // e.g. int main(int, char*[]) 44 result = ParseFunctionImpl(true); 45 if (HasMoreTokens()) 46 return None; 47 return result; 48 } 49 50 Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() { 51 m_next_token_index = 0; 52 Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl(); 53 if (!name_ranges) 54 return None; 55 if (HasMoreTokens()) 56 return None; 57 ParsedName result; 58 result.basename = GetTextForRange(name_ranges.value().basename_range); 59 result.context = GetTextForRange(name_ranges.value().context_range); 60 return result; 61 } 62 63 bool CPlusPlusNameParser::HasMoreTokens() { 64 return m_next_token_index < m_tokens.size(); 65 } 66 67 void CPlusPlusNameParser::Advance() { ++m_next_token_index; } 68 69 void CPlusPlusNameParser::TakeBack() { --m_next_token_index; } 70 71 bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) { 72 if (!HasMoreTokens()) 73 return false; 74 75 if (!Peek().is(kind)) 76 return false; 77 78 Advance(); 79 return true; 80 } 81 82 template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) { 83 if (!HasMoreTokens()) 84 return false; 85 86 if (!Peek().isOneOf(kinds...)) 87 return false; 88 89 Advance(); 90 return true; 91 } 92 93 CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() { 94 return Bookmark(m_next_token_index); 95 } 96 97 size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; } 98 99 clang::Token &CPlusPlusNameParser::Peek() { 100 assert(HasMoreTokens()); 101 return m_tokens[m_next_token_index]; 102 } 103 104 Optional<ParsedFunction> 105 CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) { 106 Bookmark start_position = SetBookmark(); 107 if (expect_return_type) { 108 // Consume return type if it's expected. 109 if (!ConsumeTypename()) 110 return None; 111 } 112 113 auto maybe_name = ParseFullNameImpl(); 114 if (!maybe_name) { 115 return None; 116 } 117 118 size_t argument_start = GetCurrentPosition(); 119 if (!ConsumeArguments()) { 120 return None; 121 } 122 123 size_t qualifiers_start = GetCurrentPosition(); 124 SkipFunctionQualifiers(); 125 size_t end_position = GetCurrentPosition(); 126 127 ParsedFunction result; 128 result.name.basename = GetTextForRange(maybe_name.value().basename_range); 129 result.name.context = GetTextForRange(maybe_name.value().context_range); 130 result.arguments = GetTextForRange(Range(argument_start, qualifiers_start)); 131 result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position)); 132 start_position.Remove(); 133 return result; 134 } 135 136 Optional<ParsedFunction> 137 CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) { 138 Bookmark start_position = SetBookmark(); 139 if (expect_return_type) { 140 // Consume return type. 141 if (!ConsumeTypename()) 142 return None; 143 } 144 145 if (!ConsumeToken(tok::l_paren)) 146 return None; 147 if (!ConsumePtrsAndRefs()) 148 return None; 149 150 { 151 Bookmark before_inner_function_pos = SetBookmark(); 152 auto maybe_inner_function_name = ParseFunctionImpl(false); 153 if (maybe_inner_function_name) 154 if (ConsumeToken(tok::r_paren)) 155 if (ConsumeArguments()) { 156 SkipFunctionQualifiers(); 157 start_position.Remove(); 158 before_inner_function_pos.Remove(); 159 return maybe_inner_function_name; 160 } 161 } 162 163 auto maybe_inner_function_ptr_name = ParseFuncPtr(false); 164 if (maybe_inner_function_ptr_name) 165 if (ConsumeToken(tok::r_paren)) 166 if (ConsumeArguments()) { 167 SkipFunctionQualifiers(); 168 start_position.Remove(); 169 return maybe_inner_function_ptr_name; 170 } 171 return None; 172 } 173 174 bool CPlusPlusNameParser::ConsumeArguments() { 175 return ConsumeBrackets(tok::l_paren, tok::r_paren); 176 } 177 178 bool CPlusPlusNameParser::ConsumeTemplateArgs() { 179 Bookmark start_position = SetBookmark(); 180 if (!HasMoreTokens() || Peek().getKind() != tok::less) 181 return false; 182 Advance(); 183 184 // Consuming template arguments is a bit trickier than consuming function 185 // arguments, because '<' '>' brackets are not always trivially balanced. In 186 // some rare cases tokens '<' and '>' can appear inside template arguments as 187 // arithmetic or shift operators not as template brackets. Examples: 188 // std::enable_if<(10u)<(64), bool> 189 // f<A<operator<(X,Y)::Subclass>> 190 // Good thing that compiler makes sure that really ambiguous cases of '>' 191 // usage should be enclosed within '()' brackets. 192 int template_counter = 1; 193 bool can_open_template = false; 194 while (HasMoreTokens() && template_counter > 0) { 195 tok::TokenKind kind = Peek().getKind(); 196 switch (kind) { 197 case tok::greatergreater: 198 template_counter -= 2; 199 can_open_template = false; 200 Advance(); 201 break; 202 case tok::greater: 203 --template_counter; 204 can_open_template = false; 205 Advance(); 206 break; 207 case tok::less: 208 // '<' is an attempt to open a subteamplte 209 // check if parser is at the point where it's actually possible, 210 // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No 211 // need to do the same for '>' because compiler actually makes sure that 212 // '>' always surrounded by brackets to avoid ambiguity. 213 if (can_open_template) 214 ++template_counter; 215 can_open_template = false; 216 Advance(); 217 break; 218 case tok::kw_operator: // C++ operator overloading. 219 if (!ConsumeOperator()) 220 return false; 221 can_open_template = true; 222 break; 223 case tok::raw_identifier: 224 can_open_template = true; 225 Advance(); 226 break; 227 case tok::l_square: 228 if (!ConsumeBrackets(tok::l_square, tok::r_square)) 229 return false; 230 can_open_template = false; 231 break; 232 case tok::l_paren: 233 if (!ConsumeArguments()) 234 return false; 235 can_open_template = false; 236 break; 237 default: 238 can_open_template = false; 239 Advance(); 240 break; 241 } 242 } 243 244 if (template_counter != 0) { 245 return false; 246 } 247 start_position.Remove(); 248 return true; 249 } 250 251 bool CPlusPlusNameParser::ConsumeAnonymousNamespace() { 252 Bookmark start_position = SetBookmark(); 253 if (!ConsumeToken(tok::l_paren)) { 254 return false; 255 } 256 constexpr llvm::StringLiteral g_anonymous("anonymous"); 257 if (HasMoreTokens() && Peek().is(tok::raw_identifier) && 258 Peek().getRawIdentifier() == g_anonymous) { 259 Advance(); 260 } else { 261 return false; 262 } 263 264 if (!ConsumeToken(tok::kw_namespace)) { 265 return false; 266 } 267 268 if (!ConsumeToken(tok::r_paren)) { 269 return false; 270 } 271 start_position.Remove(); 272 return true; 273 } 274 275 bool CPlusPlusNameParser::ConsumeLambda() { 276 Bookmark start_position = SetBookmark(); 277 if (!ConsumeToken(tok::l_brace)) { 278 return false; 279 } 280 constexpr llvm::StringLiteral g_lambda("lambda"); 281 if (HasMoreTokens() && Peek().is(tok::raw_identifier) && 282 Peek().getRawIdentifier() == g_lambda) { 283 // Put the matched brace back so we can use ConsumeBrackets 284 TakeBack(); 285 } else { 286 return false; 287 } 288 289 if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) { 290 return false; 291 } 292 293 start_position.Remove(); 294 return true; 295 } 296 297 bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left, 298 tok::TokenKind right) { 299 Bookmark start_position = SetBookmark(); 300 if (!HasMoreTokens() || Peek().getKind() != left) 301 return false; 302 Advance(); 303 304 int counter = 1; 305 while (HasMoreTokens() && counter > 0) { 306 tok::TokenKind kind = Peek().getKind(); 307 if (kind == right) 308 --counter; 309 else if (kind == left) 310 ++counter; 311 Advance(); 312 } 313 314 assert(counter >= 0); 315 if (counter > 0) { 316 return false; 317 } 318 start_position.Remove(); 319 return true; 320 } 321 322 bool CPlusPlusNameParser::ConsumeOperator() { 323 Bookmark start_position = SetBookmark(); 324 if (!ConsumeToken(tok::kw_operator)) 325 return false; 326 327 if (!HasMoreTokens()) { 328 return false; 329 } 330 331 const auto &token = Peek(); 332 333 // When clang generates debug info it adds template parameters to names. 334 // Since clang doesn't add a space between the name and the template parameter 335 // in some cases we are not generating valid C++ names e.g.: 336 // 337 // operator<<A::B> 338 // 339 // In some of these cases we will not parse them correctly. This fixes the 340 // issue by detecting this case and inserting tok::less in place of 341 // tok::lessless and returning successfully that we consumed the operator. 342 if (token.getKind() == tok::lessless) { 343 // Make sure we have more tokens before attempting to look ahead one more. 344 if (m_next_token_index + 1 < m_tokens.size()) { 345 // Look ahead two tokens. 346 clang::Token n_token = m_tokens[m_next_token_index + 1]; 347 // If we find ( or < then this is indeed operator<< no need for fix. 348 if (n_token.getKind() != tok::l_paren && n_token.getKind() != tok::less) { 349 clang::Token tmp_tok; 350 tmp_tok.startToken(); 351 tmp_tok.setLength(1); 352 tmp_tok.setLocation(token.getLocation().getLocWithOffset(1)); 353 tmp_tok.setKind(tok::less); 354 355 m_tokens[m_next_token_index] = tmp_tok; 356 357 start_position.Remove(); 358 return true; 359 } 360 } 361 } 362 363 switch (token.getKind()) { 364 case tok::kw_new: 365 case tok::kw_delete: 366 // This is 'new' or 'delete' operators. 367 Advance(); 368 // Check for array new/delete. 369 if (HasMoreTokens() && Peek().is(tok::l_square)) { 370 // Consume the '[' and ']'. 371 if (!ConsumeBrackets(tok::l_square, tok::r_square)) 372 return false; 373 } 374 break; 375 376 #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \ 377 case tok::Token: \ 378 Advance(); \ 379 break; 380 #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly) 381 #include "clang/Basic/OperatorKinds.def" 382 #undef OVERLOADED_OPERATOR 383 #undef OVERLOADED_OPERATOR_MULTI 384 385 case tok::l_paren: 386 // Call operator consume '(' ... ')'. 387 if (ConsumeBrackets(tok::l_paren, tok::r_paren)) 388 break; 389 return false; 390 391 case tok::l_square: 392 // This is a [] operator. 393 // Consume the '[' and ']'. 394 if (ConsumeBrackets(tok::l_square, tok::r_square)) 395 break; 396 return false; 397 398 default: 399 // This might be a cast operator. 400 if (ConsumeTypename()) 401 break; 402 return false; 403 } 404 start_position.Remove(); 405 return true; 406 } 407 408 void CPlusPlusNameParser::SkipTypeQualifiers() { 409 while (ConsumeToken(tok::kw_const, tok::kw_volatile)) 410 ; 411 } 412 413 void CPlusPlusNameParser::SkipFunctionQualifiers() { 414 while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp)) 415 ; 416 } 417 418 bool CPlusPlusNameParser::ConsumeBuiltinType() { 419 bool result = false; 420 bool continue_parsing = true; 421 // Built-in types can be made of a few keywords like 'unsigned long long 422 // int'. This function consumes all built-in type keywords without checking 423 // if they make sense like 'unsigned char void'. 424 while (continue_parsing && HasMoreTokens()) { 425 switch (Peek().getKind()) { 426 case tok::kw_short: 427 case tok::kw_long: 428 case tok::kw___int64: 429 case tok::kw___int128: 430 case tok::kw_signed: 431 case tok::kw_unsigned: 432 case tok::kw_void: 433 case tok::kw_char: 434 case tok::kw_int: 435 case tok::kw_half: 436 case tok::kw_float: 437 case tok::kw_double: 438 case tok::kw___float128: 439 case tok::kw_wchar_t: 440 case tok::kw_bool: 441 case tok::kw_char16_t: 442 case tok::kw_char32_t: 443 result = true; 444 Advance(); 445 break; 446 default: 447 continue_parsing = false; 448 break; 449 } 450 } 451 return result; 452 } 453 454 void CPlusPlusNameParser::SkipPtrsAndRefs() { 455 // Ignoring result. 456 ConsumePtrsAndRefs(); 457 } 458 459 bool CPlusPlusNameParser::ConsumePtrsAndRefs() { 460 bool found = false; 461 SkipTypeQualifiers(); 462 while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const, 463 tok::kw_volatile)) { 464 found = true; 465 SkipTypeQualifiers(); 466 } 467 return found; 468 } 469 470 bool CPlusPlusNameParser::ConsumeDecltype() { 471 Bookmark start_position = SetBookmark(); 472 if (!ConsumeToken(tok::kw_decltype)) 473 return false; 474 475 if (!ConsumeArguments()) 476 return false; 477 478 start_position.Remove(); 479 return true; 480 } 481 482 bool CPlusPlusNameParser::ConsumeTypename() { 483 Bookmark start_position = SetBookmark(); 484 SkipTypeQualifiers(); 485 if (!ConsumeBuiltinType() && !ConsumeDecltype()) { 486 if (!ParseFullNameImpl()) 487 return false; 488 } 489 SkipPtrsAndRefs(); 490 start_position.Remove(); 491 return true; 492 } 493 494 Optional<CPlusPlusNameParser::ParsedNameRanges> 495 CPlusPlusNameParser::ParseFullNameImpl() { 496 // Name parsing state machine. 497 enum class State { 498 Beginning, // start of the name 499 AfterTwoColons, // right after :: 500 AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+) 501 AfterTemplate, // right after template brackets (<something>) 502 AfterOperator, // right after name of C++ operator 503 }; 504 505 Bookmark start_position = SetBookmark(); 506 State state = State::Beginning; 507 bool continue_parsing = true; 508 Optional<size_t> last_coloncolon_position = None; 509 510 while (continue_parsing && HasMoreTokens()) { 511 const auto &token = Peek(); 512 switch (token.getKind()) { 513 case tok::raw_identifier: // Just a name. 514 if (state != State::Beginning && state != State::AfterTwoColons) { 515 continue_parsing = false; 516 break; 517 } 518 Advance(); 519 state = State::AfterIdentifier; 520 break; 521 case tok::l_paren: { 522 if (state == State::Beginning || state == State::AfterTwoColons) { 523 // (anonymous namespace) 524 if (ConsumeAnonymousNamespace()) { 525 state = State::AfterIdentifier; 526 break; 527 } 528 } 529 530 // Type declared inside a function 'func()::Type' 531 if (state != State::AfterIdentifier && state != State::AfterTemplate && 532 state != State::AfterOperator) { 533 continue_parsing = false; 534 break; 535 } 536 Bookmark l_paren_position = SetBookmark(); 537 // Consume the '(' ... ') [const]'. 538 if (!ConsumeArguments()) { 539 continue_parsing = false; 540 break; 541 } 542 SkipFunctionQualifiers(); 543 544 // Consume '::' 545 size_t coloncolon_position = GetCurrentPosition(); 546 if (!ConsumeToken(tok::coloncolon)) { 547 continue_parsing = false; 548 break; 549 } 550 l_paren_position.Remove(); 551 last_coloncolon_position = coloncolon_position; 552 state = State::AfterTwoColons; 553 break; 554 } 555 case tok::l_brace: 556 if (state == State::Beginning || state == State::AfterTwoColons) { 557 if (ConsumeLambda()) { 558 state = State::AfterIdentifier; 559 break; 560 } 561 } 562 continue_parsing = false; 563 break; 564 case tok::coloncolon: // Type nesting delimiter. 565 if (state != State::Beginning && state != State::AfterIdentifier && 566 state != State::AfterTemplate) { 567 continue_parsing = false; 568 break; 569 } 570 last_coloncolon_position = GetCurrentPosition(); 571 Advance(); 572 state = State::AfterTwoColons; 573 break; 574 case tok::less: // Template brackets. 575 if (state != State::AfterIdentifier && state != State::AfterOperator) { 576 continue_parsing = false; 577 break; 578 } 579 if (!ConsumeTemplateArgs()) { 580 continue_parsing = false; 581 break; 582 } 583 state = State::AfterTemplate; 584 break; 585 case tok::kw_operator: // C++ operator overloading. 586 if (state != State::Beginning && state != State::AfterTwoColons) { 587 continue_parsing = false; 588 break; 589 } 590 if (!ConsumeOperator()) { 591 continue_parsing = false; 592 break; 593 } 594 state = State::AfterOperator; 595 break; 596 case tok::tilde: // Destructor. 597 if (state != State::Beginning && state != State::AfterTwoColons) { 598 continue_parsing = false; 599 break; 600 } 601 Advance(); 602 if (ConsumeToken(tok::raw_identifier)) { 603 state = State::AfterIdentifier; 604 } else { 605 TakeBack(); 606 continue_parsing = false; 607 } 608 break; 609 default: 610 continue_parsing = false; 611 break; 612 } 613 } 614 615 if (state == State::AfterIdentifier || state == State::AfterOperator || 616 state == State::AfterTemplate) { 617 ParsedNameRanges result; 618 if (last_coloncolon_position) { 619 result.context_range = Range(start_position.GetSavedPosition(), 620 last_coloncolon_position.value()); 621 result.basename_range = 622 Range(last_coloncolon_position.value() + 1, GetCurrentPosition()); 623 } else { 624 result.basename_range = 625 Range(start_position.GetSavedPosition(), GetCurrentPosition()); 626 } 627 start_position.Remove(); 628 return result; 629 } else { 630 return None; 631 } 632 } 633 634 llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) { 635 if (range.empty()) 636 return llvm::StringRef(); 637 assert(range.begin_index < range.end_index); 638 assert(range.begin_index < m_tokens.size()); 639 assert(range.end_index <= m_tokens.size()); 640 clang::Token &first_token = m_tokens[range.begin_index]; 641 clang::Token &last_token = m_tokens[range.end_index - 1]; 642 clang::SourceLocation start_loc = first_token.getLocation(); 643 clang::SourceLocation end_loc = last_token.getLocation(); 644 unsigned start_pos = start_loc.getRawEncoding(); 645 unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength(); 646 return m_text.take_front(end_pos).drop_front(start_pos); 647 } 648 649 static const clang::LangOptions &GetLangOptions() { 650 static clang::LangOptions g_options; 651 static llvm::once_flag g_once_flag; 652 llvm::call_once(g_once_flag, []() { 653 g_options.LineComment = true; 654 g_options.C99 = true; 655 g_options.C11 = true; 656 g_options.CPlusPlus = true; 657 g_options.CPlusPlus11 = true; 658 g_options.CPlusPlus14 = true; 659 g_options.CPlusPlus17 = true; 660 }); 661 return g_options; 662 } 663 664 static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() { 665 static llvm::StringMap<tok::TokenKind> g_map{ 666 #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name}, 667 #include "clang/Basic/TokenKinds.def" 668 #undef KEYWORD 669 }; 670 return g_map; 671 } 672 673 void CPlusPlusNameParser::ExtractTokens() { 674 if (m_text.empty()) 675 return; 676 clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(), 677 m_text.data(), m_text.data() + m_text.size()); 678 const auto &kw_map = GetKeywordsMap(); 679 clang::Token token; 680 for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof); 681 lexer.LexFromRawLexer(token)) { 682 if (token.is(clang::tok::raw_identifier)) { 683 auto it = kw_map.find(token.getRawIdentifier()); 684 if (it != kw_map.end()) { 685 token.setKind(it->getValue()); 686 } 687 } 688 689 m_tokens.push_back(token); 690 } 691 } 692