//===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
/// clang::Selector interfaces.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
#define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H

#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
#include "llvm/Support/type_traits.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <utility>

namespace clang {

// Forward declarations; only pointers and references to these types are
// needed by the declarations in this header.
class DeclarationName;
class DeclarationNameTable;
class IdentifierInfo;
class LangOptions;
class MultiKeywordSelector;
class SourceLocation;

/// Classification produced by IdentifierInfo::isReserved().
///
/// NotReserved is the zero value. isReservedAtGlobalScope() treats every
/// other enumerator as reserved, while isReservedInAllContexts() additionally
/// excludes StartsWithUnderscoreAtGlobalScope and
/// StartsWithUnderscoreAndIsExternC.
// NOTE(review): the enumerator names presumably describe which naming rule
// matched; the classification logic itself is not defined in this header.
enum class ReservedIdentifierStatus {
  NotReserved = 0,
  StartsWithUnderscoreAtGlobalScope,
  StartsWithUnderscoreAndIsExternC,
  StartsWithDoubleUnderscore,
  StartsWithUnderscoreFollowedByCapitalLetter,
  ContainsDoubleUnderscore,
};

/// Classification produced by IdentifierInfo::isReservedLiteralSuffixId().
///
/// NotReserved is the zero value.
enum class ReservedLiteralSuffixIdStatus {
  NotReserved = 0,
  NotStartsWithUnderscore,
  ContainsDoubleUnderscore,
};

/// Determine whether an identifier is reserved for use as a name at global
/// scope. Such identifiers might be implementation-specific global functions
/// or variables.
62 inline bool isReservedAtGlobalScope(ReservedIdentifierStatus Status) { 63 return Status != ReservedIdentifierStatus::NotReserved; 64 } 65 66 /// Determine whether an identifier is reserved in all contexts. Such 67 /// identifiers might be implementation-specific keywords or macros, for 68 /// example. 69 inline bool isReservedInAllContexts(ReservedIdentifierStatus Status) { 70 return Status != ReservedIdentifierStatus::NotReserved && 71 Status != ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope && 72 Status != ReservedIdentifierStatus::StartsWithUnderscoreAndIsExternC; 73 } 74 75 /// A simple pair of identifier info and location. 76 using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>; 77 78 /// IdentifierInfo and other related classes are aligned to 79 /// 8 bytes so that DeclarationName can use the lower 3 bits 80 /// of a pointer to one of these classes. 81 enum { IdentifierInfoAlignment = 8 }; 82 83 static constexpr int ObjCOrBuiltinIDBits = 16; 84 85 /// The "layout" of ObjCOrBuiltinID is: 86 /// - The first value (0) represents "not a special identifier". 87 /// - The next (NUM_OBJC_KEYWORDS - 1) values represent ObjCKeywordKinds (not 88 /// including objc_not_keyword). 89 /// - The next (NUM_INTERESTING_IDENTIFIERS - 1) values represent 90 /// InterestingIdentifierKinds (not including not_interesting). 91 /// - The rest of the values represent builtin IDs (not including NotBuiltin). 92 static constexpr int FirstObjCKeywordID = 1; 93 static constexpr int LastObjCKeywordID = 94 FirstObjCKeywordID + tok::NUM_OBJC_KEYWORDS - 2; 95 static constexpr int FirstInterestingIdentifierID = LastObjCKeywordID + 1; 96 static constexpr int LastInterestingIdentifierID = 97 FirstInterestingIdentifierID + tok::NUM_INTERESTING_IDENTIFIERS - 2; 98 static constexpr int FirstBuiltinID = LastInterestingIdentifierID + 1; 99 100 /// One of these records is kept for each identifier that 101 /// is lexed. 
This contains information about whether the token was \#define'd, 102 /// is a language keyword, or if it is a front-end token of some sort (e.g. a 103 /// variable or function name). The preprocessor keeps this information in a 104 /// set, and all tok::identifier tokens have a pointer to one of these. 105 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits. 106 class alignas(IdentifierInfoAlignment) IdentifierInfo { 107 friend class IdentifierTable; 108 109 // Front-end token ID or tok::identifier. 110 unsigned TokenID : 9; 111 112 // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf). 113 // First NUM_OBJC_KEYWORDS values are for Objective-C, 114 // the remaining values are for builtins. 115 unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits; 116 117 // True if there is a #define for this. 118 unsigned HasMacro : 1; 119 120 // True if there was a #define for this. 121 unsigned HadMacro : 1; 122 123 // True if the identifier is a language extension. 124 unsigned IsExtension : 1; 125 126 // True if the identifier is a keyword in a newer or proposed Standard. 127 unsigned IsFutureCompatKeyword : 1; 128 129 // True if the identifier is poisoned. 130 unsigned IsPoisoned : 1; 131 132 // True if the identifier is a C++ operator keyword. 133 unsigned IsCPPOperatorKeyword : 1; 134 135 // Internal bit set by the member function RecomputeNeedsHandleIdentifier. 136 // See comment about RecomputeNeedsHandleIdentifier for more info. 137 unsigned NeedsHandleIdentifier : 1; 138 139 // True if the identifier was loaded (at least partially) from an AST file. 140 unsigned IsFromAST : 1; 141 142 // True if the identifier has changed from the definition 143 // loaded from an AST file. 144 unsigned ChangedAfterLoad : 1; 145 146 // True if the identifier's frontend information has changed from the 147 // definition loaded from an AST file. 148 unsigned FEChangedAfterLoad : 1; 149 150 // True if revertTokenIDToIdentifier was called. 
151 unsigned RevertedTokenID : 1; 152 153 // True if there may be additional information about 154 // this identifier stored externally. 155 unsigned OutOfDate : 1; 156 157 // True if this is the 'import' contextual keyword. 158 unsigned IsModulesImport : 1; 159 160 // True if this is a mangled OpenMP variant name. 161 unsigned IsMangledOpenMPVariantName : 1; 162 163 // True if this is a deprecated macro. 164 unsigned IsDeprecatedMacro : 1; 165 166 // True if this macro is unsafe in headers. 167 unsigned IsRestrictExpansion : 1; 168 169 // True if this macro is final. 170 unsigned IsFinal : 1; 171 172 // 22 bits left in a 64-bit word. 173 174 // Managed by the language front-end. 175 void *FETokenInfo = nullptr; 176 177 llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr; 178 179 IdentifierInfo() 180 : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false), 181 HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false), 182 IsPoisoned(false), IsCPPOperatorKeyword(false), 183 NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false), 184 FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false), 185 IsModulesImport(false), IsMangledOpenMPVariantName(false), 186 IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {} 187 188 public: 189 IdentifierInfo(const IdentifierInfo &) = delete; 190 IdentifierInfo &operator=(const IdentifierInfo &) = delete; 191 IdentifierInfo(IdentifierInfo &&) = delete; 192 IdentifierInfo &operator=(IdentifierInfo &&) = delete; 193 194 /// Return true if this is the identifier for the specified string. 195 /// 196 /// This is intended to be used for string literals only: II->isStr("foo"). 197 template <std::size_t StrLen> 198 bool isStr(const char (&Str)[StrLen]) const { 199 return getLength() == StrLen-1 && 200 memcmp(getNameStart(), Str, StrLen-1) == 0; 201 } 202 203 /// Return true if this is the identifier for the specified StringRef. 
204 bool isStr(llvm::StringRef Str) const { 205 llvm::StringRef ThisStr(getNameStart(), getLength()); 206 return ThisStr == Str; 207 } 208 209 /// Return the beginning of the actual null-terminated string for this 210 /// identifier. 211 const char *getNameStart() const { return Entry->getKeyData(); } 212 213 /// Efficiently return the length of this identifier info. 214 unsigned getLength() const { return Entry->getKeyLength(); } 215 216 /// Return the actual identifier string. 217 StringRef getName() const { 218 return StringRef(getNameStart(), getLength()); 219 } 220 221 /// Return true if this identifier is \#defined to some other value. 222 /// \note The current definition may be in a module and not currently visible. 223 bool hasMacroDefinition() const { 224 return HasMacro; 225 } 226 void setHasMacroDefinition(bool Val) { 227 if (HasMacro == Val) return; 228 229 HasMacro = Val; 230 if (Val) { 231 NeedsHandleIdentifier = true; 232 HadMacro = true; 233 } else { 234 // If this is a final macro, make the deprecation and header unsafe bits 235 // stick around after the undefinition so they apply to any redefinitions. 236 if (!IsFinal) { 237 // Because calling the setters of these calls recomputes, just set them 238 // manually to avoid recomputing a bunch of times. 239 IsDeprecatedMacro = false; 240 IsRestrictExpansion = false; 241 } 242 RecomputeNeedsHandleIdentifier(); 243 } 244 } 245 /// Returns true if this identifier was \#defined to some value at any 246 /// moment. In this case there should be an entry for the identifier in the 247 /// macro history table in Preprocessor. 
248 bool hadMacroDefinition() const { 249 return HadMacro; 250 } 251 252 bool isDeprecatedMacro() const { return IsDeprecatedMacro; } 253 254 void setIsDeprecatedMacro(bool Val) { 255 if (IsDeprecatedMacro == Val) 256 return; 257 IsDeprecatedMacro = Val; 258 if (Val) 259 NeedsHandleIdentifier = true; 260 else 261 RecomputeNeedsHandleIdentifier(); 262 } 263 264 bool isRestrictExpansion() const { return IsRestrictExpansion; } 265 266 void setIsRestrictExpansion(bool Val) { 267 if (IsRestrictExpansion == Val) 268 return; 269 IsRestrictExpansion = Val; 270 if (Val) 271 NeedsHandleIdentifier = true; 272 else 273 RecomputeNeedsHandleIdentifier(); 274 } 275 276 bool isFinal() const { return IsFinal; } 277 278 void setIsFinal(bool Val) { IsFinal = Val; } 279 280 /// If this is a source-language token (e.g. 'for'), this API 281 /// can be used to cause the lexer to map identifiers to source-language 282 /// tokens. 283 tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; } 284 285 /// True if revertTokenIDToIdentifier() was called. 286 bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; } 287 288 /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2 289 /// compatibility. 290 /// 291 /// TokenID is normally read-only but there are 2 instances where we revert it 292 /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens 293 /// using this method so we can inform serialization about it. 294 void revertTokenIDToIdentifier() { 295 assert(TokenID != tok::identifier && "Already at tok::identifier"); 296 TokenID = tok::identifier; 297 RevertedTokenID = true; 298 } 299 void revertIdentifierToTokenID(tok::TokenKind TK) { 300 assert(TokenID == tok::identifier && "Should be at tok::identifier"); 301 TokenID = TK; 302 RevertedTokenID = false; 303 } 304 305 /// Return the preprocessor keyword ID for this identifier. 306 /// 307 /// For example, "define" will return tok::pp_define. 
308 tok::PPKeywordKind getPPKeywordID() const; 309 310 /// Return the Objective-C keyword ID for the this identifier. 311 /// 312 /// For example, 'class' will return tok::objc_class if ObjC is enabled. 313 tok::ObjCKeywordKind getObjCKeywordID() const { 314 static_assert(FirstObjCKeywordID == 1, 315 "hard-coding this assumption to simplify code"); 316 if (ObjCOrBuiltinID <= LastObjCKeywordID) 317 return tok::ObjCKeywordKind(ObjCOrBuiltinID); 318 else 319 return tok::objc_not_keyword; 320 } 321 void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; } 322 323 /// Return a value indicating whether this is a builtin function. 324 /// 325 /// 0 is not-built-in. 1+ are specific builtin functions. 326 unsigned getBuiltinID() const { 327 if (ObjCOrBuiltinID >= FirstBuiltinID) 328 return 1 + (ObjCOrBuiltinID - FirstBuiltinID); 329 else 330 return 0; 331 } 332 void setBuiltinID(unsigned ID) { 333 assert(ID != 0); 334 ObjCOrBuiltinID = FirstBuiltinID + (ID - 1); 335 assert(getBuiltinID() == ID && "ID too large for field!"); 336 } 337 void clearBuiltinID() { ObjCOrBuiltinID = 0; } 338 339 tok::InterestingIdentifierKind getInterestingIdentifierID() const { 340 if (ObjCOrBuiltinID >= FirstInterestingIdentifierID && 341 ObjCOrBuiltinID <= LastInterestingIdentifierID) 342 return tok::InterestingIdentifierKind( 343 1 + (ObjCOrBuiltinID - FirstInterestingIdentifierID)); 344 else 345 return tok::not_interesting; 346 } 347 void setInterestingIdentifierID(unsigned ID) { 348 assert(ID != tok::not_interesting); 349 ObjCOrBuiltinID = FirstInterestingIdentifierID + (ID - 1); 350 assert(getInterestingIdentifierID() == ID && "ID too large for field!"); 351 } 352 353 unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; } 354 void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; } 355 356 /// get/setExtension - Initialize information about whether or not this 357 /// language token is an extension. 
This controls extension warnings, and is 358 /// only valid if a custom token ID is set. 359 bool isExtensionToken() const { return IsExtension; } 360 void setIsExtensionToken(bool Val) { 361 IsExtension = Val; 362 if (Val) 363 NeedsHandleIdentifier = true; 364 else 365 RecomputeNeedsHandleIdentifier(); 366 } 367 368 /// is/setIsFutureCompatKeyword - Initialize information about whether or not 369 /// this language token is a keyword in a newer or proposed Standard. This 370 /// controls compatibility warnings, and is only true when not parsing the 371 /// corresponding Standard. Once a compatibility problem has been diagnosed 372 /// with this keyword, the flag will be cleared. 373 bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; } 374 void setIsFutureCompatKeyword(bool Val) { 375 IsFutureCompatKeyword = Val; 376 if (Val) 377 NeedsHandleIdentifier = true; 378 else 379 RecomputeNeedsHandleIdentifier(); 380 } 381 382 /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the 383 /// Preprocessor will emit an error every time this token is used. 384 void setIsPoisoned(bool Value = true) { 385 IsPoisoned = Value; 386 if (Value) 387 NeedsHandleIdentifier = true; 388 else 389 RecomputeNeedsHandleIdentifier(); 390 } 391 392 /// Return true if this token has been poisoned. 393 bool isPoisoned() const { return IsPoisoned; } 394 395 /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether 396 /// this identifier is a C++ alternate representation of an operator. 397 void setIsCPlusPlusOperatorKeyword(bool Val = true) { 398 IsCPPOperatorKeyword = Val; 399 } 400 bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; } 401 402 /// Return true if this token is a keyword in the specified language. 403 bool isKeyword(const LangOptions &LangOpts) const; 404 405 /// Return true if this token is a C++ keyword in the specified 406 /// language. 
407 bool isCPlusPlusKeyword(const LangOptions &LangOpts) const; 408 409 /// Get and set FETokenInfo. The language front-end is allowed to associate 410 /// arbitrary metadata with this token. 411 void *getFETokenInfo() const { return FETokenInfo; } 412 void setFETokenInfo(void *T) { FETokenInfo = T; } 413 414 /// Return true if the Preprocessor::HandleIdentifier must be called 415 /// on a token of this identifier. 416 /// 417 /// If this returns false, we know that HandleIdentifier will not affect 418 /// the token. 419 bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; } 420 421 /// Return true if the identifier in its current state was loaded 422 /// from an AST file. 423 bool isFromAST() const { return IsFromAST; } 424 425 void setIsFromAST() { IsFromAST = true; } 426 427 /// Determine whether this identifier has changed since it was loaded 428 /// from an AST file. 429 bool hasChangedSinceDeserialization() const { 430 return ChangedAfterLoad; 431 } 432 433 /// Note that this identifier has changed since it was loaded from 434 /// an AST file. 435 void setChangedSinceDeserialization() { 436 ChangedAfterLoad = true; 437 } 438 439 /// Determine whether the frontend token information for this 440 /// identifier has changed since it was loaded from an AST file. 441 bool hasFETokenInfoChangedSinceDeserialization() const { 442 return FEChangedAfterLoad; 443 } 444 445 /// Note that the frontend token information for this identifier has 446 /// changed since it was loaded from an AST file. 447 void setFETokenInfoChangedSinceDeserialization() { 448 FEChangedAfterLoad = true; 449 } 450 451 /// Determine whether the information for this identifier is out of 452 /// date with respect to the external source. 453 bool isOutOfDate() const { return OutOfDate; } 454 455 /// Set whether the information for this identifier is out of 456 /// date with respect to the external source. 
457 void setOutOfDate(bool OOD) { 458 OutOfDate = OOD; 459 if (OOD) 460 NeedsHandleIdentifier = true; 461 else 462 RecomputeNeedsHandleIdentifier(); 463 } 464 465 /// Determine whether this is the contextual keyword \c import. 466 bool isModulesImport() const { return IsModulesImport; } 467 468 /// Set whether this identifier is the contextual keyword \c import. 469 void setModulesImport(bool I) { 470 IsModulesImport = I; 471 if (I) 472 NeedsHandleIdentifier = true; 473 else 474 RecomputeNeedsHandleIdentifier(); 475 } 476 477 /// Determine whether this is the mangled name of an OpenMP variant. 478 bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; } 479 480 /// Set whether this is the mangled name of an OpenMP variant. 481 void setMangledOpenMPVariantName(bool I) { IsMangledOpenMPVariantName = I; } 482 483 /// Return true if this identifier is an editor placeholder. 484 /// 485 /// Editor placeholders are produced by the code-completion engine and are 486 /// represented as characters between '<#' and '#>' in the source code. An 487 /// example of auto-completed call with a placeholder parameter is shown 488 /// below: 489 /// \code 490 /// function(<#int x#>); 491 /// \endcode 492 bool isEditorPlaceholder() const { 493 return getName().startswith("<#") && getName().endswith("#>"); 494 } 495 496 /// Determine whether \p this is a name reserved for the implementation (C99 497 /// 7.1.3, C++ [lib.global.names]). 498 ReservedIdentifierStatus isReserved(const LangOptions &LangOpts) const; 499 500 /// Determine whether \p this is a name reserved for future standardization or 501 /// the implementation (C++ [usrlit.suffix]). 502 ReservedLiteralSuffixIdStatus isReservedLiteralSuffixId() const; 503 504 /// If the identifier is an "uglified" reserved name, return a cleaned form. 505 /// e.g. _Foo => Foo. Otherwise, just returns the name. 506 StringRef deuglifiedName() const; 507 508 /// Provide less than operator for lexicographical sorting. 
509 bool operator<(const IdentifierInfo &RHS) const { 510 return getName() < RHS.getName(); 511 } 512 513 private: 514 /// The Preprocessor::HandleIdentifier does several special (but rare) 515 /// things to identifiers of various sorts. For example, it changes the 516 /// \c for keyword token from tok::identifier to tok::for. 517 /// 518 /// This method is very tied to the definition of HandleIdentifier. Any 519 /// change to it should be reflected here. 520 void RecomputeNeedsHandleIdentifier() { 521 NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() || 522 isExtensionToken() || isFutureCompatKeyword() || 523 isOutOfDate() || isModulesImport(); 524 } 525 }; 526 527 /// An RAII object for [un]poisoning an identifier within a scope. 528 /// 529 /// \p II is allowed to be null, in which case objects of this type have 530 /// no effect. 531 class PoisonIdentifierRAIIObject { 532 IdentifierInfo *const II; 533 const bool OldValue; 534 535 public: 536 PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue) 537 : II(II), OldValue(II ? II->isPoisoned() : false) { 538 if(II) 539 II->setIsPoisoned(NewValue); 540 } 541 542 ~PoisonIdentifierRAIIObject() { 543 if(II) 544 II->setIsPoisoned(OldValue); 545 } 546 }; 547 548 /// An iterator that walks over all of the known identifiers 549 /// in the lookup table. 550 /// 551 /// Since this iterator uses an abstract interface via virtual 552 /// functions, it uses an object-oriented interface rather than the 553 /// more standard C++ STL iterator interface. In this OO-style 554 /// iteration, the single function \c Next() provides dereference, 555 /// advance, and end-of-sequence checking in a single 556 /// operation. Subclasses of this iterator type will provide the 557 /// actual functionality. 
class IdentifierIterator {
protected:
  // Only subclasses may construct an iterator.
  IdentifierIterator() = default;

public:
  // Iterators are not copyable.
  IdentifierIterator(const IdentifierIterator &) = delete;
  IdentifierIterator &operator=(const IdentifierIterator &) = delete;

  virtual ~IdentifierIterator();

  /// Retrieve the next string in the identifier table and
  /// advances the iterator for the following string.
  ///
  /// \returns The next string in the identifier table. If there is
  /// no such string, returns an empty \c StringRef.
  virtual StringRef Next() = 0;
};

/// Provides lookups to, and iteration over, IdentifierInfo objects.
class IdentifierInfoLookup {
public:
  virtual ~IdentifierInfoLookup();

  /// Return the IdentifierInfo for the specified named identifier.
  ///
  /// Unlike the version in IdentifierTable, this returns a pointer instead
  /// of a reference.  If the pointer is null then the IdentifierInfo cannot
  /// be found.
  virtual IdentifierInfo* get(StringRef Name) = 0;

  /// Retrieve an iterator into the set of all identifiers
  /// known to this identifier lookup source.
  ///
  /// This routine provides access to all of the identifiers known to
  /// the identifier lookup, allowing access to the contents of the
  /// identifiers without introducing the overhead of constructing
  /// IdentifierInfo objects for each.
  ///
  /// \returns A new iterator into the set of known identifiers. The
  /// caller is responsible for deleting this iterator.
  // Unlike get(), this is not pure virtual; its default implementation is
  // defined outside this header.
  virtual IdentifierIterator *getIdentifiers();
};

/// Implements an efficient mapping from strings to IdentifierInfo nodes.
///
/// This has no other purpose, but this is an extremely performance-critical
/// piece of the code, as each occurrence of every identifier goes through
/// here when lexed.
606 class IdentifierTable { 607 // Shark shows that using MallocAllocator is *much* slower than using this 608 // BumpPtrAllocator! 609 using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>; 610 HashTableTy HashTable; 611 612 IdentifierInfoLookup* ExternalLookup; 613 614 public: 615 /// Create the identifier table. 616 explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr); 617 618 /// Create the identifier table, populating it with info about the 619 /// language keywords for the language specified by \p LangOpts. 620 explicit IdentifierTable(const LangOptions &LangOpts, 621 IdentifierInfoLookup *ExternalLookup = nullptr); 622 623 /// Set the external identifier lookup mechanism. 624 void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) { 625 ExternalLookup = IILookup; 626 } 627 628 /// Retrieve the external identifier lookup object, if any. 629 IdentifierInfoLookup *getExternalIdentifierLookup() const { 630 return ExternalLookup; 631 } 632 633 llvm::BumpPtrAllocator& getAllocator() { 634 return HashTable.getAllocator(); 635 } 636 637 /// Return the identifier token info for the specified named 638 /// identifier. 639 IdentifierInfo &get(StringRef Name) { 640 auto &Entry = *HashTable.try_emplace(Name, nullptr).first; 641 642 IdentifierInfo *&II = Entry.second; 643 if (II) return *II; 644 645 // No entry; if we have an external lookup, look there first. 646 if (ExternalLookup) { 647 II = ExternalLookup->get(Name); 648 if (II) 649 return *II; 650 } 651 652 // Lookups failed, make a new IdentifierInfo. 653 void *Mem = getAllocator().Allocate<IdentifierInfo>(); 654 II = new (Mem) IdentifierInfo(); 655 656 // Make sure getName() knows how to find the IdentifierInfo 657 // contents. 
658 II->Entry = &Entry; 659 660 return *II; 661 } 662 663 IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) { 664 IdentifierInfo &II = get(Name); 665 II.TokenID = TokenCode; 666 assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large"); 667 return II; 668 } 669 670 /// Gets an IdentifierInfo for the given name without consulting 671 /// external sources. 672 /// 673 /// This is a version of get() meant for external sources that want to 674 /// introduce or modify an identifier. If they called get(), they would 675 /// likely end up in a recursion. 676 IdentifierInfo &getOwn(StringRef Name) { 677 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first; 678 679 IdentifierInfo *&II = Entry.second; 680 if (II) 681 return *II; 682 683 // Lookups failed, make a new IdentifierInfo. 684 void *Mem = getAllocator().Allocate<IdentifierInfo>(); 685 II = new (Mem) IdentifierInfo(); 686 687 // Make sure getName() knows how to find the IdentifierInfo 688 // contents. 689 II->Entry = &Entry; 690 691 // If this is the 'import' contextual keyword, mark it as such. 692 if (Name.equals("import")) 693 II->setModulesImport(true); 694 695 return *II; 696 } 697 698 using iterator = HashTableTy::const_iterator; 699 using const_iterator = HashTableTy::const_iterator; 700 701 iterator begin() const { return HashTable.begin(); } 702 iterator end() const { return HashTable.end(); } 703 unsigned size() const { return HashTable.size(); } 704 705 iterator find(StringRef Name) const { return HashTable.find(Name); } 706 707 /// Print some statistics to stderr that indicate how well the 708 /// hashing is doing. 709 void PrintStats() const; 710 711 /// Populate the identifier table with info about the language keywords 712 /// for the language specified by \p LangOpts. 713 void AddKeywords(const LangOptions &LangOpts); 714 715 /// Returns the correct diagnostic to issue for a future-compat diagnostic 716 /// warning. 
Note, this function assumes the identifier passed has already 717 /// been determined to be a future compatible keyword. 718 diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, 719 const LangOptions &LangOpts); 720 }; 721 722 /// A family of Objective-C methods. 723 /// 724 /// These families have no inherent meaning in the language, but are 725 /// nonetheless central enough in the existing implementations to 726 /// merit direct AST support. While, in theory, arbitrary methods can 727 /// be considered to form families, we focus here on the methods 728 /// involving allocation and retain-count management, as these are the 729 /// most "core" and the most likely to be useful to diverse clients 730 /// without extra information. 731 /// 732 /// Both selectors and actual method declarations may be classified 733 /// into families. Method families may impose additional restrictions 734 /// beyond their selector name; for example, a method called '_init' 735 /// that returns void is not considered to be in the 'init' family 736 /// (but would be if it returned 'id'). It is also possible to 737 /// explicitly change or remove a method's family. Therefore the 738 /// method's family should be considered the single source of truth. 739 enum ObjCMethodFamily { 740 /// No particular method family. 741 OMF_None, 742 743 // Selectors in these families may have arbitrary arity, may be 744 // written with arbitrary leading underscores, and may have 745 // additional CamelCase "words" in their first selector chunk 746 // following the family name. 747 OMF_alloc, 748 OMF_copy, 749 OMF_init, 750 OMF_mutableCopy, 751 OMF_new, 752 753 // These families are singletons consisting only of the nullary 754 // selector with the given name. 
755 OMF_autorelease, 756 OMF_dealloc, 757 OMF_finalize, 758 OMF_release, 759 OMF_retain, 760 OMF_retainCount, 761 OMF_self, 762 OMF_initialize, 763 764 // performSelector families 765 OMF_performSelector 766 }; 767 768 /// Enough bits to store any enumerator in ObjCMethodFamily or 769 /// InvalidObjCMethodFamily. 770 enum { ObjCMethodFamilyBitWidth = 4 }; 771 772 /// An invalid value of ObjCMethodFamily. 773 enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 }; 774 775 /// A family of Objective-C methods. 776 /// 777 /// These are family of methods whose result type is initially 'id', but 778 /// but are candidate for the result type to be changed to 'instancetype'. 779 enum ObjCInstanceTypeFamily { 780 OIT_None, 781 OIT_Array, 782 OIT_Dictionary, 783 OIT_Singleton, 784 OIT_Init, 785 OIT_ReturnsSelf 786 }; 787 788 enum ObjCStringFormatFamily { 789 SFF_None, 790 SFF_NSString, 791 SFF_CFString 792 }; 793 794 /// Smart pointer class that efficiently represents Objective-C method 795 /// names. 796 /// 797 /// This class will either point to an IdentifierInfo or a 798 /// MultiKeywordSelector (which is private). This enables us to optimize 799 /// selectors that take no arguments and selectors that take 1 argument, which 800 /// accounts for 78% of all selectors in Cocoa.h. 801 class Selector { 802 friend class Diagnostic; 803 friend class SelectorTable; // only the SelectorTable can create these 804 friend class DeclarationName; // and the AST's DeclarationName. 805 806 enum IdentifierInfoFlag { 807 // Empty selector = 0. Note that these enumeration values must 808 // correspond to the enumeration values of DeclarationName::StoredNameKind 809 ZeroArg = 0x01, 810 OneArg = 0x02, 811 MultiArg = 0x07, 812 ArgFlags = 0x07 813 }; 814 815 /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low 816 /// three bits of InfoPtr to store an IdentifierInfoFlag. 
Note that in any 817 /// case IdentifierInfo and MultiKeywordSelector are already aligned to 818 /// 8 bytes even on 32 bits archs because of DeclarationName. 819 uintptr_t InfoPtr = 0; 820 821 Selector(IdentifierInfo *II, unsigned nArgs) { 822 InfoPtr = reinterpret_cast<uintptr_t>(II); 823 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo"); 824 assert(nArgs < 2 && "nArgs not equal to 0/1"); 825 InfoPtr |= nArgs+1; 826 } 827 828 Selector(MultiKeywordSelector *SI) { 829 InfoPtr = reinterpret_cast<uintptr_t>(SI); 830 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo"); 831 InfoPtr |= MultiArg; 832 } 833 834 IdentifierInfo *getAsIdentifierInfo() const { 835 if (getIdentifierInfoFlag() < MultiArg) 836 return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags); 837 return nullptr; 838 } 839 840 MultiKeywordSelector *getMultiKeywordSelector() const { 841 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags); 842 } 843 844 unsigned getIdentifierInfoFlag() const { 845 return InfoPtr & ArgFlags; 846 } 847 848 static ObjCMethodFamily getMethodFamilyImpl(Selector sel); 849 850 static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel); 851 852 public: 853 /// The default ctor should only be used when creating data structures that 854 /// will contain selectors. 855 Selector() = default; 856 explicit Selector(uintptr_t V) : InfoPtr(V) {} 857 858 /// operator==/!= - Indicate whether the specified selectors are identical. 859 bool operator==(Selector RHS) const { 860 return InfoPtr == RHS.InfoPtr; 861 } 862 bool operator!=(Selector RHS) const { 863 return InfoPtr != RHS.InfoPtr; 864 } 865 866 void *getAsOpaquePtr() const { 867 return reinterpret_cast<void*>(InfoPtr); 868 } 869 870 /// Determine whether this is the empty selector. 871 bool isNull() const { return InfoPtr == 0; } 872 873 // Predicates to identify the selector type. 
874 bool isKeywordSelector() const { 875 return getIdentifierInfoFlag() != ZeroArg; 876 } 877 878 bool isUnarySelector() const { 879 return getIdentifierInfoFlag() == ZeroArg; 880 } 881 882 /// If this selector is the specific keyword selector described by Names. 883 bool isKeywordSelector(ArrayRef<StringRef> Names) const; 884 885 /// If this selector is the specific unary selector described by Name. 886 bool isUnarySelector(StringRef Name) const; 887 888 unsigned getNumArgs() const; 889 890 /// Retrieve the identifier at a given position in the selector. 891 /// 892 /// Note that the identifier pointer returned may be NULL. Clients that only 893 /// care about the text of the identifier string, and not the specific, 894 /// uniqued identifier pointer, should use \c getNameForSlot(), which returns 895 /// an empty string when the identifier pointer would be NULL. 896 /// 897 /// \param argIndex The index for which we want to retrieve the identifier. 898 /// This index shall be less than \c getNumArgs() unless this is a keyword 899 /// selector, in which case 0 is the only permissible value. 900 /// 901 /// \returns the uniqued identifier for this slot, or NULL if this slot has 902 /// no corresponding identifier. 903 IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const; 904 905 /// Retrieve the name at a given position in the selector. 906 /// 907 /// \param argIndex The index for which we want to retrieve the name. 908 /// This index shall be less than \c getNumArgs() unless this is a keyword 909 /// selector, in which case 0 is the only permissible value. 910 /// 911 /// \returns the name for this slot, which may be the empty string if no 912 /// name was supplied. 913 StringRef getNameForSlot(unsigned argIndex) const; 914 915 /// Derive the full selector name (e.g. "foo:bar:") and return 916 /// it as an std::string. 917 std::string getAsString() const; 918 919 /// Prints the full selector name (e.g. "foo:bar:"). 
920 void print(llvm::raw_ostream &OS) const; 921 922 void dump() const; 923 924 /// Derive the conventional family of this method. 925 ObjCMethodFamily getMethodFamily() const { 926 return getMethodFamilyImpl(*this); 927 } 928 929 ObjCStringFormatFamily getStringFormatFamily() const { 930 return getStringFormatFamilyImpl(*this); 931 } 932 933 static Selector getEmptyMarker() { 934 return Selector(uintptr_t(-1)); 935 } 936 937 static Selector getTombstoneMarker() { 938 return Selector(uintptr_t(-2)); 939 } 940 941 static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel); 942 }; 943 944 /// This table allows us to fully hide how we implement 945 /// multi-keyword caching. 946 class SelectorTable { 947 // Actually a SelectorTableImpl 948 void *Impl; 949 950 public: 951 SelectorTable(); 952 SelectorTable(const SelectorTable &) = delete; 953 SelectorTable &operator=(const SelectorTable &) = delete; 954 ~SelectorTable(); 955 956 /// Can create any sort of selector. 957 /// 958 /// \p NumArgs indicates whether this is a no argument selector "foo", a 959 /// single argument selector "foo:" or multi-argument "foo:bar:". 960 Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV); 961 962 Selector getUnarySelector(IdentifierInfo *ID) { 963 return Selector(ID, 1); 964 } 965 966 Selector getNullarySelector(IdentifierInfo *ID) { 967 return Selector(ID, 0); 968 } 969 970 /// Return the total amount of memory allocated for managing selectors. 971 size_t getTotalMemory() const; 972 973 /// Return the default setter name for the given identifier. 974 /// 975 /// This is "set" + \p Name where the initial character of \p Name 976 /// has been capitalized. 977 static SmallString<64> constructSetterName(StringRef Name); 978 979 /// Return the default setter selector for the given identifier. 980 /// 981 /// This is "set" + \p Name where the initial character of \p Name 982 /// has been capitalized. 
983 static Selector constructSetterSelector(IdentifierTable &Idents, 984 SelectorTable &SelTable, 985 const IdentifierInfo *Name); 986 987 /// Return the property name for the given setter selector. 988 static std::string getPropertyNameFromSetterSelector(Selector Sel); 989 }; 990 991 namespace detail { 992 993 /// DeclarationNameExtra is used as a base of various uncommon special names. 994 /// This class is needed since DeclarationName has not enough space to store 995 /// the kind of every possible names. Therefore the kind of common names is 996 /// stored directly in DeclarationName, and the kind of uncommon names is 997 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because 998 /// DeclarationName needs the lower 3 bits to store the kind of common names. 999 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change 1000 /// here is very likely to require changes in DeclarationName(Table). 1001 class alignas(IdentifierInfoAlignment) DeclarationNameExtra { 1002 friend class clang::DeclarationName; 1003 friend class clang::DeclarationNameTable; 1004 1005 protected: 1006 /// The kind of "extra" information stored in the DeclarationName. See 1007 /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values 1008 /// are used. Note that DeclarationName depends on the numerical values 1009 /// of the enumerators in this enum. See DeclarationName::StoredNameKind 1010 /// for more info. 1011 enum ExtraKind { 1012 CXXDeductionGuideName, 1013 CXXLiteralOperatorName, 1014 CXXUsingDirective, 1015 ObjCMultiArgSelector 1016 }; 1017 1018 /// ExtraKindOrNumArgs has one of the following meaning: 1019 /// * The kind of an uncommon C++ special name. This DeclarationNameExtra 1020 /// is in this case in fact either a CXXDeductionGuideNameExtra or 1021 /// a CXXLiteralOperatorIdName. 
1022 /// 1023 /// * It may be also name common to C++ using-directives (CXXUsingDirective), 1024 /// 1025 /// * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is 1026 /// the number of arguments in the Objective-C selector, in which 1027 /// case the DeclarationNameExtra is also a MultiKeywordSelector. 1028 unsigned ExtraKindOrNumArgs; 1029 1030 DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {} 1031 DeclarationNameExtra(unsigned NumArgs) 1032 : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {} 1033 1034 /// Return the corresponding ExtraKind. 1035 ExtraKind getKind() const { 1036 return static_cast<ExtraKind>(ExtraKindOrNumArgs > 1037 (unsigned)ObjCMultiArgSelector 1038 ? (unsigned)ObjCMultiArgSelector 1039 : ExtraKindOrNumArgs); 1040 } 1041 1042 /// Return the number of arguments in an ObjC selector. Only valid when this 1043 /// is indeed an ObjCMultiArgSelector. 1044 unsigned getNumArgs() const { 1045 assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector && 1046 "getNumArgs called but this is not an ObjC selector!"); 1047 return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector; 1048 } 1049 }; 1050 1051 } // namespace detail 1052 1053 } // namespace clang 1054 1055 namespace llvm { 1056 1057 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and 1058 /// DenseSets. 
1059 template <> 1060 struct DenseMapInfo<clang::Selector> { 1061 static clang::Selector getEmptyKey() { 1062 return clang::Selector::getEmptyMarker(); 1063 } 1064 1065 static clang::Selector getTombstoneKey() { 1066 return clang::Selector::getTombstoneMarker(); 1067 } 1068 1069 static unsigned getHashValue(clang::Selector S); 1070 1071 static bool isEqual(clang::Selector LHS, clang::Selector RHS) { 1072 return LHS == RHS; 1073 } 1074 }; 1075 1076 template<> 1077 struct PointerLikeTypeTraits<clang::Selector> { 1078 static const void *getAsVoidPointer(clang::Selector P) { 1079 return P.getAsOpaquePtr(); 1080 } 1081 1082 static clang::Selector getFromVoidPointer(const void *P) { 1083 return clang::Selector(reinterpret_cast<uintptr_t>(P)); 1084 } 1085 1086 static constexpr int NumLowBitsAvailable = 0; 1087 }; 1088 1089 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which 1090 // are not guaranteed to be 8-byte aligned. 1091 template<> 1092 struct PointerLikeTypeTraits<clang::IdentifierInfo*> { 1093 static void *getAsVoidPointer(clang::IdentifierInfo* P) { 1094 return P; 1095 } 1096 1097 static clang::IdentifierInfo *getFromVoidPointer(void *P) { 1098 return static_cast<clang::IdentifierInfo*>(P); 1099 } 1100 1101 static constexpr int NumLowBitsAvailable = 1; 1102 }; 1103 1104 template<> 1105 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> { 1106 static const void *getAsVoidPointer(const clang::IdentifierInfo* P) { 1107 return P; 1108 } 1109 1110 static const clang::IdentifierInfo *getFromVoidPointer(const void *P) { 1111 return static_cast<const clang::IdentifierInfo*>(P); 1112 } 1113 1114 static constexpr int NumLowBitsAvailable = 1; 1115 }; 1116 1117 } // namespace llvm 1118 1119 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 1120