//===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
/// clang::Selector interfaces.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
#define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H

#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
#include "llvm/Support/type_traits.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <utility>

namespace clang {

class DeclarationName;
class DeclarationNameTable;
class IdentifierInfo;
class LangOptions;
class MultiKeywordSelector;
class SourceLocation;

/// Classification produced by IdentifierInfo::isReserved().
///
/// \c NotReserved is the only value that isReservedAtGlobalScope() treats as
/// "not reserved". isReservedInAllContexts() additionally excludes
/// \c StartsWithUnderscoreAtGlobalScope and
/// \c StartsWithUnderscoreAndIsExternC, i.e. those two are reserved at global
/// scope only, while the remaining three are reserved everywhere.
enum class ReservedIdentifierStatus {
  NotReserved = 0,
  StartsWithUnderscoreAtGlobalScope,
  StartsWithUnderscoreAndIsExternC,
  StartsWithDoubleUnderscore,
  StartsWithUnderscoreFollowedByCapitalLetter,
  ContainsDoubleUnderscore,
};

/// Determine whether an identifier is reserved for use as a name at global
/// scope. Such identifiers might be implementation-specific global functions
/// or variables.
56 inline bool isReservedAtGlobalScope(ReservedIdentifierStatus Status) { 57 return Status != ReservedIdentifierStatus::NotReserved; 58 } 59 60 /// Determine whether an identifier is reserved in all contexts. Such 61 /// identifiers might be implementation-specific keywords or macros, for 62 /// example. 63 inline bool isReservedInAllContexts(ReservedIdentifierStatus Status) { 64 return Status != ReservedIdentifierStatus::NotReserved && 65 Status != ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope && 66 Status != ReservedIdentifierStatus::StartsWithUnderscoreAndIsExternC; 67 } 68 69 /// A simple pair of identifier info and location. 70 using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>; 71 72 /// IdentifierInfo and other related classes are aligned to 73 /// 8 bytes so that DeclarationName can use the lower 3 bits 74 /// of a pointer to one of these classes. 75 enum { IdentifierInfoAlignment = 8 }; 76 77 static constexpr int ObjCOrBuiltinIDBits = 16; 78 79 /// One of these records is kept for each identifier that 80 /// is lexed. This contains information about whether the token was \#define'd, 81 /// is a language keyword, or if it is a front-end token of some sort (e.g. a 82 /// variable or function name). The preprocessor keeps this information in a 83 /// set, and all tok::identifier tokens have a pointer to one of these. 84 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits. 85 class alignas(IdentifierInfoAlignment) IdentifierInfo { 86 friend class IdentifierTable; 87 88 // Front-end token ID or tok::identifier. 89 unsigned TokenID : 9; 90 91 // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf). 92 // First NUM_OBJC_KEYWORDS values are for Objective-C, 93 // the remaining values are for builtins. 94 unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits; 95 96 // True if there is a #define for this. 97 unsigned HasMacro : 1; 98 99 // True if there was a #define for this. 
100 unsigned HadMacro : 1; 101 102 // True if the identifier is a language extension. 103 unsigned IsExtension : 1; 104 105 // True if the identifier is a keyword in a newer or proposed Standard. 106 unsigned IsFutureCompatKeyword : 1; 107 108 // True if the identifier is poisoned. 109 unsigned IsPoisoned : 1; 110 111 // True if the identifier is a C++ operator keyword. 112 unsigned IsCPPOperatorKeyword : 1; 113 114 // Internal bit set by the member function RecomputeNeedsHandleIdentifier. 115 // See comment about RecomputeNeedsHandleIdentifier for more info. 116 unsigned NeedsHandleIdentifier : 1; 117 118 // True if the identifier was loaded (at least partially) from an AST file. 119 unsigned IsFromAST : 1; 120 121 // True if the identifier has changed from the definition 122 // loaded from an AST file. 123 unsigned ChangedAfterLoad : 1; 124 125 // True if the identifier's frontend information has changed from the 126 // definition loaded from an AST file. 127 unsigned FEChangedAfterLoad : 1; 128 129 // True if revertTokenIDToIdentifier was called. 130 unsigned RevertedTokenID : 1; 131 132 // True if there may be additional information about 133 // this identifier stored externally. 134 unsigned OutOfDate : 1; 135 136 // True if this is the 'import' contextual keyword. 137 unsigned IsModulesImport : 1; 138 139 // True if this is a mangled OpenMP variant name. 140 unsigned IsMangledOpenMPVariantName : 1; 141 142 // True if this is a deprecated macro. 143 unsigned IsDeprecatedMacro : 1; 144 145 // True if this macro is unsafe in headers. 146 unsigned IsRestrictExpansion : 1; 147 148 // True if this macro is final. 149 unsigned IsFinal : 1; 150 151 // 22 bits left in a 64-bit word. 152 153 // Managed by the language front-end. 
154 void *FETokenInfo = nullptr; 155 156 llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr; 157 158 IdentifierInfo() 159 : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false), 160 HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false), 161 IsPoisoned(false), IsCPPOperatorKeyword(false), 162 NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false), 163 FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false), 164 IsModulesImport(false), IsMangledOpenMPVariantName(false), 165 IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {} 166 167 public: 168 IdentifierInfo(const IdentifierInfo &) = delete; 169 IdentifierInfo &operator=(const IdentifierInfo &) = delete; 170 IdentifierInfo(IdentifierInfo &&) = delete; 171 IdentifierInfo &operator=(IdentifierInfo &&) = delete; 172 173 /// Return true if this is the identifier for the specified string. 174 /// 175 /// This is intended to be used for string literals only: II->isStr("foo"). 176 template <std::size_t StrLen> 177 bool isStr(const char (&Str)[StrLen]) const { 178 return getLength() == StrLen-1 && 179 memcmp(getNameStart(), Str, StrLen-1) == 0; 180 } 181 182 /// Return true if this is the identifier for the specified StringRef. 183 bool isStr(llvm::StringRef Str) const { 184 llvm::StringRef ThisStr(getNameStart(), getLength()); 185 return ThisStr == Str; 186 } 187 188 /// Return the beginning of the actual null-terminated string for this 189 /// identifier. 190 const char *getNameStart() const { return Entry->getKeyData(); } 191 192 /// Efficiently return the length of this identifier info. 193 unsigned getLength() const { return Entry->getKeyLength(); } 194 195 /// Return the actual identifier string. 196 StringRef getName() const { 197 return StringRef(getNameStart(), getLength()); 198 } 199 200 /// Return true if this identifier is \#defined to some other value. 201 /// \note The current definition may be in a module and not currently visible. 
202 bool hasMacroDefinition() const { 203 return HasMacro; 204 } 205 void setHasMacroDefinition(bool Val) { 206 if (HasMacro == Val) return; 207 208 HasMacro = Val; 209 if (Val) { 210 NeedsHandleIdentifier = true; 211 HadMacro = true; 212 } else { 213 // If this is a final macro, make the deprecation and header unsafe bits 214 // stick around after the undefinition so they apply to any redefinitions. 215 if (!IsFinal) { 216 // Because calling the setters of these calls recomputes, just set them 217 // manually to avoid recomputing a bunch of times. 218 IsDeprecatedMacro = false; 219 IsRestrictExpansion = false; 220 } 221 RecomputeNeedsHandleIdentifier(); 222 } 223 } 224 /// Returns true if this identifier was \#defined to some value at any 225 /// moment. In this case there should be an entry for the identifier in the 226 /// macro history table in Preprocessor. 227 bool hadMacroDefinition() const { 228 return HadMacro; 229 } 230 231 bool isDeprecatedMacro() const { return IsDeprecatedMacro; } 232 233 void setIsDeprecatedMacro(bool Val) { 234 if (IsDeprecatedMacro == Val) 235 return; 236 IsDeprecatedMacro = Val; 237 if (Val) 238 NeedsHandleIdentifier = true; 239 else 240 RecomputeNeedsHandleIdentifier(); 241 } 242 243 bool isRestrictExpansion() const { return IsRestrictExpansion; } 244 245 void setIsRestrictExpansion(bool Val) { 246 if (IsRestrictExpansion == Val) 247 return; 248 IsRestrictExpansion = Val; 249 if (Val) 250 NeedsHandleIdentifier = true; 251 else 252 RecomputeNeedsHandleIdentifier(); 253 } 254 255 bool isFinal() const { return IsFinal; } 256 257 void setIsFinal(bool Val) { IsFinal = Val; } 258 259 /// If this is a source-language token (e.g. 'for'), this API 260 /// can be used to cause the lexer to map identifiers to source-language 261 /// tokens. 262 tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; } 263 264 /// True if revertTokenIDToIdentifier() was called. 
265 bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; } 266 267 /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2 268 /// compatibility. 269 /// 270 /// TokenID is normally read-only but there are 2 instances where we revert it 271 /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens 272 /// using this method so we can inform serialization about it. 273 void revertTokenIDToIdentifier() { 274 assert(TokenID != tok::identifier && "Already at tok::identifier"); 275 TokenID = tok::identifier; 276 RevertedTokenID = true; 277 } 278 void revertIdentifierToTokenID(tok::TokenKind TK) { 279 assert(TokenID == tok::identifier && "Should be at tok::identifier"); 280 TokenID = TK; 281 RevertedTokenID = false; 282 } 283 284 /// Return the preprocessor keyword ID for this identifier. 285 /// 286 /// For example, "define" will return tok::pp_define. 287 tok::PPKeywordKind getPPKeywordID() const; 288 289 /// Return the Objective-C keyword ID for the this identifier. 290 /// 291 /// For example, 'class' will return tok::objc_class if ObjC is enabled. 292 tok::ObjCKeywordKind getObjCKeywordID() const { 293 if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS) 294 return tok::ObjCKeywordKind(ObjCOrBuiltinID); 295 else 296 return tok::objc_not_keyword; 297 } 298 void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; } 299 300 /// Return a value indicating whether this is a builtin function. 301 /// 302 /// 0 is not-built-in. 1+ are specific builtin functions. 
303 unsigned getBuiltinID() const { 304 if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS) 305 return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS; 306 else 307 return 0; 308 } 309 void setBuiltinID(unsigned ID) { 310 ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS; 311 assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID 312 && "ID too large for field!"); 313 } 314 315 unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; } 316 void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; } 317 318 /// get/setExtension - Initialize information about whether or not this 319 /// language token is an extension. This controls extension warnings, and is 320 /// only valid if a custom token ID is set. 321 bool isExtensionToken() const { return IsExtension; } 322 void setIsExtensionToken(bool Val) { 323 IsExtension = Val; 324 if (Val) 325 NeedsHandleIdentifier = true; 326 else 327 RecomputeNeedsHandleIdentifier(); 328 } 329 330 /// is/setIsFutureCompatKeyword - Initialize information about whether or not 331 /// this language token is a keyword in a newer or proposed Standard. This 332 /// controls compatibility warnings, and is only true when not parsing the 333 /// corresponding Standard. Once a compatibility problem has been diagnosed 334 /// with this keyword, the flag will be cleared. 335 bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; } 336 void setIsFutureCompatKeyword(bool Val) { 337 IsFutureCompatKeyword = Val; 338 if (Val) 339 NeedsHandleIdentifier = true; 340 else 341 RecomputeNeedsHandleIdentifier(); 342 } 343 344 /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the 345 /// Preprocessor will emit an error every time this token is used. 346 void setIsPoisoned(bool Value = true) { 347 IsPoisoned = Value; 348 if (Value) 349 NeedsHandleIdentifier = true; 350 else 351 RecomputeNeedsHandleIdentifier(); 352 } 353 354 /// Return true if this token has been poisoned. 
355 bool isPoisoned() const { return IsPoisoned; } 356 357 /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether 358 /// this identifier is a C++ alternate representation of an operator. 359 void setIsCPlusPlusOperatorKeyword(bool Val = true) { 360 IsCPPOperatorKeyword = Val; 361 } 362 bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; } 363 364 /// Return true if this token is a keyword in the specified language. 365 bool isKeyword(const LangOptions &LangOpts) const; 366 367 /// Return true if this token is a C++ keyword in the specified 368 /// language. 369 bool isCPlusPlusKeyword(const LangOptions &LangOpts) const; 370 371 /// Get and set FETokenInfo. The language front-end is allowed to associate 372 /// arbitrary metadata with this token. 373 void *getFETokenInfo() const { return FETokenInfo; } 374 void setFETokenInfo(void *T) { FETokenInfo = T; } 375 376 /// Return true if the Preprocessor::HandleIdentifier must be called 377 /// on a token of this identifier. 378 /// 379 /// If this returns false, we know that HandleIdentifier will not affect 380 /// the token. 381 bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; } 382 383 /// Return true if the identifier in its current state was loaded 384 /// from an AST file. 385 bool isFromAST() const { return IsFromAST; } 386 387 void setIsFromAST() { IsFromAST = true; } 388 389 /// Determine whether this identifier has changed since it was loaded 390 /// from an AST file. 391 bool hasChangedSinceDeserialization() const { 392 return ChangedAfterLoad; 393 } 394 395 /// Note that this identifier has changed since it was loaded from 396 /// an AST file. 397 void setChangedSinceDeserialization() { 398 ChangedAfterLoad = true; 399 } 400 401 /// Determine whether the frontend token information for this 402 /// identifier has changed since it was loaded from an AST file. 
403 bool hasFETokenInfoChangedSinceDeserialization() const { 404 return FEChangedAfterLoad; 405 } 406 407 /// Note that the frontend token information for this identifier has 408 /// changed since it was loaded from an AST file. 409 void setFETokenInfoChangedSinceDeserialization() { 410 FEChangedAfterLoad = true; 411 } 412 413 /// Determine whether the information for this identifier is out of 414 /// date with respect to the external source. 415 bool isOutOfDate() const { return OutOfDate; } 416 417 /// Set whether the information for this identifier is out of 418 /// date with respect to the external source. 419 void setOutOfDate(bool OOD) { 420 OutOfDate = OOD; 421 if (OOD) 422 NeedsHandleIdentifier = true; 423 else 424 RecomputeNeedsHandleIdentifier(); 425 } 426 427 /// Determine whether this is the contextual keyword \c import. 428 bool isModulesImport() const { return IsModulesImport; } 429 430 /// Set whether this identifier is the contextual keyword \c import. 431 void setModulesImport(bool I) { 432 IsModulesImport = I; 433 if (I) 434 NeedsHandleIdentifier = true; 435 else 436 RecomputeNeedsHandleIdentifier(); 437 } 438 439 /// Determine whether this is the mangled name of an OpenMP variant. 440 bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; } 441 442 /// Set whether this is the mangled name of an OpenMP variant. 443 void setMangledOpenMPVariantName(bool I) { IsMangledOpenMPVariantName = I; } 444 445 /// Return true if this identifier is an editor placeholder. 446 /// 447 /// Editor placeholders are produced by the code-completion engine and are 448 /// represented as characters between '<#' and '#>' in the source code. 
An 449 /// example of auto-completed call with a placeholder parameter is shown 450 /// below: 451 /// \code 452 /// function(<#int x#>); 453 /// \endcode 454 bool isEditorPlaceholder() const { 455 return getName().startswith("<#") && getName().endswith("#>"); 456 } 457 458 /// Determine whether \p this is a name reserved for the implementation (C99 459 /// 7.1.3, C++ [lib.global.names]). 460 ReservedIdentifierStatus isReserved(const LangOptions &LangOpts) const; 461 462 /// If the identifier is an "uglified" reserved name, return a cleaned form. 463 /// e.g. _Foo => Foo. Otherwise, just returns the name. 464 StringRef deuglifiedName() const; 465 466 /// Provide less than operator for lexicographical sorting. 467 bool operator<(const IdentifierInfo &RHS) const { 468 return getName() < RHS.getName(); 469 } 470 471 private: 472 /// The Preprocessor::HandleIdentifier does several special (but rare) 473 /// things to identifiers of various sorts. For example, it changes the 474 /// \c for keyword token from tok::identifier to tok::for. 475 /// 476 /// This method is very tied to the definition of HandleIdentifier. Any 477 /// change to it should be reflected here. 478 void RecomputeNeedsHandleIdentifier() { 479 NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() || 480 isExtensionToken() || isFutureCompatKeyword() || 481 isOutOfDate() || isModulesImport(); 482 } 483 }; 484 485 /// An RAII object for [un]poisoning an identifier within a scope. 486 /// 487 /// \p II is allowed to be null, in which case objects of this type have 488 /// no effect. 489 class PoisonIdentifierRAIIObject { 490 IdentifierInfo *const II; 491 const bool OldValue; 492 493 public: 494 PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue) 495 : II(II), OldValue(II ? 
II->isPoisoned() : false) { 496 if(II) 497 II->setIsPoisoned(NewValue); 498 } 499 500 ~PoisonIdentifierRAIIObject() { 501 if(II) 502 II->setIsPoisoned(OldValue); 503 } 504 }; 505 506 /// An iterator that walks over all of the known identifiers 507 /// in the lookup table. 508 /// 509 /// Since this iterator uses an abstract interface via virtual 510 /// functions, it uses an object-oriented interface rather than the 511 /// more standard C++ STL iterator interface. In this OO-style 512 /// iteration, the single function \c Next() provides dereference, 513 /// advance, and end-of-sequence checking in a single 514 /// operation. Subclasses of this iterator type will provide the 515 /// actual functionality. 516 class IdentifierIterator { 517 protected: 518 IdentifierIterator() = default; 519 520 public: 521 IdentifierIterator(const IdentifierIterator &) = delete; 522 IdentifierIterator &operator=(const IdentifierIterator &) = delete; 523 524 virtual ~IdentifierIterator(); 525 526 /// Retrieve the next string in the identifier table and 527 /// advances the iterator for the following string. 528 /// 529 /// \returns The next string in the identifier table. If there is 530 /// no such string, returns an empty \c StringRef. 531 virtual StringRef Next() = 0; 532 }; 533 534 /// Provides lookups to, and iteration over, IdentiferInfo objects. 535 class IdentifierInfoLookup { 536 public: 537 virtual ~IdentifierInfoLookup(); 538 539 /// Return the IdentifierInfo for the specified named identifier. 540 /// 541 /// Unlike the version in IdentifierTable, this returns a pointer instead 542 /// of a reference. If the pointer is null then the IdentifierInfo cannot 543 /// be found. 544 virtual IdentifierInfo* get(StringRef Name) = 0; 545 546 /// Retrieve an iterator into the set of all identifiers 547 /// known to this identifier lookup source. 
548 /// 549 /// This routine provides access to all of the identifiers known to 550 /// the identifier lookup, allowing access to the contents of the 551 /// identifiers without introducing the overhead of constructing 552 /// IdentifierInfo objects for each. 553 /// 554 /// \returns A new iterator into the set of known identifiers. The 555 /// caller is responsible for deleting this iterator. 556 virtual IdentifierIterator *getIdentifiers(); 557 }; 558 559 /// Implements an efficient mapping from strings to IdentifierInfo nodes. 560 /// 561 /// This has no other purpose, but this is an extremely performance-critical 562 /// piece of the code, as each occurrence of every identifier goes through 563 /// here when lexed. 564 class IdentifierTable { 565 // Shark shows that using MallocAllocator is *much* slower than using this 566 // BumpPtrAllocator! 567 using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>; 568 HashTableTy HashTable; 569 570 IdentifierInfoLookup* ExternalLookup; 571 572 public: 573 /// Create the identifier table. 574 explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr); 575 576 /// Create the identifier table, populating it with info about the 577 /// language keywords for the language specified by \p LangOpts. 578 explicit IdentifierTable(const LangOptions &LangOpts, 579 IdentifierInfoLookup *ExternalLookup = nullptr); 580 581 /// Set the external identifier lookup mechanism. 582 void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) { 583 ExternalLookup = IILookup; 584 } 585 586 /// Retrieve the external identifier lookup object, if any. 587 IdentifierInfoLookup *getExternalIdentifierLookup() const { 588 return ExternalLookup; 589 } 590 591 llvm::BumpPtrAllocator& getAllocator() { 592 return HashTable.getAllocator(); 593 } 594 595 /// Return the identifier token info for the specified named 596 /// identifier. 
597 IdentifierInfo &get(StringRef Name) { 598 auto &Entry = *HashTable.try_emplace(Name, nullptr).first; 599 600 IdentifierInfo *&II = Entry.second; 601 if (II) return *II; 602 603 // No entry; if we have an external lookup, look there first. 604 if (ExternalLookup) { 605 II = ExternalLookup->get(Name); 606 if (II) 607 return *II; 608 } 609 610 // Lookups failed, make a new IdentifierInfo. 611 void *Mem = getAllocator().Allocate<IdentifierInfo>(); 612 II = new (Mem) IdentifierInfo(); 613 614 // Make sure getName() knows how to find the IdentifierInfo 615 // contents. 616 II->Entry = &Entry; 617 618 return *II; 619 } 620 621 IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) { 622 IdentifierInfo &II = get(Name); 623 II.TokenID = TokenCode; 624 assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large"); 625 return II; 626 } 627 628 /// Gets an IdentifierInfo for the given name without consulting 629 /// external sources. 630 /// 631 /// This is a version of get() meant for external sources that want to 632 /// introduce or modify an identifier. If they called get(), they would 633 /// likely end up in a recursion. 634 IdentifierInfo &getOwn(StringRef Name) { 635 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first; 636 637 IdentifierInfo *&II = Entry.second; 638 if (II) 639 return *II; 640 641 // Lookups failed, make a new IdentifierInfo. 642 void *Mem = getAllocator().Allocate<IdentifierInfo>(); 643 II = new (Mem) IdentifierInfo(); 644 645 // Make sure getName() knows how to find the IdentifierInfo 646 // contents. 647 II->Entry = &Entry; 648 649 // If this is the 'import' contextual keyword, mark it as such. 
650 if (Name.equals("import")) 651 II->setModulesImport(true); 652 653 return *II; 654 } 655 656 using iterator = HashTableTy::const_iterator; 657 using const_iterator = HashTableTy::const_iterator; 658 659 iterator begin() const { return HashTable.begin(); } 660 iterator end() const { return HashTable.end(); } 661 unsigned size() const { return HashTable.size(); } 662 663 iterator find(StringRef Name) const { return HashTable.find(Name); } 664 665 /// Print some statistics to stderr that indicate how well the 666 /// hashing is doing. 667 void PrintStats() const; 668 669 /// Populate the identifier table with info about the language keywords 670 /// for the language specified by \p LangOpts. 671 void AddKeywords(const LangOptions &LangOpts); 672 673 /// Returns the correct diagnostic to issue for a future-compat diagnostic 674 /// warning. Note, this function assumes the identifier passed has already 675 /// been determined to be a future compatible keyword. 676 diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, 677 const LangOptions &LangOpts); 678 }; 679 680 /// A family of Objective-C methods. 681 /// 682 /// These families have no inherent meaning in the language, but are 683 /// nonetheless central enough in the existing implementations to 684 /// merit direct AST support. While, in theory, arbitrary methods can 685 /// be considered to form families, we focus here on the methods 686 /// involving allocation and retain-count management, as these are the 687 /// most "core" and the most likely to be useful to diverse clients 688 /// without extra information. 689 /// 690 /// Both selectors and actual method declarations may be classified 691 /// into families. Method families may impose additional restrictions 692 /// beyond their selector name; for example, a method called '_init' 693 /// that returns void is not considered to be in the 'init' family 694 /// (but would be if it returned 'id'). 
/// It is also possible to explicitly change or remove a method's family.
/// Therefore the method's family should be considered the single source of
/// truth.
enum ObjCMethodFamily {
  /// No particular method family.
  OMF_None,

  // Selectors in these families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have
  // additional CamelCase "words" in their first selector chunk
  // following the family name.
  OMF_alloc,
  OMF_copy,
  OMF_init,
  OMF_mutableCopy,
  OMF_new,

  // These families are singletons consisting only of the nullary
  // selector with the given name.
  OMF_autorelease,
  OMF_dealloc,
  OMF_finalize,
  OMF_release,
  OMF_retain,
  OMF_retainCount,
  OMF_self,
  OMF_initialize,

  // performSelector families
  OMF_performSelector
};

/// Enough bits to store any enumerator in ObjCMethodFamily or
/// InvalidObjCMethodFamily.
enum { ObjCMethodFamilyBitWidth = 4 };

/// An invalid value of ObjCMethodFamily.
enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 };

// Enforce the invariant documented on ObjCMethodFamilyBitWidth: every real
// family must fit in the bit width and must not collide with the invalid
// marker. OMF_performSelector is the last enumerator, so checking it covers
// the whole enum. The casts avoid comparing two unrelated enumeration types.
static_assert(static_cast<unsigned>(OMF_performSelector) <
                  static_cast<unsigned>(InvalidObjCMethodFamily),
              "ObjCMethodFamilyBitWidth is too small for ObjCMethodFamily");
static_assert(static_cast<unsigned>(InvalidObjCMethodFamily) <
                  (1u << ObjCMethodFamilyBitWidth),
              "InvalidObjCMethodFamily does not fit in the bit width");

/// A family of Objective-C methods.
///
/// These are families of methods whose result type is initially 'id', but
/// are candidates for having the result type changed to 'instancetype'.
enum ObjCInstanceTypeFamily {
  OIT_None,
  OIT_Array,
  OIT_Dictionary,
  OIT_Singleton,
  OIT_Init,
  OIT_ReturnsSelf
};

/// Classification of a selector as reported by
/// Selector::getStringFormatFamily(); \c SFF_None is the first (zero) value.
enum ObjCStringFormatFamily {
  SFF_None,
  SFF_NSString,
  SFF_CFString
};

/// Smart pointer class that efficiently represents Objective-C method
/// names.
///
/// This class will either point to an IdentifierInfo or a
/// MultiKeywordSelector (which is private). This enables us to optimize
/// selectors that take no arguments and selectors that take 1 argument, which
/// accounts for 78% of all selectors in Cocoa.h.
759 class Selector { 760 friend class Diagnostic; 761 friend class SelectorTable; // only the SelectorTable can create these 762 friend class DeclarationName; // and the AST's DeclarationName. 763 764 enum IdentifierInfoFlag { 765 // Empty selector = 0. Note that these enumeration values must 766 // correspond to the enumeration values of DeclarationName::StoredNameKind 767 ZeroArg = 0x01, 768 OneArg = 0x02, 769 MultiArg = 0x07, 770 ArgFlags = 0x07 771 }; 772 773 /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low 774 /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any 775 /// case IdentifierInfo and MultiKeywordSelector are already aligned to 776 /// 8 bytes even on 32 bits archs because of DeclarationName. 777 uintptr_t InfoPtr = 0; 778 779 Selector(IdentifierInfo *II, unsigned nArgs) { 780 InfoPtr = reinterpret_cast<uintptr_t>(II); 781 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo"); 782 assert(nArgs < 2 && "nArgs not equal to 0/1"); 783 InfoPtr |= nArgs+1; 784 } 785 786 Selector(MultiKeywordSelector *SI) { 787 InfoPtr = reinterpret_cast<uintptr_t>(SI); 788 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo"); 789 InfoPtr |= MultiArg; 790 } 791 792 IdentifierInfo *getAsIdentifierInfo() const { 793 if (getIdentifierInfoFlag() < MultiArg) 794 return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags); 795 return nullptr; 796 } 797 798 MultiKeywordSelector *getMultiKeywordSelector() const { 799 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags); 800 } 801 802 unsigned getIdentifierInfoFlag() const { 803 return InfoPtr & ArgFlags; 804 } 805 806 static ObjCMethodFamily getMethodFamilyImpl(Selector sel); 807 808 static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel); 809 810 public: 811 /// The default ctor should only be used when creating data structures that 812 /// will contain selectors. 
813 Selector() = default; 814 explicit Selector(uintptr_t V) : InfoPtr(V) {} 815 816 /// operator==/!= - Indicate whether the specified selectors are identical. 817 bool operator==(Selector RHS) const { 818 return InfoPtr == RHS.InfoPtr; 819 } 820 bool operator!=(Selector RHS) const { 821 return InfoPtr != RHS.InfoPtr; 822 } 823 824 void *getAsOpaquePtr() const { 825 return reinterpret_cast<void*>(InfoPtr); 826 } 827 828 /// Determine whether this is the empty selector. 829 bool isNull() const { return InfoPtr == 0; } 830 831 // Predicates to identify the selector type. 832 bool isKeywordSelector() const { 833 return getIdentifierInfoFlag() != ZeroArg; 834 } 835 836 bool isUnarySelector() const { 837 return getIdentifierInfoFlag() == ZeroArg; 838 } 839 840 /// If this selector is the specific keyword selector described by Names. 841 bool isKeywordSelector(ArrayRef<StringRef> Names) const; 842 843 /// If this selector is the specific unary selector described by Name. 844 bool isUnarySelector(StringRef Name) const; 845 846 unsigned getNumArgs() const; 847 848 /// Retrieve the identifier at a given position in the selector. 849 /// 850 /// Note that the identifier pointer returned may be NULL. Clients that only 851 /// care about the text of the identifier string, and not the specific, 852 /// uniqued identifier pointer, should use \c getNameForSlot(), which returns 853 /// an empty string when the identifier pointer would be NULL. 854 /// 855 /// \param argIndex The index for which we want to retrieve the identifier. 856 /// This index shall be less than \c getNumArgs() unless this is a keyword 857 /// selector, in which case 0 is the only permissible value. 858 /// 859 /// \returns the uniqued identifier for this slot, or NULL if this slot has 860 /// no corresponding identifier. 861 IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const; 862 863 /// Retrieve the name at a given position in the selector. 
864 /// 865 /// \param argIndex The index for which we want to retrieve the name. 866 /// This index shall be less than \c getNumArgs() unless this is a keyword 867 /// selector, in which case 0 is the only permissible value. 868 /// 869 /// \returns the name for this slot, which may be the empty string if no 870 /// name was supplied. 871 StringRef getNameForSlot(unsigned argIndex) const; 872 873 /// Derive the full selector name (e.g. "foo:bar:") and return 874 /// it as an std::string. 875 std::string getAsString() const; 876 877 /// Prints the full selector name (e.g. "foo:bar:"). 878 void print(llvm::raw_ostream &OS) const; 879 880 void dump() const; 881 882 /// Derive the conventional family of this method. 883 ObjCMethodFamily getMethodFamily() const { 884 return getMethodFamilyImpl(*this); 885 } 886 887 ObjCStringFormatFamily getStringFormatFamily() const { 888 return getStringFormatFamilyImpl(*this); 889 } 890 891 static Selector getEmptyMarker() { 892 return Selector(uintptr_t(-1)); 893 } 894 895 static Selector getTombstoneMarker() { 896 return Selector(uintptr_t(-2)); 897 } 898 899 static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel); 900 }; 901 902 /// This table allows us to fully hide how we implement 903 /// multi-keyword caching. 904 class SelectorTable { 905 // Actually a SelectorTableImpl 906 void *Impl; 907 908 public: 909 SelectorTable(); 910 SelectorTable(const SelectorTable &) = delete; 911 SelectorTable &operator=(const SelectorTable &) = delete; 912 ~SelectorTable(); 913 914 /// Can create any sort of selector. 915 /// 916 /// \p NumArgs indicates whether this is a no argument selector "foo", a 917 /// single argument selector "foo:" or multi-argument "foo:bar:". 
918 Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV); 919 920 Selector getUnarySelector(IdentifierInfo *ID) { 921 return Selector(ID, 1); 922 } 923 924 Selector getNullarySelector(IdentifierInfo *ID) { 925 return Selector(ID, 0); 926 } 927 928 /// Return the total amount of memory allocated for managing selectors. 929 size_t getTotalMemory() const; 930 931 /// Return the default setter name for the given identifier. 932 /// 933 /// This is "set" + \p Name where the initial character of \p Name 934 /// has been capitalized. 935 static SmallString<64> constructSetterName(StringRef Name); 936 937 /// Return the default setter selector for the given identifier. 938 /// 939 /// This is "set" + \p Name where the initial character of \p Name 940 /// has been capitalized. 941 static Selector constructSetterSelector(IdentifierTable &Idents, 942 SelectorTable &SelTable, 943 const IdentifierInfo *Name); 944 945 /// Return the property name for the given setter selector. 946 static std::string getPropertyNameFromSetterSelector(Selector Sel); 947 }; 948 949 namespace detail { 950 951 /// DeclarationNameExtra is used as a base of various uncommon special names. 952 /// This class is needed since DeclarationName has not enough space to store 953 /// the kind of every possible names. Therefore the kind of common names is 954 /// stored directly in DeclarationName, and the kind of uncommon names is 955 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because 956 /// DeclarationName needs the lower 3 bits to store the kind of common names. 957 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change 958 /// here is very likely to require changes in DeclarationName(Table). 959 class alignas(IdentifierInfoAlignment) DeclarationNameExtra { 960 friend class clang::DeclarationName; 961 friend class clang::DeclarationNameTable; 962 963 protected: 964 /// The kind of "extra" information stored in the DeclarationName. 
See 965 /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values 966 /// are used. Note that DeclarationName depends on the numerical values 967 /// of the enumerators in this enum. See DeclarationName::StoredNameKind 968 /// for more info. 969 enum ExtraKind { 970 CXXDeductionGuideName, 971 CXXLiteralOperatorName, 972 CXXUsingDirective, 973 ObjCMultiArgSelector 974 }; 975 976 /// ExtraKindOrNumArgs has one of the following meaning: 977 /// * The kind of an uncommon C++ special name. This DeclarationNameExtra 978 /// is in this case in fact either a CXXDeductionGuideNameExtra or 979 /// a CXXLiteralOperatorIdName. 980 /// 981 /// * It may be also name common to C++ using-directives (CXXUsingDirective), 982 /// 983 /// * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is 984 /// the number of arguments in the Objective-C selector, in which 985 /// case the DeclarationNameExtra is also a MultiKeywordSelector. 986 unsigned ExtraKindOrNumArgs; 987 988 DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {} 989 DeclarationNameExtra(unsigned NumArgs) 990 : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {} 991 992 /// Return the corresponding ExtraKind. 993 ExtraKind getKind() const { 994 return static_cast<ExtraKind>(ExtraKindOrNumArgs > 995 (unsigned)ObjCMultiArgSelector 996 ? (unsigned)ObjCMultiArgSelector 997 : ExtraKindOrNumArgs); 998 } 999 1000 /// Return the number of arguments in an ObjC selector. Only valid when this 1001 /// is indeed an ObjCMultiArgSelector. 1002 unsigned getNumArgs() const { 1003 assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector && 1004 "getNumArgs called but this is not an ObjC selector!"); 1005 return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector; 1006 } 1007 }; 1008 1009 } // namespace detail 1010 1011 } // namespace clang 1012 1013 namespace llvm { 1014 1015 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and 1016 /// DenseSets. 
1017 template <> 1018 struct DenseMapInfo<clang::Selector> { 1019 static clang::Selector getEmptyKey() { 1020 return clang::Selector::getEmptyMarker(); 1021 } 1022 1023 static clang::Selector getTombstoneKey() { 1024 return clang::Selector::getTombstoneMarker(); 1025 } 1026 1027 static unsigned getHashValue(clang::Selector S); 1028 1029 static bool isEqual(clang::Selector LHS, clang::Selector RHS) { 1030 return LHS == RHS; 1031 } 1032 }; 1033 1034 template<> 1035 struct PointerLikeTypeTraits<clang::Selector> { 1036 static const void *getAsVoidPointer(clang::Selector P) { 1037 return P.getAsOpaquePtr(); 1038 } 1039 1040 static clang::Selector getFromVoidPointer(const void *P) { 1041 return clang::Selector(reinterpret_cast<uintptr_t>(P)); 1042 } 1043 1044 static constexpr int NumLowBitsAvailable = 0; 1045 }; 1046 1047 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which 1048 // are not guaranteed to be 8-byte aligned. 1049 template<> 1050 struct PointerLikeTypeTraits<clang::IdentifierInfo*> { 1051 static void *getAsVoidPointer(clang::IdentifierInfo* P) { 1052 return P; 1053 } 1054 1055 static clang::IdentifierInfo *getFromVoidPointer(void *P) { 1056 return static_cast<clang::IdentifierInfo*>(P); 1057 } 1058 1059 static constexpr int NumLowBitsAvailable = 1; 1060 }; 1061 1062 template<> 1063 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> { 1064 static const void *getAsVoidPointer(const clang::IdentifierInfo* P) { 1065 return P; 1066 } 1067 1068 static const clang::IdentifierInfo *getFromVoidPointer(const void *P) { 1069 return static_cast<const clang::IdentifierInfo*>(P); 1070 } 1071 1072 static constexpr int NumLowBitsAvailable = 1; 1073 }; 1074 1075 } // namespace llvm 1076 1077 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 1078