//===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
/// clang::Selector interfaces.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
#define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H

#include "clang/Basic/LLVM.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
#include "llvm/Support/type_traits.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <utility>

namespace clang {

class DeclarationName;
class DeclarationNameTable;
class IdentifierInfo;
class LangOptions;
class MultiKeywordSelector;
class SourceLocation;

/// Classifies whether, and on what grounds, an identifier is reserved.
///
/// NotReserved is the zero value; each remaining enumerator names the
/// property of the identifier that makes it reserved.
enum class ReservedIdentifierStatus {
  NotReserved = 0,
  StartsWithUnderscoreAtGlobalScope,
  StartsWithUnderscoreAndIsExternC,
  StartsWithDoubleUnderscore,
  StartsWithUnderscoreFollowedByCapitalLetter,
  ContainsDoubleUnderscore,
};

/// Determine whether an identifier is reserved for use as a name at global
/// scope. Such identifiers might be implementation-specific global functions
/// or variables.
55 inline bool isReservedAtGlobalScope(ReservedIdentifierStatus Status) { 56 return Status != ReservedIdentifierStatus::NotReserved; 57 } 58 59 /// Determine whether an identifier is reserved in all contexts. Such 60 /// identifiers might be implementation-specific keywords or macros, for 61 /// example. 62 inline bool isReservedInAllContexts(ReservedIdentifierStatus Status) { 63 return Status != ReservedIdentifierStatus::NotReserved && 64 Status != ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope && 65 Status != ReservedIdentifierStatus::StartsWithUnderscoreAndIsExternC; 66 } 67 68 /// A simple pair of identifier info and location. 69 using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>; 70 71 /// IdentifierInfo and other related classes are aligned to 72 /// 8 bytes so that DeclarationName can use the lower 3 bits 73 /// of a pointer to one of these classes. 74 enum { IdentifierInfoAlignment = 8 }; 75 76 static constexpr int ObjCOrBuiltinIDBits = 16; 77 78 /// One of these records is kept for each identifier that 79 /// is lexed. This contains information about whether the token was \#define'd, 80 /// is a language keyword, or if it is a front-end token of some sort (e.g. a 81 /// variable or function name). The preprocessor keeps this information in a 82 /// set, and all tok::identifier tokens have a pointer to one of these. 83 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits. 84 class alignas(IdentifierInfoAlignment) IdentifierInfo { 85 friend class IdentifierTable; 86 87 // Front-end token ID or tok::identifier. 88 unsigned TokenID : 9; 89 90 // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf). 91 // First NUM_OBJC_KEYWORDS values are for Objective-C, 92 // the remaining values are for builtins. 93 unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits; 94 95 // True if there is a #define for this. 96 unsigned HasMacro : 1; 97 98 // True if there was a #define for this. 
unsigned HadMacro : 1;

// True if the identifier is a language extension.
unsigned IsExtension : 1;

// True if the identifier is a keyword in a newer or proposed Standard.
unsigned IsFutureCompatKeyword : 1;

// True if the identifier is poisoned.
unsigned IsPoisoned : 1;

// True if the identifier is a C++ operator keyword.
unsigned IsCPPOperatorKeyword : 1;

// Internal bit maintained by RecomputeNeedsHandleIdentifier and set directly
// by the setters of several of the other flags.
// See comment about RecomputeNeedsHandleIdentifier for more info.
unsigned NeedsHandleIdentifier : 1;

// True if the identifier was loaded (at least partially) from an AST file.
unsigned IsFromAST : 1;

// True if the identifier has changed from the definition
// loaded from an AST file.
unsigned ChangedAfterLoad : 1;

// True if the identifier's frontend information has changed from the
// definition loaded from an AST file.
unsigned FEChangedAfterLoad : 1;

// True if revertTokenIDToIdentifier was called (and not yet undone by
// revertIdentifierToTokenID).
unsigned RevertedTokenID : 1;

// True if there may be additional information about
// this identifier stored externally.
unsigned OutOfDate : 1;

// True if this is the 'import' contextual keyword.
unsigned IsModulesImport : 1;

// True if this is a mangled OpenMP variant name.
unsigned IsMangledOpenMPVariantName : 1;

// True if this is a deprecated macro.
unsigned IsDeprecatedMacro : 1;

// True if this macro is unsafe in headers.
unsigned IsRestrictExpansion : 1;

// True if this macro is final.
unsigned IsFinal : 1;

// 22 bits left in a 64-bit word: the 9-bit TokenID, the 16-bit
// ObjCOrBuiltinID and the 17 one-bit flags use 42 bits.

// Managed by the language front-end.
153 void *FETokenInfo = nullptr; 154 155 llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr; 156 157 IdentifierInfo() 158 : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false), 159 HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false), 160 IsPoisoned(false), IsCPPOperatorKeyword(false), 161 NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false), 162 FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false), 163 IsModulesImport(false), IsMangledOpenMPVariantName(false), 164 IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {} 165 166 public: 167 IdentifierInfo(const IdentifierInfo &) = delete; 168 IdentifierInfo &operator=(const IdentifierInfo &) = delete; 169 IdentifierInfo(IdentifierInfo &&) = delete; 170 IdentifierInfo &operator=(IdentifierInfo &&) = delete; 171 172 /// Return true if this is the identifier for the specified string. 173 /// 174 /// This is intended to be used for string literals only: II->isStr("foo"). 175 template <std::size_t StrLen> 176 bool isStr(const char (&Str)[StrLen]) const { 177 return getLength() == StrLen-1 && 178 memcmp(getNameStart(), Str, StrLen-1) == 0; 179 } 180 181 /// Return true if this is the identifier for the specified StringRef. 182 bool isStr(llvm::StringRef Str) const { 183 llvm::StringRef ThisStr(getNameStart(), getLength()); 184 return ThisStr == Str; 185 } 186 187 /// Return the beginning of the actual null-terminated string for this 188 /// identifier. 189 const char *getNameStart() const { return Entry->getKeyData(); } 190 191 /// Efficiently return the length of this identifier info. 192 unsigned getLength() const { return Entry->getKeyLength(); } 193 194 /// Return the actual identifier string. 195 StringRef getName() const { 196 return StringRef(getNameStart(), getLength()); 197 } 198 199 /// Return true if this identifier is \#defined to some other value. 200 /// \note The current definition may be in a module and not currently visible. 
201 bool hasMacroDefinition() const { 202 return HasMacro; 203 } 204 void setHasMacroDefinition(bool Val) { 205 if (HasMacro == Val) return; 206 207 HasMacro = Val; 208 if (Val) { 209 NeedsHandleIdentifier = true; 210 HadMacro = true; 211 } else { 212 // If this is a final macro, make the deprecation and header unsafe bits 213 // stick around after the undefinition so they apply to any redefinitions. 214 if (!IsFinal) { 215 // Because calling the setters of these calls recomputes, just set them 216 // manually to avoid recomputing a bunch of times. 217 IsDeprecatedMacro = false; 218 IsRestrictExpansion = false; 219 } 220 RecomputeNeedsHandleIdentifier(); 221 } 222 } 223 /// Returns true if this identifier was \#defined to some value at any 224 /// moment. In this case there should be an entry for the identifier in the 225 /// macro history table in Preprocessor. 226 bool hadMacroDefinition() const { 227 return HadMacro; 228 } 229 230 bool isDeprecatedMacro() const { return IsDeprecatedMacro; } 231 232 void setIsDeprecatedMacro(bool Val) { 233 if (IsDeprecatedMacro == Val) 234 return; 235 IsDeprecatedMacro = Val; 236 if (Val) 237 NeedsHandleIdentifier = true; 238 else 239 RecomputeNeedsHandleIdentifier(); 240 } 241 242 bool isRestrictExpansion() const { return IsRestrictExpansion; } 243 244 void setIsRestrictExpansion(bool Val) { 245 if (IsRestrictExpansion == Val) 246 return; 247 IsRestrictExpansion = Val; 248 if (Val) 249 NeedsHandleIdentifier = true; 250 else 251 RecomputeNeedsHandleIdentifier(); 252 } 253 254 bool isFinal() const { return IsFinal; } 255 256 void setIsFinal(bool Val) { IsFinal = Val; } 257 258 /// If this is a source-language token (e.g. 'for'), this API 259 /// can be used to cause the lexer to map identifiers to source-language 260 /// tokens. 261 tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; } 262 263 /// True if revertTokenIDToIdentifier() was called. 
264 bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; } 265 266 /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2 267 /// compatibility. 268 /// 269 /// TokenID is normally read-only but there are 2 instances where we revert it 270 /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens 271 /// using this method so we can inform serialization about it. 272 void revertTokenIDToIdentifier() { 273 assert(TokenID != tok::identifier && "Already at tok::identifier"); 274 TokenID = tok::identifier; 275 RevertedTokenID = true; 276 } 277 void revertIdentifierToTokenID(tok::TokenKind TK) { 278 assert(TokenID == tok::identifier && "Should be at tok::identifier"); 279 TokenID = TK; 280 RevertedTokenID = false; 281 } 282 283 /// Return the preprocessor keyword ID for this identifier. 284 /// 285 /// For example, "define" will return tok::pp_define. 286 tok::PPKeywordKind getPPKeywordID() const; 287 288 /// Return the Objective-C keyword ID for the this identifier. 289 /// 290 /// For example, 'class' will return tok::objc_class if ObjC is enabled. 291 tok::ObjCKeywordKind getObjCKeywordID() const { 292 if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS) 293 return tok::ObjCKeywordKind(ObjCOrBuiltinID); 294 else 295 return tok::objc_not_keyword; 296 } 297 void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; } 298 299 /// Return a value indicating whether this is a builtin function. 300 /// 301 /// 0 is not-built-in. 1+ are specific builtin functions. 
302 unsigned getBuiltinID() const { 303 if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS) 304 return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS; 305 else 306 return 0; 307 } 308 void setBuiltinID(unsigned ID) { 309 ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS; 310 assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID 311 && "ID too large for field!"); 312 } 313 314 unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; } 315 void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; } 316 317 /// get/setExtension - Initialize information about whether or not this 318 /// language token is an extension. This controls extension warnings, and is 319 /// only valid if a custom token ID is set. 320 bool isExtensionToken() const { return IsExtension; } 321 void setIsExtensionToken(bool Val) { 322 IsExtension = Val; 323 if (Val) 324 NeedsHandleIdentifier = true; 325 else 326 RecomputeNeedsHandleIdentifier(); 327 } 328 329 /// is/setIsFutureCompatKeyword - Initialize information about whether or not 330 /// this language token is a keyword in a newer or proposed Standard. This 331 /// controls compatibility warnings, and is only true when not parsing the 332 /// corresponding Standard. Once a compatibility problem has been diagnosed 333 /// with this keyword, the flag will be cleared. 334 bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; } 335 void setIsFutureCompatKeyword(bool Val) { 336 IsFutureCompatKeyword = Val; 337 if (Val) 338 NeedsHandleIdentifier = true; 339 else 340 RecomputeNeedsHandleIdentifier(); 341 } 342 343 /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the 344 /// Preprocessor will emit an error every time this token is used. 345 void setIsPoisoned(bool Value = true) { 346 IsPoisoned = Value; 347 if (Value) 348 NeedsHandleIdentifier = true; 349 else 350 RecomputeNeedsHandleIdentifier(); 351 } 352 353 /// Return true if this token has been poisoned. 
354 bool isPoisoned() const { return IsPoisoned; } 355 356 /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether 357 /// this identifier is a C++ alternate representation of an operator. 358 void setIsCPlusPlusOperatorKeyword(bool Val = true) { 359 IsCPPOperatorKeyword = Val; 360 } 361 bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; } 362 363 /// Return true if this token is a keyword in the specified language. 364 bool isKeyword(const LangOptions &LangOpts) const; 365 366 /// Return true if this token is a C++ keyword in the specified 367 /// language. 368 bool isCPlusPlusKeyword(const LangOptions &LangOpts) const; 369 370 /// Get and set FETokenInfo. The language front-end is allowed to associate 371 /// arbitrary metadata with this token. 372 void *getFETokenInfo() const { return FETokenInfo; } 373 void setFETokenInfo(void *T) { FETokenInfo = T; } 374 375 /// Return true if the Preprocessor::HandleIdentifier must be called 376 /// on a token of this identifier. 377 /// 378 /// If this returns false, we know that HandleIdentifier will not affect 379 /// the token. 380 bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; } 381 382 /// Return true if the identifier in its current state was loaded 383 /// from an AST file. 384 bool isFromAST() const { return IsFromAST; } 385 386 void setIsFromAST() { IsFromAST = true; } 387 388 /// Determine whether this identifier has changed since it was loaded 389 /// from an AST file. 390 bool hasChangedSinceDeserialization() const { 391 return ChangedAfterLoad; 392 } 393 394 /// Note that this identifier has changed since it was loaded from 395 /// an AST file. 396 void setChangedSinceDeserialization() { 397 ChangedAfterLoad = true; 398 } 399 400 /// Determine whether the frontend token information for this 401 /// identifier has changed since it was loaded from an AST file. 
402 bool hasFETokenInfoChangedSinceDeserialization() const { 403 return FEChangedAfterLoad; 404 } 405 406 /// Note that the frontend token information for this identifier has 407 /// changed since it was loaded from an AST file. 408 void setFETokenInfoChangedSinceDeserialization() { 409 FEChangedAfterLoad = true; 410 } 411 412 /// Determine whether the information for this identifier is out of 413 /// date with respect to the external source. 414 bool isOutOfDate() const { return OutOfDate; } 415 416 /// Set whether the information for this identifier is out of 417 /// date with respect to the external source. 418 void setOutOfDate(bool OOD) { 419 OutOfDate = OOD; 420 if (OOD) 421 NeedsHandleIdentifier = true; 422 else 423 RecomputeNeedsHandleIdentifier(); 424 } 425 426 /// Determine whether this is the contextual keyword \c import. 427 bool isModulesImport() const { return IsModulesImport; } 428 429 /// Set whether this identifier is the contextual keyword \c import. 430 void setModulesImport(bool I) { 431 IsModulesImport = I; 432 if (I) 433 NeedsHandleIdentifier = true; 434 else 435 RecomputeNeedsHandleIdentifier(); 436 } 437 438 /// Determine whether this is the mangled name of an OpenMP variant. 439 bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; } 440 441 /// Set whether this is the mangled name of an OpenMP variant. 442 void setMangledOpenMPVariantName(bool I) { IsMangledOpenMPVariantName = I; } 443 444 /// Return true if this identifier is an editor placeholder. 445 /// 446 /// Editor placeholders are produced by the code-completion engine and are 447 /// represented as characters between '<#' and '#>' in the source code. 
An 448 /// example of auto-completed call with a placeholder parameter is shown 449 /// below: 450 /// \code 451 /// function(<#int x#>); 452 /// \endcode 453 bool isEditorPlaceholder() const { 454 return getName().startswith("<#") && getName().endswith("#>"); 455 } 456 457 /// Determine whether \p this is a name reserved for the implementation (C99 458 /// 7.1.3, C++ [lib.global.names]). 459 ReservedIdentifierStatus isReserved(const LangOptions &LangOpts) const; 460 461 /// If the identifier is an "uglified" reserved name, return a cleaned form. 462 /// e.g. _Foo => Foo. Otherwise, just returns the name. 463 StringRef deuglifiedName() const; 464 465 /// Provide less than operator for lexicographical sorting. 466 bool operator<(const IdentifierInfo &RHS) const { 467 return getName() < RHS.getName(); 468 } 469 470 private: 471 /// The Preprocessor::HandleIdentifier does several special (but rare) 472 /// things to identifiers of various sorts. For example, it changes the 473 /// \c for keyword token from tok::identifier to tok::for. 474 /// 475 /// This method is very tied to the definition of HandleIdentifier. Any 476 /// change to it should be reflected here. 477 void RecomputeNeedsHandleIdentifier() { 478 NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() || 479 isExtensionToken() || isFutureCompatKeyword() || 480 isOutOfDate() || isModulesImport(); 481 } 482 }; 483 484 /// An RAII object for [un]poisoning an identifier within a scope. 485 /// 486 /// \p II is allowed to be null, in which case objects of this type have 487 /// no effect. 488 class PoisonIdentifierRAIIObject { 489 IdentifierInfo *const II; 490 const bool OldValue; 491 492 public: 493 PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue) 494 : II(II), OldValue(II ? 
II->isPoisoned() : false) { 495 if(II) 496 II->setIsPoisoned(NewValue); 497 } 498 499 ~PoisonIdentifierRAIIObject() { 500 if(II) 501 II->setIsPoisoned(OldValue); 502 } 503 }; 504 505 /// An iterator that walks over all of the known identifiers 506 /// in the lookup table. 507 /// 508 /// Since this iterator uses an abstract interface via virtual 509 /// functions, it uses an object-oriented interface rather than the 510 /// more standard C++ STL iterator interface. In this OO-style 511 /// iteration, the single function \c Next() provides dereference, 512 /// advance, and end-of-sequence checking in a single 513 /// operation. Subclasses of this iterator type will provide the 514 /// actual functionality. 515 class IdentifierIterator { 516 protected: 517 IdentifierIterator() = default; 518 519 public: 520 IdentifierIterator(const IdentifierIterator &) = delete; 521 IdentifierIterator &operator=(const IdentifierIterator &) = delete; 522 523 virtual ~IdentifierIterator(); 524 525 /// Retrieve the next string in the identifier table and 526 /// advances the iterator for the following string. 527 /// 528 /// \returns The next string in the identifier table. If there is 529 /// no such string, returns an empty \c StringRef. 530 virtual StringRef Next() = 0; 531 }; 532 533 /// Provides lookups to, and iteration over, IdentiferInfo objects. 534 class IdentifierInfoLookup { 535 public: 536 virtual ~IdentifierInfoLookup(); 537 538 /// Return the IdentifierInfo for the specified named identifier. 539 /// 540 /// Unlike the version in IdentifierTable, this returns a pointer instead 541 /// of a reference. If the pointer is null then the IdentifierInfo cannot 542 /// be found. 543 virtual IdentifierInfo* get(StringRef Name) = 0; 544 545 /// Retrieve an iterator into the set of all identifiers 546 /// known to this identifier lookup source. 
547 /// 548 /// This routine provides access to all of the identifiers known to 549 /// the identifier lookup, allowing access to the contents of the 550 /// identifiers without introducing the overhead of constructing 551 /// IdentifierInfo objects for each. 552 /// 553 /// \returns A new iterator into the set of known identifiers. The 554 /// caller is responsible for deleting this iterator. 555 virtual IdentifierIterator *getIdentifiers(); 556 }; 557 558 /// Implements an efficient mapping from strings to IdentifierInfo nodes. 559 /// 560 /// This has no other purpose, but this is an extremely performance-critical 561 /// piece of the code, as each occurrence of every identifier goes through 562 /// here when lexed. 563 class IdentifierTable { 564 // Shark shows that using MallocAllocator is *much* slower than using this 565 // BumpPtrAllocator! 566 using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>; 567 HashTableTy HashTable; 568 569 IdentifierInfoLookup* ExternalLookup; 570 571 public: 572 /// Create the identifier table. 573 explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr); 574 575 /// Create the identifier table, populating it with info about the 576 /// language keywords for the language specified by \p LangOpts. 577 explicit IdentifierTable(const LangOptions &LangOpts, 578 IdentifierInfoLookup *ExternalLookup = nullptr); 579 580 /// Set the external identifier lookup mechanism. 581 void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) { 582 ExternalLookup = IILookup; 583 } 584 585 /// Retrieve the external identifier lookup object, if any. 586 IdentifierInfoLookup *getExternalIdentifierLookup() const { 587 return ExternalLookup; 588 } 589 590 llvm::BumpPtrAllocator& getAllocator() { 591 return HashTable.getAllocator(); 592 } 593 594 /// Return the identifier token info for the specified named 595 /// identifier. 
596 IdentifierInfo &get(StringRef Name) { 597 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first; 598 599 IdentifierInfo *&II = Entry.second; 600 if (II) return *II; 601 602 // No entry; if we have an external lookup, look there first. 603 if (ExternalLookup) { 604 II = ExternalLookup->get(Name); 605 if (II) 606 return *II; 607 } 608 609 // Lookups failed, make a new IdentifierInfo. 610 void *Mem = getAllocator().Allocate<IdentifierInfo>(); 611 II = new (Mem) IdentifierInfo(); 612 613 // Make sure getName() knows how to find the IdentifierInfo 614 // contents. 615 II->Entry = &Entry; 616 617 return *II; 618 } 619 620 IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) { 621 IdentifierInfo &II = get(Name); 622 II.TokenID = TokenCode; 623 assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large"); 624 return II; 625 } 626 627 /// Gets an IdentifierInfo for the given name without consulting 628 /// external sources. 629 /// 630 /// This is a version of get() meant for external sources that want to 631 /// introduce or modify an identifier. If they called get(), they would 632 /// likely end up in a recursion. 633 IdentifierInfo &getOwn(StringRef Name) { 634 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first; 635 636 IdentifierInfo *&II = Entry.second; 637 if (II) 638 return *II; 639 640 // Lookups failed, make a new IdentifierInfo. 641 void *Mem = getAllocator().Allocate<IdentifierInfo>(); 642 II = new (Mem) IdentifierInfo(); 643 644 // Make sure getName() knows how to find the IdentifierInfo 645 // contents. 646 II->Entry = &Entry; 647 648 // If this is the 'import' contextual keyword, mark it as such. 
649 if (Name.equals("import")) 650 II->setModulesImport(true); 651 652 return *II; 653 } 654 655 using iterator = HashTableTy::const_iterator; 656 using const_iterator = HashTableTy::const_iterator; 657 658 iterator begin() const { return HashTable.begin(); } 659 iterator end() const { return HashTable.end(); } 660 unsigned size() const { return HashTable.size(); } 661 662 iterator find(StringRef Name) const { return HashTable.find(Name); } 663 664 /// Print some statistics to stderr that indicate how well the 665 /// hashing is doing. 666 void PrintStats() const; 667 668 /// Populate the identifier table with info about the language keywords 669 /// for the language specified by \p LangOpts. 670 void AddKeywords(const LangOptions &LangOpts); 671 }; 672 673 /// A family of Objective-C methods. 674 /// 675 /// These families have no inherent meaning in the language, but are 676 /// nonetheless central enough in the existing implementations to 677 /// merit direct AST support. While, in theory, arbitrary methods can 678 /// be considered to form families, we focus here on the methods 679 /// involving allocation and retain-count management, as these are the 680 /// most "core" and the most likely to be useful to diverse clients 681 /// without extra information. 682 /// 683 /// Both selectors and actual method declarations may be classified 684 /// into families. Method families may impose additional restrictions 685 /// beyond their selector name; for example, a method called '_init' 686 /// that returns void is not considered to be in the 'init' family 687 /// (but would be if it returned 'id'). It is also possible to 688 /// explicitly change or remove a method's family. Therefore the 689 /// method's family should be considered the single source of truth. 690 enum ObjCMethodFamily { 691 /// No particular method family. 
692 OMF_None, 693 694 // Selectors in these families may have arbitrary arity, may be 695 // written with arbitrary leading underscores, and may have 696 // additional CamelCase "words" in their first selector chunk 697 // following the family name. 698 OMF_alloc, 699 OMF_copy, 700 OMF_init, 701 OMF_mutableCopy, 702 OMF_new, 703 704 // These families are singletons consisting only of the nullary 705 // selector with the given name. 706 OMF_autorelease, 707 OMF_dealloc, 708 OMF_finalize, 709 OMF_release, 710 OMF_retain, 711 OMF_retainCount, 712 OMF_self, 713 OMF_initialize, 714 715 // performSelector families 716 OMF_performSelector 717 }; 718 719 /// Enough bits to store any enumerator in ObjCMethodFamily or 720 /// InvalidObjCMethodFamily. 721 enum { ObjCMethodFamilyBitWidth = 4 }; 722 723 /// An invalid value of ObjCMethodFamily. 724 enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 }; 725 726 /// A family of Objective-C methods. 727 /// 728 /// These are family of methods whose result type is initially 'id', but 729 /// but are candidate for the result type to be changed to 'instancetype'. 730 enum ObjCInstanceTypeFamily { 731 OIT_None, 732 OIT_Array, 733 OIT_Dictionary, 734 OIT_Singleton, 735 OIT_Init, 736 OIT_ReturnsSelf 737 }; 738 739 enum ObjCStringFormatFamily { 740 SFF_None, 741 SFF_NSString, 742 SFF_CFString 743 }; 744 745 /// Smart pointer class that efficiently represents Objective-C method 746 /// names. 747 /// 748 /// This class will either point to an IdentifierInfo or a 749 /// MultiKeywordSelector (which is private). This enables us to optimize 750 /// selectors that take no arguments and selectors that take 1 argument, which 751 /// accounts for 78% of all selectors in Cocoa.h. 752 class Selector { 753 friend class Diagnostic; 754 friend class SelectorTable; // only the SelectorTable can create these 755 friend class DeclarationName; // and the AST's DeclarationName. 756 757 enum IdentifierInfoFlag { 758 // Empty selector = 0. 
Note that these enumeration values must 759 // correspond to the enumeration values of DeclarationName::StoredNameKind 760 ZeroArg = 0x01, 761 OneArg = 0x02, 762 MultiArg = 0x07, 763 ArgFlags = 0x07 764 }; 765 766 /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low 767 /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any 768 /// case IdentifierInfo and MultiKeywordSelector are already aligned to 769 /// 8 bytes even on 32 bits archs because of DeclarationName. 770 uintptr_t InfoPtr = 0; 771 772 Selector(IdentifierInfo *II, unsigned nArgs) { 773 InfoPtr = reinterpret_cast<uintptr_t>(II); 774 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo"); 775 assert(nArgs < 2 && "nArgs not equal to 0/1"); 776 InfoPtr |= nArgs+1; 777 } 778 779 Selector(MultiKeywordSelector *SI) { 780 InfoPtr = reinterpret_cast<uintptr_t>(SI); 781 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo"); 782 InfoPtr |= MultiArg; 783 } 784 785 IdentifierInfo *getAsIdentifierInfo() const { 786 if (getIdentifierInfoFlag() < MultiArg) 787 return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags); 788 return nullptr; 789 } 790 791 MultiKeywordSelector *getMultiKeywordSelector() const { 792 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags); 793 } 794 795 unsigned getIdentifierInfoFlag() const { 796 return InfoPtr & ArgFlags; 797 } 798 799 static ObjCMethodFamily getMethodFamilyImpl(Selector sel); 800 801 static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel); 802 803 public: 804 /// The default ctor should only be used when creating data structures that 805 /// will contain selectors. 806 Selector() = default; 807 explicit Selector(uintptr_t V) : InfoPtr(V) {} 808 809 /// operator==/!= - Indicate whether the specified selectors are identical. 
810 bool operator==(Selector RHS) const { 811 return InfoPtr == RHS.InfoPtr; 812 } 813 bool operator!=(Selector RHS) const { 814 return InfoPtr != RHS.InfoPtr; 815 } 816 817 void *getAsOpaquePtr() const { 818 return reinterpret_cast<void*>(InfoPtr); 819 } 820 821 /// Determine whether this is the empty selector. 822 bool isNull() const { return InfoPtr == 0; } 823 824 // Predicates to identify the selector type. 825 bool isKeywordSelector() const { 826 return getIdentifierInfoFlag() != ZeroArg; 827 } 828 829 bool isUnarySelector() const { 830 return getIdentifierInfoFlag() == ZeroArg; 831 } 832 833 /// If this selector is the specific keyword selector described by Names. 834 bool isKeywordSelector(ArrayRef<StringRef> Names) const; 835 836 /// If this selector is the specific unary selector described by Name. 837 bool isUnarySelector(StringRef Name) const; 838 839 unsigned getNumArgs() const; 840 841 /// Retrieve the identifier at a given position in the selector. 842 /// 843 /// Note that the identifier pointer returned may be NULL. Clients that only 844 /// care about the text of the identifier string, and not the specific, 845 /// uniqued identifier pointer, should use \c getNameForSlot(), which returns 846 /// an empty string when the identifier pointer would be NULL. 847 /// 848 /// \param argIndex The index for which we want to retrieve the identifier. 849 /// This index shall be less than \c getNumArgs() unless this is a keyword 850 /// selector, in which case 0 is the only permissible value. 851 /// 852 /// \returns the uniqued identifier for this slot, or NULL if this slot has 853 /// no corresponding identifier. 854 IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const; 855 856 /// Retrieve the name at a given position in the selector. 857 /// 858 /// \param argIndex The index for which we want to retrieve the name. 
859 /// This index shall be less than \c getNumArgs() unless this is a keyword 860 /// selector, in which case 0 is the only permissible value. 861 /// 862 /// \returns the name for this slot, which may be the empty string if no 863 /// name was supplied. 864 StringRef getNameForSlot(unsigned argIndex) const; 865 866 /// Derive the full selector name (e.g. "foo:bar:") and return 867 /// it as an std::string. 868 std::string getAsString() const; 869 870 /// Prints the full selector name (e.g. "foo:bar:"). 871 void print(llvm::raw_ostream &OS) const; 872 873 void dump() const; 874 875 /// Derive the conventional family of this method. 876 ObjCMethodFamily getMethodFamily() const { 877 return getMethodFamilyImpl(*this); 878 } 879 880 ObjCStringFormatFamily getStringFormatFamily() const { 881 return getStringFormatFamilyImpl(*this); 882 } 883 884 static Selector getEmptyMarker() { 885 return Selector(uintptr_t(-1)); 886 } 887 888 static Selector getTombstoneMarker() { 889 return Selector(uintptr_t(-2)); 890 } 891 892 static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel); 893 }; 894 895 /// This table allows us to fully hide how we implement 896 /// multi-keyword caching. 897 class SelectorTable { 898 // Actually a SelectorTableImpl 899 void *Impl; 900 901 public: 902 SelectorTable(); 903 SelectorTable(const SelectorTable &) = delete; 904 SelectorTable &operator=(const SelectorTable &) = delete; 905 ~SelectorTable(); 906 907 /// Can create any sort of selector. 908 /// 909 /// \p NumArgs indicates whether this is a no argument selector "foo", a 910 /// single argument selector "foo:" or multi-argument "foo:bar:". 911 Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV); 912 913 Selector getUnarySelector(IdentifierInfo *ID) { 914 return Selector(ID, 1); 915 } 916 917 Selector getNullarySelector(IdentifierInfo *ID) { 918 return Selector(ID, 0); 919 } 920 921 /// Return the total amount of memory allocated for managing selectors. 
922 size_t getTotalMemory() const; 923 924 /// Return the default setter name for the given identifier. 925 /// 926 /// This is "set" + \p Name where the initial character of \p Name 927 /// has been capitalized. 928 static SmallString<64> constructSetterName(StringRef Name); 929 930 /// Return the default setter selector for the given identifier. 931 /// 932 /// This is "set" + \p Name where the initial character of \p Name 933 /// has been capitalized. 934 static Selector constructSetterSelector(IdentifierTable &Idents, 935 SelectorTable &SelTable, 936 const IdentifierInfo *Name); 937 938 /// Return the property name for the given setter selector. 939 static std::string getPropertyNameFromSetterSelector(Selector Sel); 940 }; 941 942 namespace detail { 943 944 /// DeclarationNameExtra is used as a base of various uncommon special names. 945 /// This class is needed since DeclarationName has not enough space to store 946 /// the kind of every possible names. Therefore the kind of common names is 947 /// stored directly in DeclarationName, and the kind of uncommon names is 948 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because 949 /// DeclarationName needs the lower 3 bits to store the kind of common names. 950 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change 951 /// here is very likely to require changes in DeclarationName(Table). 952 class alignas(IdentifierInfoAlignment) DeclarationNameExtra { 953 friend class clang::DeclarationName; 954 friend class clang::DeclarationNameTable; 955 956 protected: 957 /// The kind of "extra" information stored in the DeclarationName. See 958 /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values 959 /// are used. Note that DeclarationName depends on the numerical values 960 /// of the enumerators in this enum. See DeclarationName::StoredNameKind 961 /// for more info. 
962 enum ExtraKind { 963 CXXDeductionGuideName, 964 CXXLiteralOperatorName, 965 CXXUsingDirective, 966 ObjCMultiArgSelector 967 }; 968 969 /// ExtraKindOrNumArgs has one of the following meaning: 970 /// * The kind of an uncommon C++ special name. This DeclarationNameExtra 971 /// is in this case in fact either a CXXDeductionGuideNameExtra or 972 /// a CXXLiteralOperatorIdName. 973 /// 974 /// * It may be also name common to C++ using-directives (CXXUsingDirective), 975 /// 976 /// * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is 977 /// the number of arguments in the Objective-C selector, in which 978 /// case the DeclarationNameExtra is also a MultiKeywordSelector. 979 unsigned ExtraKindOrNumArgs; 980 981 DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {} 982 DeclarationNameExtra(unsigned NumArgs) 983 : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {} 984 985 /// Return the corresponding ExtraKind. 986 ExtraKind getKind() const { 987 return static_cast<ExtraKind>(ExtraKindOrNumArgs > 988 (unsigned)ObjCMultiArgSelector 989 ? (unsigned)ObjCMultiArgSelector 990 : ExtraKindOrNumArgs); 991 } 992 993 /// Return the number of arguments in an ObjC selector. Only valid when this 994 /// is indeed an ObjCMultiArgSelector. 995 unsigned getNumArgs() const { 996 assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector && 997 "getNumArgs called but this is not an ObjC selector!"); 998 return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector; 999 } 1000 }; 1001 1002 } // namespace detail 1003 1004 } // namespace clang 1005 1006 namespace llvm { 1007 1008 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and 1009 /// DenseSets. 
1010 template <> 1011 struct DenseMapInfo<clang::Selector> { 1012 static clang::Selector getEmptyKey() { 1013 return clang::Selector::getEmptyMarker(); 1014 } 1015 1016 static clang::Selector getTombstoneKey() { 1017 return clang::Selector::getTombstoneMarker(); 1018 } 1019 1020 static unsigned getHashValue(clang::Selector S); 1021 1022 static bool isEqual(clang::Selector LHS, clang::Selector RHS) { 1023 return LHS == RHS; 1024 } 1025 }; 1026 1027 template<> 1028 struct PointerLikeTypeTraits<clang::Selector> { 1029 static const void *getAsVoidPointer(clang::Selector P) { 1030 return P.getAsOpaquePtr(); 1031 } 1032 1033 static clang::Selector getFromVoidPointer(const void *P) { 1034 return clang::Selector(reinterpret_cast<uintptr_t>(P)); 1035 } 1036 1037 static constexpr int NumLowBitsAvailable = 0; 1038 }; 1039 1040 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which 1041 // are not guaranteed to be 8-byte aligned. 1042 template<> 1043 struct PointerLikeTypeTraits<clang::IdentifierInfo*> { 1044 static void *getAsVoidPointer(clang::IdentifierInfo* P) { 1045 return P; 1046 } 1047 1048 static clang::IdentifierInfo *getFromVoidPointer(void *P) { 1049 return static_cast<clang::IdentifierInfo*>(P); 1050 } 1051 1052 static constexpr int NumLowBitsAvailable = 1; 1053 }; 1054 1055 template<> 1056 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> { 1057 static const void *getAsVoidPointer(const clang::IdentifierInfo* P) { 1058 return P; 1059 } 1060 1061 static const clang::IdentifierInfo *getFromVoidPointer(const void *P) { 1062 return static_cast<const clang::IdentifierInfo*>(P); 1063 } 1064 1065 static constexpr int NumLowBitsAvailable = 1; 1066 }; 1067 1068 } // namespace llvm 1069 1070 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 1071