1 //===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Defines the clang::IdentifierInfo, clang::IdentifierTable, and 11 /// clang::Selector interfaces. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 16 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 17 18 #include "clang/Basic/LLVM.h" 19 #include "clang/Basic/TokenKinds.h" 20 #include "llvm/ADT/DenseMapInfo.h" 21 #include "llvm/ADT/SmallString.h" 22 #include "llvm/ADT/StringMap.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/Allocator.h" 25 #include "llvm/Support/PointerLikeTypeTraits.h" 26 #include "llvm/Support/type_traits.h" 27 #include <cassert> 28 #include <cstddef> 29 #include <cstdint> 30 #include <cstring> 31 #include <string> 32 #include <utility> 33 34 namespace clang { 35 36 class DeclarationName; 37 class DeclarationNameTable; 38 class IdentifierInfo; 39 class LangOptions; 40 class MultiKeywordSelector; 41 class SourceLocation; 42 43 /// A simple pair of identifier info and location. 44 using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>; 45 46 /// IdentifierInfo and other related classes are aligned to 47 /// 8 bytes so that DeclarationName can use the lower 3 bits 48 /// of a pointer to one of these classes. 49 enum { IdentifierInfoAlignment = 8 }; 50 51 /// One of these records is kept for each identifier that 52 /// is lexed. This contains information about whether the token was \#define'd, 53 /// is a language keyword, or if it is a front-end token of some sort (e.g. a 54 /// variable or function name). The preprocessor keeps this information in a 55 /// set, and all tok::identifier tokens have a pointer to one of these. 56 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits. 57 class alignas(IdentifierInfoAlignment) IdentifierInfo { 58 friend class IdentifierTable; 59 60 // Front-end token ID or tok::identifier. 61 unsigned TokenID : 9; 62 63 // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf). 64 // First NUM_OBJC_KEYWORDS values are for Objective-C, 65 // the remaining values are for builtins. 66 unsigned ObjCOrBuiltinID : 13; 67 68 // True if there is a #define for this. 69 unsigned HasMacro : 1; 70 71 // True if there was a #define for this. 72 unsigned HadMacro : 1; 73 74 // True if the identifier is a language extension. 75 unsigned IsExtension : 1; 76 77 // True if the identifier is a keyword in a newer or proposed Standard. 78 unsigned IsFutureCompatKeyword : 1; 79 80 // True if the identifier is poisoned. 81 unsigned IsPoisoned : 1; 82 83 // True if the identifier is a C++ operator keyword. 84 unsigned IsCPPOperatorKeyword : 1; 85 86 // Internal bit set by the member function RecomputeNeedsHandleIdentifier. 87 // See comment about RecomputeNeedsHandleIdentifier for more info. 88 unsigned NeedsHandleIdentifier : 1; 89 90 // True if the identifier was loaded (at least partially) from an AST file. 91 unsigned IsFromAST : 1; 92 93 // True if the identifier has changed from the definition 94 // loaded from an AST file. 95 unsigned ChangedAfterLoad : 1; 96 97 // True if the identifier's frontend information has changed from the 98 // definition loaded from an AST file. 99 unsigned FEChangedAfterLoad : 1; 100 101 // True if revertTokenIDToIdentifier was called. 102 unsigned RevertedTokenID : 1; 103 104 // True if there may be additional information about 105 // this identifier stored externally. 106 unsigned OutOfDate : 1; 107 108 // True if this is the 'import' contextual keyword. 109 unsigned IsModulesImport : 1; 110 111 // 29 bits left in a 64-bit word. 112 113 // Managed by the language front-end. 114 void *FETokenInfo = nullptr; 115 116 llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr; 117 118 IdentifierInfo() 119 : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false), 120 HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false), 121 IsPoisoned(false), IsCPPOperatorKeyword(false), 122 NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false), 123 FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false), 124 IsModulesImport(false) {} 125 126 public: 127 IdentifierInfo(const IdentifierInfo &) = delete; 128 IdentifierInfo &operator=(const IdentifierInfo &) = delete; 129 IdentifierInfo(IdentifierInfo &&) = delete; 130 IdentifierInfo &operator=(IdentifierInfo &&) = delete; 131 132 /// Return true if this is the identifier for the specified string. 133 /// 134 /// This is intended to be used for string literals only: II->isStr("foo"). 135 template <std::size_t StrLen> 136 bool isStr(const char (&Str)[StrLen]) const { 137 return getLength() == StrLen-1 && 138 memcmp(getNameStart(), Str, StrLen-1) == 0; 139 } 140 141 /// Return true if this is the identifier for the specified StringRef. 142 bool isStr(llvm::StringRef Str) const { 143 llvm::StringRef ThisStr(getNameStart(), getLength()); 144 return ThisStr == Str; 145 } 146 147 /// Return the beginning of the actual null-terminated string for this 148 /// identifier. 149 const char *getNameStart() const { return Entry->getKeyData(); } 150 151 /// Efficiently return the length of this identifier info. 152 unsigned getLength() const { return Entry->getKeyLength(); } 153 154 /// Return the actual identifier string. 155 StringRef getName() const { 156 return StringRef(getNameStart(), getLength()); 157 } 158 159 /// Return true if this identifier is \#defined to some other value. 160 /// \note The current definition may be in a module and not currently visible. 161 bool hasMacroDefinition() const { 162 return HasMacro; 163 } 164 void setHasMacroDefinition(bool Val) { 165 if (HasMacro == Val) return; 166 167 HasMacro = Val; 168 if (Val) { 169 NeedsHandleIdentifier = true; 170 HadMacro = true; 171 } else { 172 RecomputeNeedsHandleIdentifier(); 173 } 174 } 175 /// Returns true if this identifier was \#defined to some value at any 176 /// moment. In this case there should be an entry for the identifier in the 177 /// macro history table in Preprocessor. 178 bool hadMacroDefinition() const { 179 return HadMacro; 180 } 181 182 /// If this is a source-language token (e.g. 'for'), this API 183 /// can be used to cause the lexer to map identifiers to source-language 184 /// tokens. 185 tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; } 186 187 /// True if revertTokenIDToIdentifier() was called. 188 bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; } 189 190 /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2 191 /// compatibility. 192 /// 193 /// TokenID is normally read-only but there are 2 instances where we revert it 194 /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens 195 /// using this method so we can inform serialization about it. 196 void revertTokenIDToIdentifier() { 197 assert(TokenID != tok::identifier && "Already at tok::identifier"); 198 TokenID = tok::identifier; 199 RevertedTokenID = true; 200 } 201 void revertIdentifierToTokenID(tok::TokenKind TK) { 202 assert(TokenID == tok::identifier && "Should be at tok::identifier"); 203 TokenID = TK; 204 RevertedTokenID = false; 205 } 206 207 /// Return the preprocessor keyword ID for this identifier. 208 /// 209 /// For example, "define" will return tok::pp_define. 210 tok::PPKeywordKind getPPKeywordID() const; 211 212 /// Return the Objective-C keyword ID for the this identifier. 213 /// 214 /// For example, 'class' will return tok::objc_class if ObjC is enabled. 215 tok::ObjCKeywordKind getObjCKeywordID() const { 216 if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS) 217 return tok::ObjCKeywordKind(ObjCOrBuiltinID); 218 else 219 return tok::objc_not_keyword; 220 } 221 void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; } 222 223 /// True if setNotBuiltin() was called. 224 bool hasRevertedBuiltin() const { 225 return ObjCOrBuiltinID == tok::NUM_OBJC_KEYWORDS; 226 } 227 228 /// Revert the identifier to a non-builtin identifier. We do this if 229 /// the name of a known builtin library function is used to declare that 230 /// function, but an unexpected type is specified. 231 void revertBuiltin() { 232 setBuiltinID(0); 233 } 234 235 /// Return a value indicating whether this is a builtin function. 236 /// 237 /// 0 is not-built-in. 1+ are specific builtin functions. 238 unsigned getBuiltinID() const { 239 if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS) 240 return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS; 241 else 242 return 0; 243 } 244 void setBuiltinID(unsigned ID) { 245 ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS; 246 assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID 247 && "ID too large for field!"); 248 } 249 250 unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; } 251 void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; } 252 253 /// get/setExtension - Initialize information about whether or not this 254 /// language token is an extension. This controls extension warnings, and is 255 /// only valid if a custom token ID is set. 256 bool isExtensionToken() const { return IsExtension; } 257 void setIsExtensionToken(bool Val) { 258 IsExtension = Val; 259 if (Val) 260 NeedsHandleIdentifier = true; 261 else 262 RecomputeNeedsHandleIdentifier(); 263 } 264 265 /// is/setIsFutureCompatKeyword - Initialize information about whether or not 266 /// this language token is a keyword in a newer or proposed Standard. This 267 /// controls compatibility warnings, and is only true when not parsing the 268 /// corresponding Standard. Once a compatibility problem has been diagnosed 269 /// with this keyword, the flag will be cleared. 270 bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; } 271 void setIsFutureCompatKeyword(bool Val) { 272 IsFutureCompatKeyword = Val; 273 if (Val) 274 NeedsHandleIdentifier = true; 275 else 276 RecomputeNeedsHandleIdentifier(); 277 } 278 279 /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the 280 /// Preprocessor will emit an error every time this token is used. 281 void setIsPoisoned(bool Value = true) { 282 IsPoisoned = Value; 283 if (Value) 284 NeedsHandleIdentifier = true; 285 else 286 RecomputeNeedsHandleIdentifier(); 287 } 288 289 /// Return true if this token has been poisoned. 290 bool isPoisoned() const { return IsPoisoned; } 291 292 /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether 293 /// this identifier is a C++ alternate representation of an operator. 294 void setIsCPlusPlusOperatorKeyword(bool Val = true) { 295 IsCPPOperatorKeyword = Val; 296 } 297 bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; } 298 299 /// Return true if this token is a keyword in the specified language. 300 bool isKeyword(const LangOptions &LangOpts) const; 301 302 /// Return true if this token is a C++ keyword in the specified 303 /// language. 304 bool isCPlusPlusKeyword(const LangOptions &LangOpts) const; 305 306 /// Get and set FETokenInfo. The language front-end is allowed to associate 307 /// arbitrary metadata with this token. 308 void *getFETokenInfo() const { return FETokenInfo; } 309 void setFETokenInfo(void *T) { FETokenInfo = T; } 310 311 /// Return true if the Preprocessor::HandleIdentifier must be called 312 /// on a token of this identifier. 313 /// 314 /// If this returns false, we know that HandleIdentifier will not affect 315 /// the token. 316 bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; } 317 318 /// Return true if the identifier in its current state was loaded 319 /// from an AST file. 320 bool isFromAST() const { return IsFromAST; } 321 322 void setIsFromAST() { IsFromAST = true; } 323 324 /// Determine whether this identifier has changed since it was loaded 325 /// from an AST file. 326 bool hasChangedSinceDeserialization() const { 327 return ChangedAfterLoad; 328 } 329 330 /// Note that this identifier has changed since it was loaded from 331 /// an AST file. 332 void setChangedSinceDeserialization() { 333 ChangedAfterLoad = true; 334 } 335 336 /// Determine whether the frontend token information for this 337 /// identifier has changed since it was loaded from an AST file. 338 bool hasFETokenInfoChangedSinceDeserialization() const { 339 return FEChangedAfterLoad; 340 } 341 342 /// Note that the frontend token information for this identifier has 343 /// changed since it was loaded from an AST file. 344 void setFETokenInfoChangedSinceDeserialization() { 345 FEChangedAfterLoad = true; 346 } 347 348 /// Determine whether the information for this identifier is out of 349 /// date with respect to the external source. 350 bool isOutOfDate() const { return OutOfDate; } 351 352 /// Set whether the information for this identifier is out of 353 /// date with respect to the external source. 354 void setOutOfDate(bool OOD) { 355 OutOfDate = OOD; 356 if (OOD) 357 NeedsHandleIdentifier = true; 358 else 359 RecomputeNeedsHandleIdentifier(); 360 } 361 362 /// Determine whether this is the contextual keyword \c import. 363 bool isModulesImport() const { return IsModulesImport; } 364 365 /// Set whether this identifier is the contextual keyword \c import. 366 void setModulesImport(bool I) { 367 IsModulesImport = I; 368 if (I) 369 NeedsHandleIdentifier = true; 370 else 371 RecomputeNeedsHandleIdentifier(); 372 } 373 374 /// Return true if this identifier is an editor placeholder. 375 /// 376 /// Editor placeholders are produced by the code-completion engine and are 377 /// represented as characters between '<#' and '#>' in the source code. An 378 /// example of auto-completed call with a placeholder parameter is shown 379 /// below: 380 /// \code 381 /// function(<#int x#>); 382 /// \endcode 383 bool isEditorPlaceholder() const { 384 return getName().startswith("<#") && getName().endswith("#>"); 385 } 386 387 /// Determine whether \p this is a name reserved for the implementation (C99 388 /// 7.1.3, C++ [lib.global.names]). 389 bool isReservedName(bool doubleUnderscoreOnly = false) const { 390 if (getLength() < 2) 391 return false; 392 const char *Name = getNameStart(); 393 return Name[0] == '_' && 394 (Name[1] == '_' || 395 (Name[1] >= 'A' && Name[1] <= 'Z' && !doubleUnderscoreOnly)); 396 } 397 398 /// Provide less than operator for lexicographical sorting. 399 bool operator<(const IdentifierInfo &RHS) const { 400 return getName() < RHS.getName(); 401 } 402 403 private: 404 /// The Preprocessor::HandleIdentifier does several special (but rare) 405 /// things to identifiers of various sorts. For example, it changes the 406 /// \c for keyword token from tok::identifier to tok::for. 407 /// 408 /// This method is very tied to the definition of HandleIdentifier. Any 409 /// change to it should be reflected here. 410 void RecomputeNeedsHandleIdentifier() { 411 NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() || 412 isExtensionToken() || isFutureCompatKeyword() || 413 isOutOfDate() || isModulesImport(); 414 } 415 }; 416 417 /// An RAII object for [un]poisoning an identifier within a scope. 418 /// 419 /// \p II is allowed to be null, in which case objects of this type have 420 /// no effect. 421 class PoisonIdentifierRAIIObject { 422 IdentifierInfo *const II; 423 const bool OldValue; 424 425 public: 426 PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue) 427 : II(II), OldValue(II ? II->isPoisoned() : false) { 428 if(II) 429 II->setIsPoisoned(NewValue); 430 } 431 432 ~PoisonIdentifierRAIIObject() { 433 if(II) 434 II->setIsPoisoned(OldValue); 435 } 436 }; 437 438 /// An iterator that walks over all of the known identifiers 439 /// in the lookup table. 440 /// 441 /// Since this iterator uses an abstract interface via virtual 442 /// functions, it uses an object-oriented interface rather than the 443 /// more standard C++ STL iterator interface. In this OO-style 444 /// iteration, the single function \c Next() provides dereference, 445 /// advance, and end-of-sequence checking in a single 446 /// operation. Subclasses of this iterator type will provide the 447 /// actual functionality. 448 class IdentifierIterator { 449 protected: 450 IdentifierIterator() = default; 451 452 public: 453 IdentifierIterator(const IdentifierIterator &) = delete; 454 IdentifierIterator &operator=(const IdentifierIterator &) = delete; 455 456 virtual ~IdentifierIterator(); 457 458 /// Retrieve the next string in the identifier table and 459 /// advances the iterator for the following string. 460 /// 461 /// \returns The next string in the identifier table. If there is 462 /// no such string, returns an empty \c StringRef. 463 virtual StringRef Next() = 0; 464 }; 465 466 /// Provides lookups to, and iteration over, IdentiferInfo objects. 467 class IdentifierInfoLookup { 468 public: 469 virtual ~IdentifierInfoLookup(); 470 471 /// Return the IdentifierInfo for the specified named identifier. 472 /// 473 /// Unlike the version in IdentifierTable, this returns a pointer instead 474 /// of a reference. If the pointer is null then the IdentifierInfo cannot 475 /// be found. 476 virtual IdentifierInfo* get(StringRef Name) = 0; 477 478 /// Retrieve an iterator into the set of all identifiers 479 /// known to this identifier lookup source. 480 /// 481 /// This routine provides access to all of the identifiers known to 482 /// the identifier lookup, allowing access to the contents of the 483 /// identifiers without introducing the overhead of constructing 484 /// IdentifierInfo objects for each. 485 /// 486 /// \returns A new iterator into the set of known identifiers. The 487 /// caller is responsible for deleting this iterator. 488 virtual IdentifierIterator *getIdentifiers(); 489 }; 490 491 /// Implements an efficient mapping from strings to IdentifierInfo nodes. 492 /// 493 /// This has no other purpose, but this is an extremely performance-critical 494 /// piece of the code, as each occurrence of every identifier goes through 495 /// here when lexed. 496 class IdentifierTable { 497 // Shark shows that using MallocAllocator is *much* slower than using this 498 // BumpPtrAllocator! 499 using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>; 500 HashTableTy HashTable; 501 502 IdentifierInfoLookup* ExternalLookup; 503 504 public: 505 /// Create the identifier table. 506 explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr); 507 508 /// Create the identifier table, populating it with info about the 509 /// language keywords for the language specified by \p LangOpts. 510 explicit IdentifierTable(const LangOptions &LangOpts, 511 IdentifierInfoLookup *ExternalLookup = nullptr); 512 513 /// Set the external identifier lookup mechanism. 514 void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) { 515 ExternalLookup = IILookup; 516 } 517 518 /// Retrieve the external identifier lookup object, if any. 519 IdentifierInfoLookup *getExternalIdentifierLookup() const { 520 return ExternalLookup; 521 } 522 523 llvm::BumpPtrAllocator& getAllocator() { 524 return HashTable.getAllocator(); 525 } 526 527 /// Return the identifier token info for the specified named 528 /// identifier. 529 IdentifierInfo &get(StringRef Name) { 530 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first; 531 532 IdentifierInfo *&II = Entry.second; 533 if (II) return *II; 534 535 // No entry; if we have an external lookup, look there first. 536 if (ExternalLookup) { 537 II = ExternalLookup->get(Name); 538 if (II) 539 return *II; 540 } 541 542 // Lookups failed, make a new IdentifierInfo. 543 void *Mem = getAllocator().Allocate<IdentifierInfo>(); 544 II = new (Mem) IdentifierInfo(); 545 546 // Make sure getName() knows how to find the IdentifierInfo 547 // contents. 548 II->Entry = &Entry; 549 550 return *II; 551 } 552 553 IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) { 554 IdentifierInfo &II = get(Name); 555 II.TokenID = TokenCode; 556 assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large"); 557 return II; 558 } 559 560 /// Gets an IdentifierInfo for the given name without consulting 561 /// external sources. 562 /// 563 /// This is a version of get() meant for external sources that want to 564 /// introduce or modify an identifier. If they called get(), they would 565 /// likely end up in a recursion. 566 IdentifierInfo &getOwn(StringRef Name) { 567 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first; 568 569 IdentifierInfo *&II = Entry.second; 570 if (II) 571 return *II; 572 573 // Lookups failed, make a new IdentifierInfo. 574 void *Mem = getAllocator().Allocate<IdentifierInfo>(); 575 II = new (Mem) IdentifierInfo(); 576 577 // Make sure getName() knows how to find the IdentifierInfo 578 // contents. 579 II->Entry = &Entry; 580 581 // If this is the 'import' contextual keyword, mark it as such. 582 if (Name.equals("import")) 583 II->setModulesImport(true); 584 585 return *II; 586 } 587 588 using iterator = HashTableTy::const_iterator; 589 using const_iterator = HashTableTy::const_iterator; 590 591 iterator begin() const { return HashTable.begin(); } 592 iterator end() const { return HashTable.end(); } 593 unsigned size() const { return HashTable.size(); } 594 595 iterator find(StringRef Name) const { return HashTable.find(Name); } 596 597 /// Print some statistics to stderr that indicate how well the 598 /// hashing is doing. 599 void PrintStats() const; 600 601 /// Populate the identifier table with info about the language keywords 602 /// for the language specified by \p LangOpts. 603 void AddKeywords(const LangOptions &LangOpts); 604 }; 605 606 /// A family of Objective-C methods. 607 /// 608 /// These families have no inherent meaning in the language, but are 609 /// nonetheless central enough in the existing implementations to 610 /// merit direct AST support. While, in theory, arbitrary methods can 611 /// be considered to form families, we focus here on the methods 612 /// involving allocation and retain-count management, as these are the 613 /// most "core" and the most likely to be useful to diverse clients 614 /// without extra information. 615 /// 616 /// Both selectors and actual method declarations may be classified 617 /// into families. Method families may impose additional restrictions 618 /// beyond their selector name; for example, a method called '_init' 619 /// that returns void is not considered to be in the 'init' family 620 /// (but would be if it returned 'id'). It is also possible to 621 /// explicitly change or remove a method's family. Therefore the 622 /// method's family should be considered the single source of truth. 623 enum ObjCMethodFamily { 624 /// No particular method family. 625 OMF_None, 626 627 // Selectors in these families may have arbitrary arity, may be 628 // written with arbitrary leading underscores, and may have 629 // additional CamelCase "words" in their first selector chunk 630 // following the family name. 631 OMF_alloc, 632 OMF_copy, 633 OMF_init, 634 OMF_mutableCopy, 635 OMF_new, 636 637 // These families are singletons consisting only of the nullary 638 // selector with the given name. 639 OMF_autorelease, 640 OMF_dealloc, 641 OMF_finalize, 642 OMF_release, 643 OMF_retain, 644 OMF_retainCount, 645 OMF_self, 646 OMF_initialize, 647 648 // performSelector families 649 OMF_performSelector 650 }; 651 652 /// Enough bits to store any enumerator in ObjCMethodFamily or 653 /// InvalidObjCMethodFamily. 654 enum { ObjCMethodFamilyBitWidth = 4 }; 655 656 /// An invalid value of ObjCMethodFamily. 657 enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 }; 658 659 /// A family of Objective-C methods. 660 /// 661 /// These are family of methods whose result type is initially 'id', but 662 /// but are candidate for the result type to be changed to 'instancetype'. 663 enum ObjCInstanceTypeFamily { 664 OIT_None, 665 OIT_Array, 666 OIT_Dictionary, 667 OIT_Singleton, 668 OIT_Init, 669 OIT_ReturnsSelf 670 }; 671 672 enum ObjCStringFormatFamily { 673 SFF_None, 674 SFF_NSString, 675 SFF_CFString 676 }; 677 678 /// Smart pointer class that efficiently represents Objective-C method 679 /// names. 680 /// 681 /// This class will either point to an IdentifierInfo or a 682 /// MultiKeywordSelector (which is private). This enables us to optimize 683 /// selectors that take no arguments and selectors that take 1 argument, which 684 /// accounts for 78% of all selectors in Cocoa.h. 685 class Selector { 686 friend class Diagnostic; 687 friend class SelectorTable; // only the SelectorTable can create these 688 friend class DeclarationName; // and the AST's DeclarationName. 689 690 enum IdentifierInfoFlag { 691 // Empty selector = 0. Note that these enumeration values must 692 // correspond to the enumeration values of DeclarationName::StoredNameKind 693 ZeroArg = 0x01, 694 OneArg = 0x02, 695 MultiArg = 0x07, 696 ArgFlags = 0x07 697 }; 698 699 /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low 700 /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any 701 /// case IdentifierInfo and MultiKeywordSelector are already aligned to 702 /// 8 bytes even on 32 bits archs because of DeclarationName. 703 uintptr_t InfoPtr = 0; 704 705 Selector(IdentifierInfo *II, unsigned nArgs) { 706 InfoPtr = reinterpret_cast<uintptr_t>(II); 707 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo"); 708 assert(nArgs < 2 && "nArgs not equal to 0/1"); 709 InfoPtr |= nArgs+1; 710 } 711 712 Selector(MultiKeywordSelector *SI) { 713 InfoPtr = reinterpret_cast<uintptr_t>(SI); 714 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo"); 715 InfoPtr |= MultiArg; 716 } 717 718 IdentifierInfo *getAsIdentifierInfo() const { 719 if (getIdentifierInfoFlag() < MultiArg) 720 return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags); 721 return nullptr; 722 } 723 724 MultiKeywordSelector *getMultiKeywordSelector() const { 725 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags); 726 } 727 728 unsigned getIdentifierInfoFlag() const { 729 return InfoPtr & ArgFlags; 730 } 731 732 static ObjCMethodFamily getMethodFamilyImpl(Selector sel); 733 734 static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel); 735 736 public: 737 /// The default ctor should only be used when creating data structures that 738 /// will contain selectors. 739 Selector() = default; 740 explicit Selector(uintptr_t V) : InfoPtr(V) {} 741 742 /// operator==/!= - Indicate whether the specified selectors are identical. 743 bool operator==(Selector RHS) const { 744 return InfoPtr == RHS.InfoPtr; 745 } 746 bool operator!=(Selector RHS) const { 747 return InfoPtr != RHS.InfoPtr; 748 } 749 750 void *getAsOpaquePtr() const { 751 return reinterpret_cast<void*>(InfoPtr); 752 } 753 754 /// Determine whether this is the empty selector. 755 bool isNull() const { return InfoPtr == 0; } 756 757 // Predicates to identify the selector type. 758 bool isKeywordSelector() const { 759 return getIdentifierInfoFlag() != ZeroArg; 760 } 761 762 bool isUnarySelector() const { 763 return getIdentifierInfoFlag() == ZeroArg; 764 } 765 766 /// If this selector is the specific keyword selector described by Names. 767 bool isKeywordSelector(ArrayRef<StringRef> Names) const; 768 769 /// If this selector is the specific unary selector described by Name. 770 bool isUnarySelector(StringRef Name) const; 771 772 unsigned getNumArgs() const; 773 774 /// Retrieve the identifier at a given position in the selector. 775 /// 776 /// Note that the identifier pointer returned may be NULL. Clients that only 777 /// care about the text of the identifier string, and not the specific, 778 /// uniqued identifier pointer, should use \c getNameForSlot(), which returns 779 /// an empty string when the identifier pointer would be NULL. 780 /// 781 /// \param argIndex The index for which we want to retrieve the identifier. 782 /// This index shall be less than \c getNumArgs() unless this is a keyword 783 /// selector, in which case 0 is the only permissible value. 784 /// 785 /// \returns the uniqued identifier for this slot, or NULL if this slot has 786 /// no corresponding identifier. 787 IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const; 788 789 /// Retrieve the name at a given position in the selector. 790 /// 791 /// \param argIndex The index for which we want to retrieve the name. 792 /// This index shall be less than \c getNumArgs() unless this is a keyword 793 /// selector, in which case 0 is the only permissible value. 794 /// 795 /// \returns the name for this slot, which may be the empty string if no 796 /// name was supplied. 797 StringRef getNameForSlot(unsigned argIndex) const; 798 799 /// Derive the full selector name (e.g. "foo:bar:") and return 800 /// it as an std::string. 801 std::string getAsString() const; 802 803 /// Prints the full selector name (e.g. "foo:bar:"). 804 void print(llvm::raw_ostream &OS) const; 805 806 void dump() const; 807 808 /// Derive the conventional family of this method. 809 ObjCMethodFamily getMethodFamily() const { 810 return getMethodFamilyImpl(*this); 811 } 812 813 ObjCStringFormatFamily getStringFormatFamily() const { 814 return getStringFormatFamilyImpl(*this); 815 } 816 817 static Selector getEmptyMarker() { 818 return Selector(uintptr_t(-1)); 819 } 820 821 static Selector getTombstoneMarker() { 822 return Selector(uintptr_t(-2)); 823 } 824 825 static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel); 826 }; 827 828 /// This table allows us to fully hide how we implement 829 /// multi-keyword caching. 830 class SelectorTable { 831 // Actually a SelectorTableImpl 832 void *Impl; 833 834 public: 835 SelectorTable(); 836 SelectorTable(const SelectorTable &) = delete; 837 SelectorTable &operator=(const SelectorTable &) = delete; 838 ~SelectorTable(); 839 840 /// Can create any sort of selector. 841 /// 842 /// \p NumArgs indicates whether this is a no argument selector "foo", a 843 /// single argument selector "foo:" or multi-argument "foo:bar:". 844 Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV); 845 846 Selector getUnarySelector(IdentifierInfo *ID) { 847 return Selector(ID, 1); 848 } 849 850 Selector getNullarySelector(IdentifierInfo *ID) { 851 return Selector(ID, 0); 852 } 853 854 /// Return the total amount of memory allocated for managing selectors. 855 size_t getTotalMemory() const; 856 857 /// Return the default setter name for the given identifier. 858 /// 859 /// This is "set" + \p Name where the initial character of \p Name 860 /// has been capitalized. 861 static SmallString<64> constructSetterName(StringRef Name); 862 863 /// Return the default setter selector for the given identifier. 864 /// 865 /// This is "set" + \p Name where the initial character of \p Name 866 /// has been capitalized. 867 static Selector constructSetterSelector(IdentifierTable &Idents, 868 SelectorTable &SelTable, 869 const IdentifierInfo *Name); 870 871 /// Return the property name for the given setter selector. 872 static std::string getPropertyNameFromSetterSelector(Selector Sel); 873 }; 874 875 namespace detail { 876 877 /// DeclarationNameExtra is used as a base of various uncommon special names. 878 /// This class is needed since DeclarationName has not enough space to store 879 /// the kind of every possible names. Therefore the kind of common names is 880 /// stored directly in DeclarationName, and the kind of uncommon names is 881 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because 882 /// DeclarationName needs the lower 3 bits to store the kind of common names. 883 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change 884 /// here is very likely to require changes in DeclarationName(Table). 885 class alignas(IdentifierInfoAlignment) DeclarationNameExtra { 886 friend class clang::DeclarationName; 887 friend class clang::DeclarationNameTable; 888 889 protected: 890 /// The kind of "extra" information stored in the DeclarationName. See 891 /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values 892 /// are used. Note that DeclarationName depends on the numerical values 893 /// of the enumerators in this enum. See DeclarationName::StoredNameKind 894 /// for more info. 895 enum ExtraKind { 896 CXXDeductionGuideName, 897 CXXLiteralOperatorName, 898 CXXUsingDirective, 899 ObjCMultiArgSelector 900 }; 901 902 /// ExtraKindOrNumArgs has one of the following meaning: 903 /// * The kind of an uncommon C++ special name. This DeclarationNameExtra 904 /// is in this case in fact either a CXXDeductionGuideNameExtra or 905 /// a CXXLiteralOperatorIdName. 906 /// 907 /// * It may be also name common to C++ using-directives (CXXUsingDirective), 908 /// 909 /// * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is 910 /// the number of arguments in the Objective-C selector, in which 911 /// case the DeclarationNameExtra is also a MultiKeywordSelector. 912 unsigned ExtraKindOrNumArgs; 913 914 DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {} 915 DeclarationNameExtra(unsigned NumArgs) 916 : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {} 917 918 /// Return the corresponding ExtraKind. 919 ExtraKind getKind() const { 920 return static_cast<ExtraKind>(ExtraKindOrNumArgs > 921 (unsigned)ObjCMultiArgSelector 922 ? (unsigned)ObjCMultiArgSelector 923 : ExtraKindOrNumArgs); 924 } 925 926 /// Return the number of arguments in an ObjC selector. Only valid when this 927 /// is indeed an ObjCMultiArgSelector. 928 unsigned getNumArgs() const { 929 assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector && 930 "getNumArgs called but this is not an ObjC selector!"); 931 return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector; 932 } 933 }; 934 935 } // namespace detail 936 937 } // namespace clang 938 939 namespace llvm { 940 941 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and 942 /// DenseSets. 943 template <> 944 struct DenseMapInfo<clang::Selector> { 945 static clang::Selector getEmptyKey() { 946 return clang::Selector::getEmptyMarker(); 947 } 948 949 static clang::Selector getTombstoneKey() { 950 return clang::Selector::getTombstoneMarker(); 951 } 952 953 static unsigned getHashValue(clang::Selector S); 954 955 static bool isEqual(clang::Selector LHS, clang::Selector RHS) { 956 return LHS == RHS; 957 } 958 }; 959 960 template<> 961 struct PointerLikeTypeTraits<clang::Selector> { 962 static const void *getAsVoidPointer(clang::Selector P) { 963 return P.getAsOpaquePtr(); 964 } 965 966 static clang::Selector getFromVoidPointer(const void *P) { 967 return clang::Selector(reinterpret_cast<uintptr_t>(P)); 968 } 969 970 enum { NumLowBitsAvailable = 0 }; 971 }; 972 973 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which 974 // are not guaranteed to be 8-byte aligned. 975 template<> 976 struct PointerLikeTypeTraits<clang::IdentifierInfo*> { 977 static void *getAsVoidPointer(clang::IdentifierInfo* P) { 978 return P; 979 } 980 981 static clang::IdentifierInfo *getFromVoidPointer(void *P) { 982 return static_cast<clang::IdentifierInfo*>(P); 983 } 984 985 enum { NumLowBitsAvailable = 1 }; 986 }; 987 988 template<> 989 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> { 990 static const void *getAsVoidPointer(const clang::IdentifierInfo* P) { 991 return P; 992 } 993 994 static const clang::IdentifierInfo *getFromVoidPointer(const void *P) { 995 return static_cast<const clang::IdentifierInfo*>(P); 996 } 997 998 enum { NumLowBitsAvailable = 1 }; 999 }; 1000 1001 } // namespace llvm 1002 1003 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 1004