1 //===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
12 /// clang::Selector interfaces.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
18 
19 #include "clang/Basic/LLVM.h"
20 #include "clang/Basic/TokenKinds.h"
21 #include "llvm/ADT/DenseMapInfo.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/ADT/StringMap.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Support/Allocator.h"
26 #include "llvm/Support/PointerLikeTypeTraits.h"
27 #include "llvm/Support/type_traits.h"
28 #include <cassert>
29 #include <cstddef>
30 #include <cstdint>
31 #include <cstring>
32 #include <string>
33 #include <utility>
34 
35 namespace clang {
36 
37 class IdentifierInfo;
38 class LangOptions;
39 class MultiKeywordSelector;
40 class SourceLocation;
41 
42 /// A simple pair of identifier info and location.
43 using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;
44 
45 /// One of these records is kept for each identifier that
46 /// is lexed.  This contains information about whether the token was \#define'd,
47 /// is a language keyword, or if it is a front-end token of some sort (e.g. a
48 /// variable or function name).  The preprocessor keeps this information in a
49 /// set, and all tok::identifier tokens have a pointer to one of these.
50 class IdentifierInfo {
51   friend class IdentifierTable;
52 
53   unsigned TokenID            : 9; // Front-end token ID or tok::identifier.
54   // Objective-C keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
55   // First NUM_OBJC_KEYWORDS values are for Objective-C, the remaining values
56   // are for builtins.
57   unsigned ObjCOrBuiltinID    :13;
58   bool HasMacro               : 1; // True if there is a #define for this.
59   bool HadMacro               : 1; // True if there was a #define for this.
60   bool IsExtension            : 1; // True if identifier is a lang extension.
61   bool IsFutureCompatKeyword  : 1; // True if identifier is a keyword in a
62                                    // newer Standard or proposed Standard.
63   bool IsPoisoned             : 1; // True if identifier is poisoned.
64   bool IsCPPOperatorKeyword   : 1; // True if ident is a C++ operator keyword.
65   bool NeedsHandleIdentifier  : 1; // See "RecomputeNeedsHandleIdentifier".
66   bool IsFromAST              : 1; // True if identifier was loaded (at least
67                                    // partially) from an AST file.
68   bool ChangedAfterLoad       : 1; // True if identifier has changed from the
69                                    // definition loaded from an AST file.
70   bool FEChangedAfterLoad     : 1; // True if identifier's frontend information
71                                    // has changed from the definition loaded
72                                    // from an AST file.
73   bool RevertedTokenID        : 1; // True if revertTokenIDToIdentifier was
74                                    // called.
75   bool OutOfDate              : 1; // True if there may be additional
76                                    // information about this identifier
77                                    // stored externally.
78   bool IsModulesImport        : 1; // True if this is the 'import' contextual
79                                    // keyword.
80   // 29 bit left in 64-bit word.
81 
82   // Managed by the language front-end.
83   void *FETokenInfo = nullptr;
84 
85   llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
86 
87 public:
88   IdentifierInfo();
89   IdentifierInfo(const IdentifierInfo &) = delete;
90   IdentifierInfo &operator=(const IdentifierInfo &) = delete;
91 
92   /// Return true if this is the identifier for the specified string.
93   ///
94   /// This is intended to be used for string literals only: II->isStr("foo").
95   template <std::size_t StrLen>
isStr(const char (& Str)[StrLen])96   bool isStr(const char (&Str)[StrLen]) const {
97     return getLength() == StrLen-1 &&
98            memcmp(getNameStart(), Str, StrLen-1) == 0;
99   }
100 
101   /// Return true if this is the identifier for the specified StringRef.
isStr(llvm::StringRef Str)102   bool isStr(llvm::StringRef Str) const {
103     llvm::StringRef ThisStr(getNameStart(), getLength());
104     return ThisStr == Str;
105   }
106 
107   /// Return the beginning of the actual null-terminated string for this
108   /// identifier.
getNameStart()109   const char *getNameStart() const {
110     if (Entry) return Entry->getKeyData();
111     // FIXME: This is gross. It would be best not to embed specific details
112     // of the PTH file format here.
113     // The 'this' pointer really points to a
114     // std::pair<IdentifierInfo, const char*>, where internal pointer
115     // points to the external string data.
116     using actualtype = std::pair<IdentifierInfo, const char *>;
117 
118     return ((const actualtype*) this)->second;
119   }
120 
121   /// Efficiently return the length of this identifier info.
getLength()122   unsigned getLength() const {
123     if (Entry) return Entry->getKeyLength();
124     // FIXME: This is gross. It would be best not to embed specific details
125     // of the PTH file format here.
126     // The 'this' pointer really points to a
127     // std::pair<IdentifierInfo, const char*>, where internal pointer
128     // points to the external string data.
129     using actualtype = std::pair<IdentifierInfo, const char *>;
130 
131     const char* p = ((const actualtype*) this)->second - 2;
132     return (((unsigned) p[0]) | (((unsigned) p[1]) << 8)) - 1;
133   }
134 
135   /// Return the actual identifier string.
getName()136   StringRef getName() const {
137     return StringRef(getNameStart(), getLength());
138   }
139 
140   /// Return true if this identifier is \#defined to some other value.
141   /// \note The current definition may be in a module and not currently visible.
hasMacroDefinition()142   bool hasMacroDefinition() const {
143     return HasMacro;
144   }
setHasMacroDefinition(bool Val)145   void setHasMacroDefinition(bool Val) {
146     if (HasMacro == Val) return;
147 
148     HasMacro = Val;
149     if (Val) {
150       NeedsHandleIdentifier = true;
151       HadMacro = true;
152     } else {
153       RecomputeNeedsHandleIdentifier();
154     }
155   }
156   /// Returns true if this identifier was \#defined to some value at any
157   /// moment. In this case there should be an entry for the identifier in the
158   /// macro history table in Preprocessor.
hadMacroDefinition()159   bool hadMacroDefinition() const {
160     return HadMacro;
161   }
162 
163   /// If this is a source-language token (e.g. 'for'), this API
164   /// can be used to cause the lexer to map identifiers to source-language
165   /// tokens.
getTokenID()166   tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
167 
168   /// True if revertTokenIDToIdentifier() was called.
hasRevertedTokenIDToIdentifier()169   bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
170 
171   /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
172   /// compatibility.
173   ///
174   /// TokenID is normally read-only but there are 2 instances where we revert it
175   /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
176   /// using this method so we can inform serialization about it.
revertTokenIDToIdentifier()177   void revertTokenIDToIdentifier() {
178     assert(TokenID != tok::identifier && "Already at tok::identifier");
179     TokenID = tok::identifier;
180     RevertedTokenID = true;
181   }
revertIdentifierToTokenID(tok::TokenKind TK)182   void revertIdentifierToTokenID(tok::TokenKind TK) {
183     assert(TokenID == tok::identifier && "Should be at tok::identifier");
184     TokenID = TK;
185     RevertedTokenID = false;
186   }
187 
188   /// Return the preprocessor keyword ID for this identifier.
189   ///
190   /// For example, "define" will return tok::pp_define.
191   tok::PPKeywordKind getPPKeywordID() const;
192 
193   /// Return the Objective-C keyword ID for the this identifier.
194   ///
195   /// For example, 'class' will return tok::objc_class if ObjC is enabled.
getObjCKeywordID()196   tok::ObjCKeywordKind getObjCKeywordID() const {
197     if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
198       return tok::ObjCKeywordKind(ObjCOrBuiltinID);
199     else
200       return tok::objc_not_keyword;
201   }
setObjCKeywordID(tok::ObjCKeywordKind ID)202   void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
203 
204   /// True if setNotBuiltin() was called.
hasRevertedBuiltin()205   bool hasRevertedBuiltin() const {
206     return ObjCOrBuiltinID == tok::NUM_OBJC_KEYWORDS;
207   }
208 
209   /// Revert the identifier to a non-builtin identifier. We do this if
210   /// the name of a known builtin library function is used to declare that
211   /// function, but an unexpected type is specified.
revertBuiltin()212   void revertBuiltin() {
213     setBuiltinID(0);
214   }
215 
216   /// Return a value indicating whether this is a builtin function.
217   ///
218   /// 0 is not-built-in. 1+ are specific builtin functions.
getBuiltinID()219   unsigned getBuiltinID() const {
220     if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
221       return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
222     else
223       return 0;
224   }
setBuiltinID(unsigned ID)225   void setBuiltinID(unsigned ID) {
226     ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
227     assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
228            && "ID too large for field!");
229   }
230 
getObjCOrBuiltinID()231   unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
setObjCOrBuiltinID(unsigned ID)232   void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
233 
234   /// get/setExtension - Initialize information about whether or not this
235   /// language token is an extension.  This controls extension warnings, and is
236   /// only valid if a custom token ID is set.
isExtensionToken()237   bool isExtensionToken() const { return IsExtension; }
setIsExtensionToken(bool Val)238   void setIsExtensionToken(bool Val) {
239     IsExtension = Val;
240     if (Val)
241       NeedsHandleIdentifier = true;
242     else
243       RecomputeNeedsHandleIdentifier();
244   }
245 
246   /// is/setIsFutureCompatKeyword - Initialize information about whether or not
247   /// this language token is a keyword in a newer or proposed Standard. This
248   /// controls compatibility warnings, and is only true when not parsing the
249   /// corresponding Standard. Once a compatibility problem has been diagnosed
250   /// with this keyword, the flag will be cleared.
isFutureCompatKeyword()251   bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
setIsFutureCompatKeyword(bool Val)252   void setIsFutureCompatKeyword(bool Val) {
253     IsFutureCompatKeyword = Val;
254     if (Val)
255       NeedsHandleIdentifier = true;
256     else
257       RecomputeNeedsHandleIdentifier();
258   }
259 
260   /// setIsPoisoned - Mark this identifier as poisoned.  After poisoning, the
261   /// Preprocessor will emit an error every time this token is used.
262   void setIsPoisoned(bool Value = true) {
263     IsPoisoned = Value;
264     if (Value)
265       NeedsHandleIdentifier = true;
266     else
267       RecomputeNeedsHandleIdentifier();
268   }
269 
270   /// Return true if this token has been poisoned.
isPoisoned()271   bool isPoisoned() const { return IsPoisoned; }
272 
273   /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
274   /// this identifier is a C++ alternate representation of an operator.
275   void setIsCPlusPlusOperatorKeyword(bool Val = true) {
276     IsCPPOperatorKeyword = Val;
277   }
isCPlusPlusOperatorKeyword()278   bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
279 
280   /// Return true if this token is a keyword in the specified language.
281   bool isKeyword(const LangOptions &LangOpts) const;
282 
283   /// Return true if this token is a C++ keyword in the specified
284   /// language.
285   bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
286 
287   /// getFETokenInfo/setFETokenInfo - The language front-end is allowed to
288   /// associate arbitrary metadata with this token.
289   template<typename T>
getFETokenInfo()290   T *getFETokenInfo() const { return static_cast<T*>(FETokenInfo); }
setFETokenInfo(void * T)291   void setFETokenInfo(void *T) { FETokenInfo = T; }
292 
293   /// Return true if the Preprocessor::HandleIdentifier must be called
294   /// on a token of this identifier.
295   ///
296   /// If this returns false, we know that HandleIdentifier will not affect
297   /// the token.
isHandleIdentifierCase()298   bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
299 
300   /// Return true if the identifier in its current state was loaded
301   /// from an AST file.
isFromAST()302   bool isFromAST() const { return IsFromAST; }
303 
setIsFromAST()304   void setIsFromAST() { IsFromAST = true; }
305 
306   /// Determine whether this identifier has changed since it was loaded
307   /// from an AST file.
hasChangedSinceDeserialization()308   bool hasChangedSinceDeserialization() const {
309     return ChangedAfterLoad;
310   }
311 
312   /// Note that this identifier has changed since it was loaded from
313   /// an AST file.
setChangedSinceDeserialization()314   void setChangedSinceDeserialization() {
315     ChangedAfterLoad = true;
316   }
317 
318   /// Determine whether the frontend token information for this
319   /// identifier has changed since it was loaded from an AST file.
hasFETokenInfoChangedSinceDeserialization()320   bool hasFETokenInfoChangedSinceDeserialization() const {
321     return FEChangedAfterLoad;
322   }
323 
324   /// Note that the frontend token information for this identifier has
325   /// changed since it was loaded from an AST file.
setFETokenInfoChangedSinceDeserialization()326   void setFETokenInfoChangedSinceDeserialization() {
327     FEChangedAfterLoad = true;
328   }
329 
330   /// Determine whether the information for this identifier is out of
331   /// date with respect to the external source.
isOutOfDate()332   bool isOutOfDate() const { return OutOfDate; }
333 
334   /// Set whether the information for this identifier is out of
335   /// date with respect to the external source.
setOutOfDate(bool OOD)336   void setOutOfDate(bool OOD) {
337     OutOfDate = OOD;
338     if (OOD)
339       NeedsHandleIdentifier = true;
340     else
341       RecomputeNeedsHandleIdentifier();
342   }
343 
344   /// Determine whether this is the contextual keyword \c import.
isModulesImport()345   bool isModulesImport() const { return IsModulesImport; }
346 
347   /// Set whether this identifier is the contextual keyword \c import.
setModulesImport(bool I)348   void setModulesImport(bool I) {
349     IsModulesImport = I;
350     if (I)
351       NeedsHandleIdentifier = true;
352     else
353       RecomputeNeedsHandleIdentifier();
354   }
355 
356   /// Return true if this identifier is an editor placeholder.
357   ///
358   /// Editor placeholders are produced by the code-completion engine and are
359   /// represented as characters between '<#' and '#>' in the source code. An
360   /// example of auto-completed call with a placeholder parameter is shown
361   /// below:
362   /// \code
363   ///   function(<#int x#>);
364   /// \endcode
isEditorPlaceholder()365   bool isEditorPlaceholder() const {
366     return getName().startswith("<#") && getName().endswith("#>");
367   }
368 
369   /// Provide less than operator for lexicographical sorting.
370   bool operator<(const IdentifierInfo &RHS) const {
371     return getName() < RHS.getName();
372   }
373 
374 private:
375   /// The Preprocessor::HandleIdentifier does several special (but rare)
376   /// things to identifiers of various sorts.  For example, it changes the
377   /// \c for keyword token from tok::identifier to tok::for.
378   ///
379   /// This method is very tied to the definition of HandleIdentifier.  Any
380   /// change to it should be reflected here.
RecomputeNeedsHandleIdentifier()381   void RecomputeNeedsHandleIdentifier() {
382     NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
383                             isExtensionToken() || isFutureCompatKeyword() ||
384                             isOutOfDate() || isModulesImport();
385   }
386 };
387 
388 /// An RAII object for [un]poisoning an identifier within a scope.
389 ///
390 /// \p II is allowed to be null, in which case objects of this type have
391 /// no effect.
392 class PoisonIdentifierRAIIObject {
393   IdentifierInfo *const II;
394   const bool OldValue;
395 
396 public:
PoisonIdentifierRAIIObject(IdentifierInfo * II,bool NewValue)397   PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
398     : II(II), OldValue(II ? II->isPoisoned() : false) {
399     if(II)
400       II->setIsPoisoned(NewValue);
401   }
402 
~PoisonIdentifierRAIIObject()403   ~PoisonIdentifierRAIIObject() {
404     if(II)
405       II->setIsPoisoned(OldValue);
406   }
407 };
408 
409 /// An iterator that walks over all of the known identifiers
410 /// in the lookup table.
411 ///
412 /// Since this iterator uses an abstract interface via virtual
413 /// functions, it uses an object-oriented interface rather than the
414 /// more standard C++ STL iterator interface. In this OO-style
415 /// iteration, the single function \c Next() provides dereference,
416 /// advance, and end-of-sequence checking in a single
417 /// operation. Subclasses of this iterator type will provide the
418 /// actual functionality.
419 class IdentifierIterator {
420 protected:
421   IdentifierIterator() = default;
422 
423 public:
424   IdentifierIterator(const IdentifierIterator &) = delete;
425   IdentifierIterator &operator=(const IdentifierIterator &) = delete;
426 
427   virtual ~IdentifierIterator();
428 
429   /// Retrieve the next string in the identifier table and
430   /// advances the iterator for the following string.
431   ///
432   /// \returns The next string in the identifier table. If there is
433   /// no such string, returns an empty \c StringRef.
434   virtual StringRef Next() = 0;
435 };
436 
437 /// Provides lookups to, and iteration over, IdentiferInfo objects.
438 class IdentifierInfoLookup {
439 public:
440   virtual ~IdentifierInfoLookup();
441 
442   /// Return the IdentifierInfo for the specified named identifier.
443   ///
444   /// Unlike the version in IdentifierTable, this returns a pointer instead
445   /// of a reference.  If the pointer is null then the IdentifierInfo cannot
446   /// be found.
447   virtual IdentifierInfo* get(StringRef Name) = 0;
448 
449   /// Retrieve an iterator into the set of all identifiers
450   /// known to this identifier lookup source.
451   ///
452   /// This routine provides access to all of the identifiers known to
453   /// the identifier lookup, allowing access to the contents of the
454   /// identifiers without introducing the overhead of constructing
455   /// IdentifierInfo objects for each.
456   ///
457   /// \returns A new iterator into the set of known identifiers. The
458   /// caller is responsible for deleting this iterator.
459   virtual IdentifierIterator *getIdentifiers();
460 };
461 
462 /// Implements an efficient mapping from strings to IdentifierInfo nodes.
463 ///
464 /// This has no other purpose, but this is an extremely performance-critical
465 /// piece of the code, as each occurrence of every identifier goes through
466 /// here when lexed.
467 class IdentifierTable {
468   // Shark shows that using MallocAllocator is *much* slower than using this
469   // BumpPtrAllocator!
470   using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>;
471   HashTableTy HashTable;
472 
473   IdentifierInfoLookup* ExternalLookup;
474 
475 public:
476   /// Create the identifier table.
477   explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr);
478 
479   /// Create the identifier table, populating it with info about the
480   /// language keywords for the language specified by \p LangOpts.
481   explicit IdentifierTable(const LangOptions &LangOpts,
482                            IdentifierInfoLookup *ExternalLookup = nullptr);
483 
484   /// Set the external identifier lookup mechanism.
setExternalIdentifierLookup(IdentifierInfoLookup * IILookup)485   void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
486     ExternalLookup = IILookup;
487   }
488 
489   /// Retrieve the external identifier lookup object, if any.
getExternalIdentifierLookup()490   IdentifierInfoLookup *getExternalIdentifierLookup() const {
491     return ExternalLookup;
492   }
493 
getAllocator()494   llvm::BumpPtrAllocator& getAllocator() {
495     return HashTable.getAllocator();
496   }
497 
498   /// Return the identifier token info for the specified named
499   /// identifier.
get(StringRef Name)500   IdentifierInfo &get(StringRef Name) {
501     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
502 
503     IdentifierInfo *&II = Entry.second;
504     if (II) return *II;
505 
506     // No entry; if we have an external lookup, look there first.
507     if (ExternalLookup) {
508       II = ExternalLookup->get(Name);
509       if (II)
510         return *II;
511     }
512 
513     // Lookups failed, make a new IdentifierInfo.
514     void *Mem = getAllocator().Allocate<IdentifierInfo>();
515     II = new (Mem) IdentifierInfo();
516 
517     // Make sure getName() knows how to find the IdentifierInfo
518     // contents.
519     II->Entry = &Entry;
520 
521     return *II;
522   }
523 
get(StringRef Name,tok::TokenKind TokenCode)524   IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
525     IdentifierInfo &II = get(Name);
526     II.TokenID = TokenCode;
527     assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
528     return II;
529   }
530 
531   /// Gets an IdentifierInfo for the given name without consulting
532   ///        external sources.
533   ///
534   /// This is a version of get() meant for external sources that want to
535   /// introduce or modify an identifier. If they called get(), they would
536   /// likely end up in a recursion.
getOwn(StringRef Name)537   IdentifierInfo &getOwn(StringRef Name) {
538     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
539 
540     IdentifierInfo *&II = Entry.second;
541     if (II)
542       return *II;
543 
544     // Lookups failed, make a new IdentifierInfo.
545     void *Mem = getAllocator().Allocate<IdentifierInfo>();
546     II = new (Mem) IdentifierInfo();
547 
548     // Make sure getName() knows how to find the IdentifierInfo
549     // contents.
550     II->Entry = &Entry;
551 
552     // If this is the 'import' contextual keyword, mark it as such.
553     if (Name.equals("import"))
554       II->setModulesImport(true);
555 
556     return *II;
557   }
558 
559   using iterator = HashTableTy::const_iterator;
560   using const_iterator = HashTableTy::const_iterator;
561 
begin()562   iterator begin() const { return HashTable.begin(); }
end()563   iterator end() const   { return HashTable.end(); }
size()564   unsigned size() const  { return HashTable.size(); }
565 
566   /// Print some statistics to stderr that indicate how well the
567   /// hashing is doing.
568   void PrintStats() const;
569 
570   /// Populate the identifier table with info about the language keywords
571   /// for the language specified by \p LangOpts.
572   void AddKeywords(const LangOptions &LangOpts);
573 };
574 
/// A family of Objective-C methods.
///
/// Families carry no inherent meaning in the language, but they are central
/// enough in the existing implementations to merit direct AST support.
/// Although arbitrary methods could in theory be grouped into families, the
/// focus here is on the methods involving allocation and retain-count
/// management, since those are the most "core" and the most likely to be
/// useful to diverse clients without extra information.
///
/// Both selectors and actual method declarations may be classified into
/// families.  A method family may impose restrictions beyond the selector
/// name; for example, a method called '_init' that returns void is not
/// considered to be in the 'init' family (but would be if it returned 'id').
/// A method's family can also be explicitly changed or removed, so the
/// method's family should be considered the single source of truth.
enum ObjCMethodFamily {
  /// No particular method family.
  OMF_None,

  // Selectors in the next five families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have additional
  // CamelCase "words" in their first selector chunk following the family
  // name.
  OMF_alloc,
  OMF_copy,
  OMF_init,
  OMF_mutableCopy,
  OMF_new,

  // The next eight families are singletons: each consists only of the
  // nullary selector with the given name.
  OMF_autorelease,
  OMF_dealloc,
  OMF_finalize,
  OMF_release,
  OMF_retain,
  OMF_retainCount,
  OMF_self,
  OMF_initialize,

  // performSelector families
  OMF_performSelector
};
620 
/// Number of bits sufficient to store any enumerator in ObjCMethodFamily or
/// InvalidObjCMethodFamily.
enum {
  ObjCMethodFamilyBitWidth = 4
};

/// An invalid value of ObjCMethodFamily: the largest value representable in
/// ObjCMethodFamilyBitWidth bits.
enum {
  InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1
};
627 
/// A family of Objective-C methods.
///
/// These are families of methods whose result type is initially 'id', but
/// which are candidates for having the result type changed to
/// 'instancetype'.
enum ObjCInstanceTypeFamily {
  OIT_None,
  OIT_Array,
  OIT_Dictionary,
  OIT_Singleton,
  OIT_Init,
  OIT_ReturnsSelf
};
640 
/// String-format family of a selector, as reported by
/// Selector::getStringFormatFamily().
enum ObjCStringFormatFamily {
  SFF_None,
  SFF_NSString,
  SFF_CFString
};
646 
647 /// Smart pointer class that efficiently represents Objective-C method
648 /// names.
649 ///
650 /// This class will either point to an IdentifierInfo or a
651 /// MultiKeywordSelector (which is private). This enables us to optimize
652 /// selectors that take no arguments and selectors that take 1 argument, which
653 /// accounts for 78% of all selectors in Cocoa.h.
654 class Selector {
655   friend class Diagnostic;
656 
657   enum IdentifierInfoFlag {
658     // Empty selector = 0.
659     ZeroArg  = 0x1,
660     OneArg   = 0x2,
661     MultiArg = 0x3,
662     ArgFlags = ZeroArg|OneArg
663   };
664 
665   // a pointer to the MultiKeywordSelector or IdentifierInfo.
666   uintptr_t InfoPtr = 0;
667 
Selector(IdentifierInfo * II,unsigned nArgs)668   Selector(IdentifierInfo *II, unsigned nArgs) {
669     InfoPtr = reinterpret_cast<uintptr_t>(II);
670     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
671     assert(nArgs < 2 && "nArgs not equal to 0/1");
672     InfoPtr |= nArgs+1;
673   }
674 
Selector(MultiKeywordSelector * SI)675   Selector(MultiKeywordSelector *SI) {
676     InfoPtr = reinterpret_cast<uintptr_t>(SI);
677     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
678     InfoPtr |= MultiArg;
679   }
680 
getAsIdentifierInfo()681   IdentifierInfo *getAsIdentifierInfo() const {
682     if (getIdentifierInfoFlag() < MultiArg)
683       return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
684     return nullptr;
685   }
686 
getMultiKeywordSelector()687   MultiKeywordSelector *getMultiKeywordSelector() const {
688     return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
689   }
690 
getIdentifierInfoFlag()691   unsigned getIdentifierInfoFlag() const {
692     return InfoPtr & ArgFlags;
693   }
694 
695   static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
696 
697   static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
698 
699 public:
700   friend class SelectorTable; // only the SelectorTable can create these
701   friend class DeclarationName; // and the AST's DeclarationName.
702 
703   /// The default ctor should only be used when creating data structures that
704   ///  will contain selectors.
705   Selector() = default;
Selector(uintptr_t V)706   Selector(uintptr_t V) : InfoPtr(V) {}
707 
708   /// operator==/!= - Indicate whether the specified selectors are identical.
709   bool operator==(Selector RHS) const {
710     return InfoPtr == RHS.InfoPtr;
711   }
712   bool operator!=(Selector RHS) const {
713     return InfoPtr != RHS.InfoPtr;
714   }
715 
getAsOpaquePtr()716   void *getAsOpaquePtr() const {
717     return reinterpret_cast<void*>(InfoPtr);
718   }
719 
720   /// Determine whether this is the empty selector.
isNull()721   bool isNull() const { return InfoPtr == 0; }
722 
723   // Predicates to identify the selector type.
isKeywordSelector()724   bool isKeywordSelector() const {
725     return getIdentifierInfoFlag() != ZeroArg;
726   }
727 
isUnarySelector()728   bool isUnarySelector() const {
729     return getIdentifierInfoFlag() == ZeroArg;
730   }
731 
732   unsigned getNumArgs() const;
733 
734   /// Retrieve the identifier at a given position in the selector.
735   ///
736   /// Note that the identifier pointer returned may be NULL. Clients that only
737   /// care about the text of the identifier string, and not the specific,
738   /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
739   /// an empty string when the identifier pointer would be NULL.
740   ///
741   /// \param argIndex The index for which we want to retrieve the identifier.
742   /// This index shall be less than \c getNumArgs() unless this is a keyword
743   /// selector, in which case 0 is the only permissible value.
744   ///
745   /// \returns the uniqued identifier for this slot, or NULL if this slot has
746   /// no corresponding identifier.
747   IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
748 
749   /// Retrieve the name at a given position in the selector.
750   ///
751   /// \param argIndex The index for which we want to retrieve the name.
752   /// This index shall be less than \c getNumArgs() unless this is a keyword
753   /// selector, in which case 0 is the only permissible value.
754   ///
755   /// \returns the name for this slot, which may be the empty string if no
756   /// name was supplied.
757   StringRef getNameForSlot(unsigned argIndex) const;
758 
759   /// Derive the full selector name (e.g. "foo:bar:") and return
760   /// it as an std::string.
761   std::string getAsString() const;
762 
763   /// Prints the full selector name (e.g. "foo:bar:").
764   void print(llvm::raw_ostream &OS) const;
765 
766   void dump() const;
767 
768   /// Derive the conventional family of this method.
getMethodFamily()769   ObjCMethodFamily getMethodFamily() const {
770     return getMethodFamilyImpl(*this);
771   }
772 
getStringFormatFamily()773   ObjCStringFormatFamily getStringFormatFamily() const {
774     return getStringFormatFamilyImpl(*this);
775   }
776 
getEmptyMarker()777   static Selector getEmptyMarker() {
778     return Selector(uintptr_t(-1));
779   }
780 
getTombstoneMarker()781   static Selector getTombstoneMarker() {
782     return Selector(uintptr_t(-2));
783   }
784 
785   static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
786 };
787 
788 /// This table allows us to fully hide how we implement
789 /// multi-keyword caching.
790 class SelectorTable {
791   // Actually a SelectorTableImpl
792   void *Impl;
793 
794 public:
795   SelectorTable();
796   SelectorTable(const SelectorTable &) = delete;
797   SelectorTable &operator=(const SelectorTable &) = delete;
798   ~SelectorTable();
799 
800   /// Can create any sort of selector.
801   ///
802   /// \p NumArgs indicates whether this is a no argument selector "foo", a
803   /// single argument selector "foo:" or multi-argument "foo:bar:".
804   Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
805 
getUnarySelector(IdentifierInfo * ID)806   Selector getUnarySelector(IdentifierInfo *ID) {
807     return Selector(ID, 1);
808   }
809 
getNullarySelector(IdentifierInfo * ID)810   Selector getNullarySelector(IdentifierInfo *ID) {
811     return Selector(ID, 0);
812   }
813 
814   /// Return the total amount of memory allocated for managing selectors.
815   size_t getTotalMemory() const;
816 
817   /// Return the default setter name for the given identifier.
818   ///
819   /// This is "set" + \p Name where the initial character of \p Name
820   /// has been capitalized.
821   static SmallString<64> constructSetterName(StringRef Name);
822 
823   /// Return the default setter selector for the given identifier.
824   ///
825   /// This is "set" + \p Name where the initial character of \p Name
826   /// has been capitalized.
827   static Selector constructSetterSelector(IdentifierTable &Idents,
828                                           SelectorTable &SelTable,
829                                           const IdentifierInfo *Name);
830 
831   /// Return the property name for the given setter selector.
832   static std::string getPropertyNameFromSetterSelector(Selector Sel);
833 };
834 
835 /// DeclarationNameExtra - Common base of the MultiKeywordSelector,
836 /// CXXSpecialName, and CXXOperatorIdName classes, all of which are
837 /// private classes that describe different kinds of names.
838 class DeclarationNameExtra {
839 public:
840   /// ExtraKind - The kind of "extra" information stored in the
841   /// DeclarationName. See @c ExtraKindOrNumArgs for an explanation of
842   /// how these enumerator values are used.
843   enum ExtraKind {
844     CXXConstructor = 0,
845     CXXDestructor,
846     CXXConversionFunction,
847 #define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
848     CXXOperator##Name,
849 #include "clang/Basic/OperatorKinds.def"
850     CXXDeductionGuide,
851     CXXLiteralOperator,
852     CXXUsingDirective,
853     NUM_EXTRA_KINDS
854   };
855 
856   /// ExtraKindOrNumArgs - Either the kind of C++ special name or
857   /// operator-id (if the value is one of the CXX* enumerators of
858   /// ExtraKind), in which case the DeclarationNameExtra is also a
859   /// CXXSpecialName, (for CXXConstructor, CXXDestructor, or
860   /// CXXConversionFunction) CXXOperatorIdName, or CXXLiteralOperatorName,
861   /// it may be also name common to C++ using-directives (CXXUsingDirective),
862   /// otherwise it is NUM_EXTRA_KINDS+NumArgs, where NumArgs is the number of
863   /// arguments in the Objective-C selector, in which case the
864   /// DeclarationNameExtra is also a MultiKeywordSelector.
865   unsigned ExtraKindOrNumArgs;
866 };
867 
868 }  // namespace clang
869 
870 namespace llvm {
871 
872 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
873 /// DenseSets.
874 template <>
875 struct DenseMapInfo<clang::Selector> {
876   static clang::Selector getEmptyKey() {
877     return clang::Selector::getEmptyMarker();
878   }
879 
880   static clang::Selector getTombstoneKey() {
881     return clang::Selector::getTombstoneMarker();
882   }
883 
884   static unsigned getHashValue(clang::Selector S);
885 
886   static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
887     return LHS == RHS;
888   }
889 };
890 
891 template <>
892 struct isPodLike<clang::Selector> { static const bool value = true; };
893 
894 template<>
895 struct PointerLikeTypeTraits<clang::Selector> {
896   static const void *getAsVoidPointer(clang::Selector P) {
897     return P.getAsOpaquePtr();
898   }
899 
900   static clang::Selector getFromVoidPointer(const void *P) {
901     return clang::Selector(reinterpret_cast<uintptr_t>(P));
902   }
903 
904   enum { NumLowBitsAvailable = 0 };
905 };
906 
907 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
908 // are not guaranteed to be 8-byte aligned.
909 template<>
910 struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
911   static void *getAsVoidPointer(clang::IdentifierInfo* P) {
912     return P;
913   }
914 
915   static clang::IdentifierInfo *getFromVoidPointer(void *P) {
916     return static_cast<clang::IdentifierInfo*>(P);
917   }
918 
919   enum { NumLowBitsAvailable = 1 };
920 };
921 
922 template<>
923 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
924   static const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
925     return P;
926   }
927 
928   static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
929     return static_cast<const clang::IdentifierInfo*>(P);
930   }
931 
932   enum { NumLowBitsAvailable = 1 };
933 };
934 
935 } // namespace llvm
936 
937 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
938