1 //===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
11 /// clang::Selector interfaces.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
16 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17 
18 #include "clang/Basic/LLVM.h"
19 #include "clang/Basic/TokenKinds.h"
20 #include "llvm/ADT/DenseMapInfo.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/ADT/StringMap.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Allocator.h"
25 #include "llvm/Support/PointerLikeTypeTraits.h"
26 #include "llvm/Support/type_traits.h"
27 #include <cassert>
28 #include <cstddef>
29 #include <cstdint>
30 #include <cstring>
31 #include <string>
32 #include <utility>
33 
34 namespace clang {
35 
36 class DeclarationName;
37 class DeclarationNameTable;
38 class IdentifierInfo;
39 class LangOptions;
40 class MultiKeywordSelector;
41 class SourceLocation;
42 
/// Classification returned by IdentifierInfo::isReserved(): either the
/// identifier is not reserved, or the enumerator names the spelling pattern
/// that makes it a name reserved for the implementation.
enum class ReservedIdentifierStatus {
  NotReserved = 0,
  StartsWithUnderscoreAtGlobalScope = 1,
  StartsWithDoubleUnderscore = 2,
  StartsWithUnderscoreFollowedByCapitalLetter = 3,
  ContainsDoubleUnderscore = 4,
};
50 
51 /// A simple pair of identifier info and location.
using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;

/// IdentifierInfo and other related classes are aligned to
/// 8 bytes so that DeclarationName can use the lower 3 bits
/// of a pointer to one of these classes.
enum { IdentifierInfoAlignment = 8 };

/// Width, in bits, of the IdentifierInfo::ObjCOrBuiltinID bit-field, which
/// stores either an Objective-C keyword ID or a builtin ID.
static constexpr int ObjCOrBuiltinIDBits = 16;
60 
61 /// One of these records is kept for each identifier that
62 /// is lexed.  This contains information about whether the token was \#define'd,
63 /// is a language keyword, or if it is a front-end token of some sort (e.g. a
64 /// variable or function name).  The preprocessor keeps this information in a
65 /// set, and all tok::identifier tokens have a pointer to one of these.
66 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits.
67 class alignas(IdentifierInfoAlignment) IdentifierInfo {
68   friend class IdentifierTable;
69 
70   // Front-end token ID or tok::identifier.
71   unsigned TokenID : 9;
72 
73   // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
74   // First NUM_OBJC_KEYWORDS values are for Objective-C,
75   // the remaining values are for builtins.
76   unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits;
77 
78   // True if there is a #define for this.
79   unsigned HasMacro : 1;
80 
81   // True if there was a #define for this.
82   unsigned HadMacro : 1;
83 
84   // True if the identifier is a language extension.
85   unsigned IsExtension : 1;
86 
87   // True if the identifier is a keyword in a newer or proposed Standard.
88   unsigned IsFutureCompatKeyword : 1;
89 
90   // True if the identifier is poisoned.
91   unsigned IsPoisoned : 1;
92 
93   // True if the identifier is a C++ operator keyword.
94   unsigned IsCPPOperatorKeyword : 1;
95 
96   // Internal bit set by the member function RecomputeNeedsHandleIdentifier.
97   // See comment about RecomputeNeedsHandleIdentifier for more info.
98   unsigned NeedsHandleIdentifier : 1;
99 
100   // True if the identifier was loaded (at least partially) from an AST file.
101   unsigned IsFromAST : 1;
102 
103   // True if the identifier has changed from the definition
104   // loaded from an AST file.
105   unsigned ChangedAfterLoad : 1;
106 
107   // True if the identifier's frontend information has changed from the
108   // definition loaded from an AST file.
109   unsigned FEChangedAfterLoad : 1;
110 
111   // True if revertTokenIDToIdentifier was called.
112   unsigned RevertedTokenID : 1;
113 
114   // True if there may be additional information about
115   // this identifier stored externally.
116   unsigned OutOfDate : 1;
117 
118   // True if this is the 'import' contextual keyword.
119   unsigned IsModulesImport : 1;
120 
121   // True if this is a mangled OpenMP variant name.
122   unsigned IsMangledOpenMPVariantName : 1;
123 
124   // 28 bits left in a 64-bit word.
125 
126   // Managed by the language front-end.
127   void *FETokenInfo = nullptr;
128 
129   llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
130 
131   IdentifierInfo()
132       : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false),
133         HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false),
134         IsPoisoned(false), IsCPPOperatorKeyword(false),
135         NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false),
136         FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false),
137         IsModulesImport(false), IsMangledOpenMPVariantName(false) {}
138 
139 public:
140   IdentifierInfo(const IdentifierInfo &) = delete;
141   IdentifierInfo &operator=(const IdentifierInfo &) = delete;
142   IdentifierInfo(IdentifierInfo &&) = delete;
143   IdentifierInfo &operator=(IdentifierInfo &&) = delete;
144 
145   /// Return true if this is the identifier for the specified string.
146   ///
147   /// This is intended to be used for string literals only: II->isStr("foo").
148   template <std::size_t StrLen>
149   bool isStr(const char (&Str)[StrLen]) const {
150     return getLength() == StrLen-1 &&
151            memcmp(getNameStart(), Str, StrLen-1) == 0;
152   }
153 
154   /// Return true if this is the identifier for the specified StringRef.
155   bool isStr(llvm::StringRef Str) const {
156     llvm::StringRef ThisStr(getNameStart(), getLength());
157     return ThisStr == Str;
158   }
159 
160   /// Return the beginning of the actual null-terminated string for this
161   /// identifier.
162   const char *getNameStart() const { return Entry->getKeyData(); }
163 
164   /// Efficiently return the length of this identifier info.
165   unsigned getLength() const { return Entry->getKeyLength(); }
166 
167   /// Return the actual identifier string.
168   StringRef getName() const {
169     return StringRef(getNameStart(), getLength());
170   }
171 
172   /// Return true if this identifier is \#defined to some other value.
173   /// \note The current definition may be in a module and not currently visible.
174   bool hasMacroDefinition() const {
175     return HasMacro;
176   }
177   void setHasMacroDefinition(bool Val) {
178     if (HasMacro == Val) return;
179 
180     HasMacro = Val;
181     if (Val) {
182       NeedsHandleIdentifier = true;
183       HadMacro = true;
184     } else {
185       RecomputeNeedsHandleIdentifier();
186     }
187   }
188   /// Returns true if this identifier was \#defined to some value at any
189   /// moment. In this case there should be an entry for the identifier in the
190   /// macro history table in Preprocessor.
191   bool hadMacroDefinition() const {
192     return HadMacro;
193   }
194 
195   /// If this is a source-language token (e.g. 'for'), this API
196   /// can be used to cause the lexer to map identifiers to source-language
197   /// tokens.
198   tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
199 
200   /// True if revertTokenIDToIdentifier() was called.
201   bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
202 
203   /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
204   /// compatibility.
205   ///
206   /// TokenID is normally read-only but there are 2 instances where we revert it
207   /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
208   /// using this method so we can inform serialization about it.
209   void revertTokenIDToIdentifier() {
210     assert(TokenID != tok::identifier && "Already at tok::identifier");
211     TokenID = tok::identifier;
212     RevertedTokenID = true;
213   }
214   void revertIdentifierToTokenID(tok::TokenKind TK) {
215     assert(TokenID == tok::identifier && "Should be at tok::identifier");
216     TokenID = TK;
217     RevertedTokenID = false;
218   }
219 
220   /// Return the preprocessor keyword ID for this identifier.
221   ///
222   /// For example, "define" will return tok::pp_define.
223   tok::PPKeywordKind getPPKeywordID() const;
224 
225   /// Return the Objective-C keyword ID for the this identifier.
226   ///
227   /// For example, 'class' will return tok::objc_class if ObjC is enabled.
228   tok::ObjCKeywordKind getObjCKeywordID() const {
229     if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
230       return tok::ObjCKeywordKind(ObjCOrBuiltinID);
231     else
232       return tok::objc_not_keyword;
233   }
234   void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
235 
236   /// Return a value indicating whether this is a builtin function.
237   ///
238   /// 0 is not-built-in. 1+ are specific builtin functions.
239   unsigned getBuiltinID() const {
240     if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
241       return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
242     else
243       return 0;
244   }
245   void setBuiltinID(unsigned ID) {
246     ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
247     assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
248            && "ID too large for field!");
249   }
250 
251   unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
252   void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
253 
254   /// get/setExtension - Initialize information about whether or not this
255   /// language token is an extension.  This controls extension warnings, and is
256   /// only valid if a custom token ID is set.
257   bool isExtensionToken() const { return IsExtension; }
258   void setIsExtensionToken(bool Val) {
259     IsExtension = Val;
260     if (Val)
261       NeedsHandleIdentifier = true;
262     else
263       RecomputeNeedsHandleIdentifier();
264   }
265 
266   /// is/setIsFutureCompatKeyword - Initialize information about whether or not
267   /// this language token is a keyword in a newer or proposed Standard. This
268   /// controls compatibility warnings, and is only true when not parsing the
269   /// corresponding Standard. Once a compatibility problem has been diagnosed
270   /// with this keyword, the flag will be cleared.
271   bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
272   void setIsFutureCompatKeyword(bool Val) {
273     IsFutureCompatKeyword = Val;
274     if (Val)
275       NeedsHandleIdentifier = true;
276     else
277       RecomputeNeedsHandleIdentifier();
278   }
279 
280   /// setIsPoisoned - Mark this identifier as poisoned.  After poisoning, the
281   /// Preprocessor will emit an error every time this token is used.
282   void setIsPoisoned(bool Value = true) {
283     IsPoisoned = Value;
284     if (Value)
285       NeedsHandleIdentifier = true;
286     else
287       RecomputeNeedsHandleIdentifier();
288   }
289 
290   /// Return true if this token has been poisoned.
291   bool isPoisoned() const { return IsPoisoned; }
292 
293   /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
294   /// this identifier is a C++ alternate representation of an operator.
295   void setIsCPlusPlusOperatorKeyword(bool Val = true) {
296     IsCPPOperatorKeyword = Val;
297   }
298   bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
299 
300   /// Return true if this token is a keyword in the specified language.
301   bool isKeyword(const LangOptions &LangOpts) const;
302 
303   /// Return true if this token is a C++ keyword in the specified
304   /// language.
305   bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
306 
307   /// Get and set FETokenInfo. The language front-end is allowed to associate
308   /// arbitrary metadata with this token.
309   void *getFETokenInfo() const { return FETokenInfo; }
310   void setFETokenInfo(void *T) { FETokenInfo = T; }
311 
312   /// Return true if the Preprocessor::HandleIdentifier must be called
313   /// on a token of this identifier.
314   ///
315   /// If this returns false, we know that HandleIdentifier will not affect
316   /// the token.
317   bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
318 
319   /// Return true if the identifier in its current state was loaded
320   /// from an AST file.
321   bool isFromAST() const { return IsFromAST; }
322 
323   void setIsFromAST() { IsFromAST = true; }
324 
325   /// Determine whether this identifier has changed since it was loaded
326   /// from an AST file.
327   bool hasChangedSinceDeserialization() const {
328     return ChangedAfterLoad;
329   }
330 
331   /// Note that this identifier has changed since it was loaded from
332   /// an AST file.
333   void setChangedSinceDeserialization() {
334     ChangedAfterLoad = true;
335   }
336 
337   /// Determine whether the frontend token information for this
338   /// identifier has changed since it was loaded from an AST file.
339   bool hasFETokenInfoChangedSinceDeserialization() const {
340     return FEChangedAfterLoad;
341   }
342 
343   /// Note that the frontend token information for this identifier has
344   /// changed since it was loaded from an AST file.
345   void setFETokenInfoChangedSinceDeserialization() {
346     FEChangedAfterLoad = true;
347   }
348 
349   /// Determine whether the information for this identifier is out of
350   /// date with respect to the external source.
351   bool isOutOfDate() const { return OutOfDate; }
352 
353   /// Set whether the information for this identifier is out of
354   /// date with respect to the external source.
355   void setOutOfDate(bool OOD) {
356     OutOfDate = OOD;
357     if (OOD)
358       NeedsHandleIdentifier = true;
359     else
360       RecomputeNeedsHandleIdentifier();
361   }
362 
363   /// Determine whether this is the contextual keyword \c import.
364   bool isModulesImport() const { return IsModulesImport; }
365 
366   /// Set whether this identifier is the contextual keyword \c import.
367   void setModulesImport(bool I) {
368     IsModulesImport = I;
369     if (I)
370       NeedsHandleIdentifier = true;
371     else
372       RecomputeNeedsHandleIdentifier();
373   }
374 
375   /// Determine whether this is the mangled name of an OpenMP variant.
376   bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
377 
378   /// Set whether this is the mangled name of an OpenMP variant.
379   void setMangledOpenMPVariantName(bool I) { IsMangledOpenMPVariantName = I; }
380 
381   /// Return true if this identifier is an editor placeholder.
382   ///
383   /// Editor placeholders are produced by the code-completion engine and are
384   /// represented as characters between '<#' and '#>' in the source code. An
385   /// example of auto-completed call with a placeholder parameter is shown
386   /// below:
387   /// \code
388   ///   function(<#int x#>);
389   /// \endcode
390   bool isEditorPlaceholder() const {
391     return getName().startswith("<#") && getName().endswith("#>");
392   }
393 
394   /// Determine whether \p this is a name reserved for the implementation (C99
395   /// 7.1.3, C++ [lib.global.names]).
396   ReservedIdentifierStatus isReserved(const LangOptions &LangOpts) const;
397 
398   /// Provide less than operator for lexicographical sorting.
399   bool operator<(const IdentifierInfo &RHS) const {
400     return getName() < RHS.getName();
401   }
402 
403 private:
404   /// The Preprocessor::HandleIdentifier does several special (but rare)
405   /// things to identifiers of various sorts.  For example, it changes the
406   /// \c for keyword token from tok::identifier to tok::for.
407   ///
408   /// This method is very tied to the definition of HandleIdentifier.  Any
409   /// change to it should be reflected here.
410   void RecomputeNeedsHandleIdentifier() {
411     NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
412                             isExtensionToken() || isFutureCompatKeyword() ||
413                             isOutOfDate() || isModulesImport();
414   }
415 };
416 
417 /// An RAII object for [un]poisoning an identifier within a scope.
418 ///
419 /// \p II is allowed to be null, in which case objects of this type have
420 /// no effect.
421 class PoisonIdentifierRAIIObject {
422   IdentifierInfo *const II;
423   const bool OldValue;
424 
425 public:
426   PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
427     : II(II), OldValue(II ? II->isPoisoned() : false) {
428     if(II)
429       II->setIsPoisoned(NewValue);
430   }
431 
432   ~PoisonIdentifierRAIIObject() {
433     if(II)
434       II->setIsPoisoned(OldValue);
435   }
436 };
437 
438 /// An iterator that walks over all of the known identifiers
439 /// in the lookup table.
440 ///
441 /// Since this iterator uses an abstract interface via virtual
442 /// functions, it uses an object-oriented interface rather than the
443 /// more standard C++ STL iterator interface. In this OO-style
444 /// iteration, the single function \c Next() provides dereference,
445 /// advance, and end-of-sequence checking in a single
446 /// operation. Subclasses of this iterator type will provide the
447 /// actual functionality.
448 class IdentifierIterator {
449 protected:
450   IdentifierIterator() = default;
451 
452 public:
453   IdentifierIterator(const IdentifierIterator &) = delete;
454   IdentifierIterator &operator=(const IdentifierIterator &) = delete;
455 
456   virtual ~IdentifierIterator();
457 
458   /// Retrieve the next string in the identifier table and
459   /// advances the iterator for the following string.
460   ///
461   /// \returns The next string in the identifier table. If there is
462   /// no such string, returns an empty \c StringRef.
463   virtual StringRef Next() = 0;
464 };
465 
/// Provides lookups to, and iteration over, IdentifierInfo objects.
467 class IdentifierInfoLookup {
468 public:
469   virtual ~IdentifierInfoLookup();
470 
471   /// Return the IdentifierInfo for the specified named identifier.
472   ///
473   /// Unlike the version in IdentifierTable, this returns a pointer instead
474   /// of a reference.  If the pointer is null then the IdentifierInfo cannot
475   /// be found.
476   virtual IdentifierInfo* get(StringRef Name) = 0;
477 
478   /// Retrieve an iterator into the set of all identifiers
479   /// known to this identifier lookup source.
480   ///
481   /// This routine provides access to all of the identifiers known to
482   /// the identifier lookup, allowing access to the contents of the
483   /// identifiers without introducing the overhead of constructing
484   /// IdentifierInfo objects for each.
485   ///
486   /// \returns A new iterator into the set of known identifiers. The
487   /// caller is responsible for deleting this iterator.
488   virtual IdentifierIterator *getIdentifiers();
489 };
490 
491 /// Implements an efficient mapping from strings to IdentifierInfo nodes.
492 ///
493 /// This has no other purpose, but this is an extremely performance-critical
494 /// piece of the code, as each occurrence of every identifier goes through
495 /// here when lexed.
496 class IdentifierTable {
497   // Shark shows that using MallocAllocator is *much* slower than using this
498   // BumpPtrAllocator!
499   using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>;
500   HashTableTy HashTable;
501 
502   IdentifierInfoLookup* ExternalLookup;
503 
504 public:
505   /// Create the identifier table.
506   explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr);
507 
508   /// Create the identifier table, populating it with info about the
509   /// language keywords for the language specified by \p LangOpts.
510   explicit IdentifierTable(const LangOptions &LangOpts,
511                            IdentifierInfoLookup *ExternalLookup = nullptr);
512 
513   /// Set the external identifier lookup mechanism.
514   void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
515     ExternalLookup = IILookup;
516   }
517 
518   /// Retrieve the external identifier lookup object, if any.
519   IdentifierInfoLookup *getExternalIdentifierLookup() const {
520     return ExternalLookup;
521   }
522 
523   llvm::BumpPtrAllocator& getAllocator() {
524     return HashTable.getAllocator();
525   }
526 
527   /// Return the identifier token info for the specified named
528   /// identifier.
529   IdentifierInfo &get(StringRef Name) {
530     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
531 
532     IdentifierInfo *&II = Entry.second;
533     if (II) return *II;
534 
535     // No entry; if we have an external lookup, look there first.
536     if (ExternalLookup) {
537       II = ExternalLookup->get(Name);
538       if (II)
539         return *II;
540     }
541 
542     // Lookups failed, make a new IdentifierInfo.
543     void *Mem = getAllocator().Allocate<IdentifierInfo>();
544     II = new (Mem) IdentifierInfo();
545 
546     // Make sure getName() knows how to find the IdentifierInfo
547     // contents.
548     II->Entry = &Entry;
549 
550     return *II;
551   }
552 
553   IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
554     IdentifierInfo &II = get(Name);
555     II.TokenID = TokenCode;
556     assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
557     return II;
558   }
559 
560   /// Gets an IdentifierInfo for the given name without consulting
561   ///        external sources.
562   ///
563   /// This is a version of get() meant for external sources that want to
564   /// introduce or modify an identifier. If they called get(), they would
565   /// likely end up in a recursion.
566   IdentifierInfo &getOwn(StringRef Name) {
567     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
568 
569     IdentifierInfo *&II = Entry.second;
570     if (II)
571       return *II;
572 
573     // Lookups failed, make a new IdentifierInfo.
574     void *Mem = getAllocator().Allocate<IdentifierInfo>();
575     II = new (Mem) IdentifierInfo();
576 
577     // Make sure getName() knows how to find the IdentifierInfo
578     // contents.
579     II->Entry = &Entry;
580 
581     // If this is the 'import' contextual keyword, mark it as such.
582     if (Name.equals("import"))
583       II->setModulesImport(true);
584 
585     return *II;
586   }
587 
588   using iterator = HashTableTy::const_iterator;
589   using const_iterator = HashTableTy::const_iterator;
590 
591   iterator begin() const { return HashTable.begin(); }
592   iterator end() const   { return HashTable.end(); }
593   unsigned size() const  { return HashTable.size(); }
594 
595   iterator find(StringRef Name) const { return HashTable.find(Name); }
596 
597   /// Print some statistics to stderr that indicate how well the
598   /// hashing is doing.
599   void PrintStats() const;
600 
601   /// Populate the identifier table with info about the language keywords
602   /// for the language specified by \p LangOpts.
603   void AddKeywords(const LangOptions &LangOpts);
604 };
605 
606 /// A family of Objective-C methods.
607 ///
608 /// These families have no inherent meaning in the language, but are
609 /// nonetheless central enough in the existing implementations to
610 /// merit direct AST support.  While, in theory, arbitrary methods can
611 /// be considered to form families, we focus here on the methods
612 /// involving allocation and retain-count management, as these are the
613 /// most "core" and the most likely to be useful to diverse clients
614 /// without extra information.
615 ///
616 /// Both selectors and actual method declarations may be classified
617 /// into families.  Method families may impose additional restrictions
618 /// beyond their selector name; for example, a method called '_init'
619 /// that returns void is not considered to be in the 'init' family
620 /// (but would be if it returned 'id').  It is also possible to
621 /// explicitly change or remove a method's family.  Therefore the
622 /// method's family should be considered the single source of truth.
enum ObjCMethodFamily {
  /// No particular method family.
  OMF_None = 0,

  // Selectors in the following families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have additional
  // CamelCase "words" in their first selector chunk after the family name.
  OMF_alloc = 1,
  OMF_copy = 2,
  OMF_init = 3,
  OMF_mutableCopy = 4,
  OMF_new = 5,

  // The following families are singletons: each consists only of the
  // nullary selector with the given name.
  OMF_autorelease = 6,
  OMF_dealloc = 7,
  OMF_finalize = 8,
  OMF_release = 9,
  OMF_retain = 10,
  OMF_retainCount = 11,
  OMF_self = 12,
  OMF_initialize = 13,

  // performSelector families
  OMF_performSelector = 14
};
651 
652 /// Enough bits to store any enumerator in ObjCMethodFamily or
653 /// InvalidObjCMethodFamily.
enum {
  ObjCMethodFamilyBitWidth = 4
};

/// An invalid value of ObjCMethodFamily: the largest value representable in
/// ObjCMethodFamilyBitWidth bits (all bits set).
enum {
  InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1
};
658 
659 /// A family of Objective-C methods.
660 ///
/// These are families of methods whose result type is initially 'id', but
/// are candidates for the result type to be changed to 'instancetype'.
enum ObjCInstanceTypeFamily {
  OIT_None = 0,
  OIT_Array = 1,
  OIT_Dictionary = 2,
  OIT_Singleton = 3,
  OIT_Init = 4,
  OIT_ReturnsSelf = 5
};
671 
/// String-format family of an Objective-C selector, as returned by
/// Selector::getStringFormatFamily().
enum ObjCStringFormatFamily {
  SFF_None = 0,
  SFF_NSString = 1,
  SFF_CFString = 2
};
677 
678 /// Smart pointer class that efficiently represents Objective-C method
679 /// names.
680 ///
681 /// This class will either point to an IdentifierInfo or a
682 /// MultiKeywordSelector (which is private). This enables us to optimize
683 /// selectors that take no arguments and selectors that take 1 argument, which
684 /// accounts for 78% of all selectors in Cocoa.h.
685 class Selector {
686   friend class Diagnostic;
687   friend class SelectorTable; // only the SelectorTable can create these
688   friend class DeclarationName; // and the AST's DeclarationName.
689 
690   enum IdentifierInfoFlag {
691     // Empty selector = 0. Note that these enumeration values must
692     // correspond to the enumeration values of DeclarationName::StoredNameKind
693     ZeroArg  = 0x01,
694     OneArg   = 0x02,
695     MultiArg = 0x07,
696     ArgFlags = 0x07
697   };
698 
699   /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low
700   /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any
701   /// case IdentifierInfo and MultiKeywordSelector are already aligned to
702   /// 8 bytes even on 32 bits archs because of DeclarationName.
703   uintptr_t InfoPtr = 0;
704 
705   Selector(IdentifierInfo *II, unsigned nArgs) {
706     InfoPtr = reinterpret_cast<uintptr_t>(II);
707     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
708     assert(nArgs < 2 && "nArgs not equal to 0/1");
709     InfoPtr |= nArgs+1;
710   }
711 
712   Selector(MultiKeywordSelector *SI) {
713     InfoPtr = reinterpret_cast<uintptr_t>(SI);
714     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
715     InfoPtr |= MultiArg;
716   }
717 
718   IdentifierInfo *getAsIdentifierInfo() const {
719     if (getIdentifierInfoFlag() < MultiArg)
720       return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
721     return nullptr;
722   }
723 
724   MultiKeywordSelector *getMultiKeywordSelector() const {
725     return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
726   }
727 
728   unsigned getIdentifierInfoFlag() const {
729     return InfoPtr & ArgFlags;
730   }
731 
732   static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
733 
734   static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
735 
736 public:
737   /// The default ctor should only be used when creating data structures that
738   ///  will contain selectors.
739   Selector() = default;
740   explicit Selector(uintptr_t V) : InfoPtr(V) {}
741 
742   /// operator==/!= - Indicate whether the specified selectors are identical.
743   bool operator==(Selector RHS) const {
744     return InfoPtr == RHS.InfoPtr;
745   }
746   bool operator!=(Selector RHS) const {
747     return InfoPtr != RHS.InfoPtr;
748   }
749 
750   void *getAsOpaquePtr() const {
751     return reinterpret_cast<void*>(InfoPtr);
752   }
753 
754   /// Determine whether this is the empty selector.
755   bool isNull() const { return InfoPtr == 0; }
756 
757   // Predicates to identify the selector type.
758   bool isKeywordSelector() const {
759     return getIdentifierInfoFlag() != ZeroArg;
760   }
761 
762   bool isUnarySelector() const {
763     return getIdentifierInfoFlag() == ZeroArg;
764   }
765 
766   /// If this selector is the specific keyword selector described by Names.
767   bool isKeywordSelector(ArrayRef<StringRef> Names) const;
768 
769   /// If this selector is the specific unary selector described by Name.
770   bool isUnarySelector(StringRef Name) const;
771 
772   unsigned getNumArgs() const;
773 
774   /// Retrieve the identifier at a given position in the selector.
775   ///
776   /// Note that the identifier pointer returned may be NULL. Clients that only
777   /// care about the text of the identifier string, and not the specific,
778   /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
779   /// an empty string when the identifier pointer would be NULL.
780   ///
781   /// \param argIndex The index for which we want to retrieve the identifier.
782   /// This index shall be less than \c getNumArgs() unless this is a keyword
783   /// selector, in which case 0 is the only permissible value.
784   ///
785   /// \returns the uniqued identifier for this slot, or NULL if this slot has
786   /// no corresponding identifier.
787   IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
788 
789   /// Retrieve the name at a given position in the selector.
790   ///
791   /// \param argIndex The index for which we want to retrieve the name.
792   /// This index shall be less than \c getNumArgs() unless this is a keyword
793   /// selector, in which case 0 is the only permissible value.
794   ///
795   /// \returns the name for this slot, which may be the empty string if no
796   /// name was supplied.
797   StringRef getNameForSlot(unsigned argIndex) const;
798 
799   /// Derive the full selector name (e.g. "foo:bar:") and return
800   /// it as an std::string.
801   std::string getAsString() const;
802 
803   /// Prints the full selector name (e.g. "foo:bar:").
804   void print(llvm::raw_ostream &OS) const;
805 
806   void dump() const;
807 
808   /// Derive the conventional family of this method.
809   ObjCMethodFamily getMethodFamily() const {
810     return getMethodFamilyImpl(*this);
811   }
812 
813   ObjCStringFormatFamily getStringFormatFamily() const {
814     return getStringFormatFamilyImpl(*this);
815   }
816 
817   static Selector getEmptyMarker() {
818     return Selector(uintptr_t(-1));
819   }
820 
821   static Selector getTombstoneMarker() {
822     return Selector(uintptr_t(-2));
823   }
824 
825   static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
826 };
827 
/// This table allows us to fully hide how we implement
/// multi-keyword caching.
///
/// All implementation state lives behind the opaque \c Impl pointer. The
/// table cannot be copied: copy construction and copy assignment are deleted.
class SelectorTable {
  // Actually a SelectorTableImpl
  void *Impl;

public:
  SelectorTable();
  SelectorTable(const SelectorTable &) = delete;
  SelectorTable &operator=(const SelectorTable &) = delete;
  ~SelectorTable();

  /// Can create any sort of selector.
  ///
  /// \p NumArgs indicates whether this is a no argument selector "foo", a
  /// single argument selector "foo:" or multi-argument "foo:bar:".
  ///
  /// NOTE(review): \p IIV presumably points to the identifiers that make up
  /// the selector's slots -- confirm the expected array length against the
  /// out-of-line definition.
  Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);

  /// Build a selector directly from \p ID by calling \c Selector(ID, 1).
  /// \c Impl is not consulted.
  Selector getUnarySelector(IdentifierInfo *ID) {
    return Selector(ID, 1);
  }

  /// Build a selector directly from \p ID by calling \c Selector(ID, 0).
  /// \c Impl is not consulted.
  Selector getNullarySelector(IdentifierInfo *ID) {
    return Selector(ID, 0);
  }

  /// Return the total amount of memory allocated for managing selectors.
  size_t getTotalMemory() const;

  /// Return the default setter name for the given identifier.
  ///
  /// This is "set" + \p Name where the initial character of \p Name
  /// has been capitalized.
  ///
  /// \returns the setter name in a \c SmallString<64>, returned by value.
  static SmallString<64> constructSetterName(StringRef Name);

  /// Return the default setter selector for the given identifier.
  ///
  /// This is "set" + \p Name where the initial character of \p Name
  /// has been capitalized.
  ///
  /// NOTE(review): \p Idents and \p SelTable are taken by non-const
  /// reference; presumably the setter identifier and selector are created in
  /// (or looked up from) them -- confirm against the definition.
  static Selector constructSetterSelector(IdentifierTable &Idents,
                                          SelectorTable &SelTable,
                                          const IdentifierInfo *Name);

  /// Return the property name for the given setter selector.
  ///
  /// \returns the property name as a \c std::string, returned by value.
  static std::string getPropertyNameFromSetterSelector(Selector Sel);
};
874 
875 namespace detail {
876 
877 /// DeclarationNameExtra is used as a base of various uncommon special names.
878 /// This class is needed since DeclarationName has not enough space to store
879 /// the kind of every possible names. Therefore the kind of common names is
880 /// stored directly in DeclarationName, and the kind of uncommon names is
881 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because
882 /// DeclarationName needs the lower 3 bits to store the kind of common names.
883 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change
884 /// here is very likely to require changes in DeclarationName(Table).
885 class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
886   friend class clang::DeclarationName;
887   friend class clang::DeclarationNameTable;
888 
889 protected:
890   /// The kind of "extra" information stored in the DeclarationName. See
891   /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
892   /// are used. Note that DeclarationName depends on the numerical values
893   /// of the enumerators in this enum. See DeclarationName::StoredNameKind
894   /// for more info.
895   enum ExtraKind {
896     CXXDeductionGuideName,
897     CXXLiteralOperatorName,
898     CXXUsingDirective,
899     ObjCMultiArgSelector
900   };
901 
902   /// ExtraKindOrNumArgs has one of the following meaning:
903   ///  * The kind of an uncommon C++ special name. This DeclarationNameExtra
904   ///    is in this case in fact either a CXXDeductionGuideNameExtra or
905   ///    a CXXLiteralOperatorIdName.
906   ///
907   ///  * It may be also name common to C++ using-directives (CXXUsingDirective),
908   ///
909   ///  * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
910   ///    the number of arguments in the Objective-C selector, in which
911   ///    case the DeclarationNameExtra is also a MultiKeywordSelector.
912   unsigned ExtraKindOrNumArgs;
913 
914   DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
915   DeclarationNameExtra(unsigned NumArgs)
916       : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
917 
918   /// Return the corresponding ExtraKind.
919   ExtraKind getKind() const {
920     return static_cast<ExtraKind>(ExtraKindOrNumArgs >
921                                           (unsigned)ObjCMultiArgSelector
922                                       ? (unsigned)ObjCMultiArgSelector
923                                       : ExtraKindOrNumArgs);
924   }
925 
926   /// Return the number of arguments in an ObjC selector. Only valid when this
927   /// is indeed an ObjCMultiArgSelector.
928   unsigned getNumArgs() const {
929     assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
930            "getNumArgs called but this is not an ObjC selector!");
931     return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
932   }
933 };
934 
935 } // namespace detail
936 
937 }  // namespace clang
938 
939 namespace llvm {
940 
/// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
/// DenseSets.
///
/// The empty and tombstone keys are the sentinel values provided by
/// clang::Selector itself; equality is Selector's own operator==.
template <>
struct DenseMapInfo<clang::Selector> {
  /// Returns clang::Selector::getEmptyMarker().
  static clang::Selector getEmptyKey() {
    return clang::Selector::getEmptyMarker();
  }

  /// Returns clang::Selector::getTombstoneMarker().
  static clang::Selector getTombstoneKey() {
    return clang::Selector::getTombstoneMarker();
  }

  /// Hash of \p S; defined out of line.
  static unsigned getHashValue(clang::Selector S);

  /// Two keys are equal exactly when \c LHS == RHS.
  static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
    return LHS == RHS;
  }
};
959 
/// PointerLikeTypeTraits for clang::Selector: converts a Selector to and
/// from an opaque pointer-sized value.
template<>
struct PointerLikeTypeTraits<clang::Selector> {
  /// Returns the selector's opaque pointer, as given by getAsOpaquePtr().
  static const void *getAsVoidPointer(clang::Selector P) {
    return P.getAsOpaquePtr();
  }

  /// Rebuilds a Selector from the integer value of \p P.
  static clang::Selector getFromVoidPointer(const void *P) {
    return clang::Selector(reinterpret_cast<uintptr_t>(P));
  }

  // No low bits are advertised as available.
  // NOTE(review): presumably because Selector already uses the low bits of
  // its stored value itself -- confirm against the Selector definition.
  static constexpr int NumLowBitsAvailable = 0;
};
972 
// Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
// are not guaranteed to be 8-byte aligned.
//
// NOTE(review): the comment near the top of this file says IdentifierInfo and
// related classes are aligned to 8 bytes (IdentifierInfoAlignment). Confirm
// whether the statement above and the conservative NumLowBitsAvailable below
// are still intentional.
template<>
struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
  // The pointer itself is the opaque representation.
  static void *getAsVoidPointer(clang::IdentifierInfo* P) {
    return P;
  }

  // Recovers the typed pointer from its opaque representation.
  static clang::IdentifierInfo *getFromVoidPointer(void *P) {
    return static_cast<clang::IdentifierInfo*>(P);
  }

  // Only the lowest bit is advertised as available.
  static constexpr int NumLowBitsAvailable = 1;
};
987 
// Const-qualified counterpart of the PointerLikeTypeTraits specialization
// for clang::IdentifierInfo pointers; it advertises the same number of
// available low bits.
template<>
struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
  // The pointer itself is the opaque representation.
  static const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
    return P;
  }

  // Recovers the typed pointer from its opaque representation.
  static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
    return static_cast<const clang::IdentifierInfo*>(P);
  }

  // Only the lowest bit is advertised as available.
  static constexpr int NumLowBitsAvailable = 1;
};
1000 
1001 } // namespace llvm
1002 
1003 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
1004