1 //===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
11 /// clang::Selector interfaces.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
16 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17 
18 #include "clang/Basic/LLVM.h"
19 #include "clang/Basic/TokenKinds.h"
20 #include "llvm/ADT/DenseMapInfo.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/ADT/StringMap.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Allocator.h"
25 #include "llvm/Support/PointerLikeTypeTraits.h"
26 #include "llvm/Support/type_traits.h"
27 #include <cassert>
28 #include <cstddef>
29 #include <cstdint>
30 #include <cstring>
31 #include <string>
32 #include <utility>
33 
34 namespace clang {
35 
36 class DeclarationName;
37 class DeclarationNameTable;
38 class IdentifierInfo;
39 class LangOptions;
40 class MultiKeywordSelector;
41 class SourceLocation;
42 
/// Classifies whether, and for what spelling-related reason, an identifier is
/// treated as reserved.
///
/// \c NotReserved is the only value for which \c isReservedAtGlobalScope()
/// returns false.
enum class ReservedIdentifierStatus {
  /// The identifier is not reserved.
  NotReserved = 0,

  // The two "global scope" flavours below are the ones that
  // isReservedInAllContexts() does not treat as reserved everywhere.
  StartsWithUnderscoreAtGlobalScope,
  StartsWithUnderscoreAndIsExternC,

  // The remaining flavours are treated as reserved in every context.
  StartsWithDoubleUnderscore,
  StartsWithUnderscoreFollowedByCapitalLetter,
  ContainsDoubleUnderscore,
};
51 
52 /// Determine whether an identifier is reserved for use as a name at global
53 /// scope. Such identifiers might be implementation-specific global functions
54 /// or variables.
55 inline bool isReservedAtGlobalScope(ReservedIdentifierStatus Status) {
56   return Status != ReservedIdentifierStatus::NotReserved;
57 }
58 
59 /// Determine whether an identifier is reserved in all contexts. Such
60 /// identifiers might be implementation-specific keywords or macros, for
61 /// example.
62 inline bool isReservedInAllContexts(ReservedIdentifierStatus Status) {
63   return Status != ReservedIdentifierStatus::NotReserved &&
64          Status != ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope &&
65          Status != ReservedIdentifierStatus::StartsWithUnderscoreAndIsExternC;
66 }
67 
/// A simple pair of identifier info and location.
using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;

/// IdentifierInfo and other related classes are aligned to
/// 8 bytes so that DeclarationName can use the lower 3 bits
/// of a pointer to one of these classes.
enum { IdentifierInfoAlignment = 8 };

/// Width, in bits, of the IdentifierInfo::ObjCOrBuiltinID bit-field, which
/// stores either an Objective-C keyword ID or an offset builtin ID.
static constexpr int ObjCOrBuiltinIDBits = 16;
77 
78 /// One of these records is kept for each identifier that
79 /// is lexed.  This contains information about whether the token was \#define'd,
80 /// is a language keyword, or if it is a front-end token of some sort (e.g. a
81 /// variable or function name).  The preprocessor keeps this information in a
82 /// set, and all tok::identifier tokens have a pointer to one of these.
83 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits.
84 class alignas(IdentifierInfoAlignment) IdentifierInfo {
85   friend class IdentifierTable;
86 
87   // Front-end token ID or tok::identifier.
88   unsigned TokenID : 9;
89 
90   // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
91   // First NUM_OBJC_KEYWORDS values are for Objective-C,
92   // the remaining values are for builtins.
93   unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits;
94 
95   // True if there is a #define for this.
96   unsigned HasMacro : 1;
97 
98   // True if there was a #define for this.
99   unsigned HadMacro : 1;
100 
101   // True if the identifier is a language extension.
102   unsigned IsExtension : 1;
103 
104   // True if the identifier is a keyword in a newer or proposed Standard.
105   unsigned IsFutureCompatKeyword : 1;
106 
107   // True if the identifier is poisoned.
108   unsigned IsPoisoned : 1;
109 
110   // True if the identifier is a C++ operator keyword.
111   unsigned IsCPPOperatorKeyword : 1;
112 
113   // Internal bit set by the member function RecomputeNeedsHandleIdentifier.
114   // See comment about RecomputeNeedsHandleIdentifier for more info.
115   unsigned NeedsHandleIdentifier : 1;
116 
117   // True if the identifier was loaded (at least partially) from an AST file.
118   unsigned IsFromAST : 1;
119 
120   // True if the identifier has changed from the definition
121   // loaded from an AST file.
122   unsigned ChangedAfterLoad : 1;
123 
124   // True if the identifier's frontend information has changed from the
125   // definition loaded from an AST file.
126   unsigned FEChangedAfterLoad : 1;
127 
128   // True if revertTokenIDToIdentifier was called.
129   unsigned RevertedTokenID : 1;
130 
131   // True if there may be additional information about
132   // this identifier stored externally.
133   unsigned OutOfDate : 1;
134 
135   // True if this is the 'import' contextual keyword.
136   unsigned IsModulesImport : 1;
137 
138   // True if this is a mangled OpenMP variant name.
139   unsigned IsMangledOpenMPVariantName : 1;
140 
141   // True if this is a deprecated macro.
142   unsigned IsDeprecatedMacro : 1;
143 
144   // True if this macro is unsafe in headers.
145   unsigned IsRestrictExpansion : 1;
146 
147   // True if this macro is final.
148   unsigned IsFinal : 1;
149 
150   // 22 bits left in a 64-bit word.
151 
152   // Managed by the language front-end.
153   void *FETokenInfo = nullptr;
154 
155   llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
156 
157   IdentifierInfo()
158       : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false),
159         HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false),
160         IsPoisoned(false), IsCPPOperatorKeyword(false),
161         NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false),
162         FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false),
163         IsModulesImport(false), IsMangledOpenMPVariantName(false),
164         IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}
165 
166 public:
167   IdentifierInfo(const IdentifierInfo &) = delete;
168   IdentifierInfo &operator=(const IdentifierInfo &) = delete;
169   IdentifierInfo(IdentifierInfo &&) = delete;
170   IdentifierInfo &operator=(IdentifierInfo &&) = delete;
171 
172   /// Return true if this is the identifier for the specified string.
173   ///
174   /// This is intended to be used for string literals only: II->isStr("foo").
175   template <std::size_t StrLen>
176   bool isStr(const char (&Str)[StrLen]) const {
177     return getLength() == StrLen-1 &&
178            memcmp(getNameStart(), Str, StrLen-1) == 0;
179   }
180 
181   /// Return true if this is the identifier for the specified StringRef.
182   bool isStr(llvm::StringRef Str) const {
183     llvm::StringRef ThisStr(getNameStart(), getLength());
184     return ThisStr == Str;
185   }
186 
187   /// Return the beginning of the actual null-terminated string for this
188   /// identifier.
189   const char *getNameStart() const { return Entry->getKeyData(); }
190 
191   /// Efficiently return the length of this identifier info.
192   unsigned getLength() const { return Entry->getKeyLength(); }
193 
194   /// Return the actual identifier string.
195   StringRef getName() const {
196     return StringRef(getNameStart(), getLength());
197   }
198 
199   /// Return true if this identifier is \#defined to some other value.
200   /// \note The current definition may be in a module and not currently visible.
201   bool hasMacroDefinition() const {
202     return HasMacro;
203   }
204   void setHasMacroDefinition(bool Val) {
205     if (HasMacro == Val) return;
206 
207     HasMacro = Val;
208     if (Val) {
209       NeedsHandleIdentifier = true;
210       HadMacro = true;
211     } else {
212       // If this is a final macro, make the deprecation and header unsafe bits
213       // stick around after the undefinition so they apply to any redefinitions.
214       if (!IsFinal) {
215         // Because calling the setters of these calls recomputes, just set them
216         // manually to avoid recomputing a bunch of times.
217         IsDeprecatedMacro = false;
218         IsRestrictExpansion = false;
219       }
220       RecomputeNeedsHandleIdentifier();
221     }
222   }
223   /// Returns true if this identifier was \#defined to some value at any
224   /// moment. In this case there should be an entry for the identifier in the
225   /// macro history table in Preprocessor.
226   bool hadMacroDefinition() const {
227     return HadMacro;
228   }
229 
230   bool isDeprecatedMacro() const { return IsDeprecatedMacro; }
231 
232   void setIsDeprecatedMacro(bool Val) {
233     if (IsDeprecatedMacro == Val)
234       return;
235     IsDeprecatedMacro = Val;
236     if (Val)
237       NeedsHandleIdentifier = true;
238     else
239       RecomputeNeedsHandleIdentifier();
240   }
241 
242   bool isRestrictExpansion() const { return IsRestrictExpansion; }
243 
244   void setIsRestrictExpansion(bool Val) {
245     if (IsRestrictExpansion == Val)
246       return;
247     IsRestrictExpansion = Val;
248     if (Val)
249       NeedsHandleIdentifier = true;
250     else
251       RecomputeNeedsHandleIdentifier();
252   }
253 
254   bool isFinal() const { return IsFinal; }
255 
256   void setIsFinal(bool Val) { IsFinal = Val; }
257 
258   /// If this is a source-language token (e.g. 'for'), this API
259   /// can be used to cause the lexer to map identifiers to source-language
260   /// tokens.
261   tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
262 
263   /// True if revertTokenIDToIdentifier() was called.
264   bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
265 
266   /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
267   /// compatibility.
268   ///
269   /// TokenID is normally read-only but there are 2 instances where we revert it
270   /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
271   /// using this method so we can inform serialization about it.
272   void revertTokenIDToIdentifier() {
273     assert(TokenID != tok::identifier && "Already at tok::identifier");
274     TokenID = tok::identifier;
275     RevertedTokenID = true;
276   }
277   void revertIdentifierToTokenID(tok::TokenKind TK) {
278     assert(TokenID == tok::identifier && "Should be at tok::identifier");
279     TokenID = TK;
280     RevertedTokenID = false;
281   }
282 
283   /// Return the preprocessor keyword ID for this identifier.
284   ///
285   /// For example, "define" will return tok::pp_define.
286   tok::PPKeywordKind getPPKeywordID() const;
287 
288   /// Return the Objective-C keyword ID for the this identifier.
289   ///
290   /// For example, 'class' will return tok::objc_class if ObjC is enabled.
291   tok::ObjCKeywordKind getObjCKeywordID() const {
292     if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
293       return tok::ObjCKeywordKind(ObjCOrBuiltinID);
294     else
295       return tok::objc_not_keyword;
296   }
297   void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
298 
299   /// Return a value indicating whether this is a builtin function.
300   ///
301   /// 0 is not-built-in. 1+ are specific builtin functions.
302   unsigned getBuiltinID() const {
303     if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
304       return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
305     else
306       return 0;
307   }
308   void setBuiltinID(unsigned ID) {
309     ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
310     assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
311            && "ID too large for field!");
312   }
313 
314   unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
315   void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
316 
317   /// get/setExtension - Initialize information about whether or not this
318   /// language token is an extension.  This controls extension warnings, and is
319   /// only valid if a custom token ID is set.
320   bool isExtensionToken() const { return IsExtension; }
321   void setIsExtensionToken(bool Val) {
322     IsExtension = Val;
323     if (Val)
324       NeedsHandleIdentifier = true;
325     else
326       RecomputeNeedsHandleIdentifier();
327   }
328 
329   /// is/setIsFutureCompatKeyword - Initialize information about whether or not
330   /// this language token is a keyword in a newer or proposed Standard. This
331   /// controls compatibility warnings, and is only true when not parsing the
332   /// corresponding Standard. Once a compatibility problem has been diagnosed
333   /// with this keyword, the flag will be cleared.
334   bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
335   void setIsFutureCompatKeyword(bool Val) {
336     IsFutureCompatKeyword = Val;
337     if (Val)
338       NeedsHandleIdentifier = true;
339     else
340       RecomputeNeedsHandleIdentifier();
341   }
342 
343   /// setIsPoisoned - Mark this identifier as poisoned.  After poisoning, the
344   /// Preprocessor will emit an error every time this token is used.
345   void setIsPoisoned(bool Value = true) {
346     IsPoisoned = Value;
347     if (Value)
348       NeedsHandleIdentifier = true;
349     else
350       RecomputeNeedsHandleIdentifier();
351   }
352 
353   /// Return true if this token has been poisoned.
354   bool isPoisoned() const { return IsPoisoned; }
355 
356   /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
357   /// this identifier is a C++ alternate representation of an operator.
358   void setIsCPlusPlusOperatorKeyword(bool Val = true) {
359     IsCPPOperatorKeyword = Val;
360   }
361   bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
362 
363   /// Return true if this token is a keyword in the specified language.
364   bool isKeyword(const LangOptions &LangOpts) const;
365 
366   /// Return true if this token is a C++ keyword in the specified
367   /// language.
368   bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
369 
370   /// Get and set FETokenInfo. The language front-end is allowed to associate
371   /// arbitrary metadata with this token.
372   void *getFETokenInfo() const { return FETokenInfo; }
373   void setFETokenInfo(void *T) { FETokenInfo = T; }
374 
375   /// Return true if the Preprocessor::HandleIdentifier must be called
376   /// on a token of this identifier.
377   ///
378   /// If this returns false, we know that HandleIdentifier will not affect
379   /// the token.
380   bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
381 
382   /// Return true if the identifier in its current state was loaded
383   /// from an AST file.
384   bool isFromAST() const { return IsFromAST; }
385 
386   void setIsFromAST() { IsFromAST = true; }
387 
388   /// Determine whether this identifier has changed since it was loaded
389   /// from an AST file.
390   bool hasChangedSinceDeserialization() const {
391     return ChangedAfterLoad;
392   }
393 
394   /// Note that this identifier has changed since it was loaded from
395   /// an AST file.
396   void setChangedSinceDeserialization() {
397     ChangedAfterLoad = true;
398   }
399 
400   /// Determine whether the frontend token information for this
401   /// identifier has changed since it was loaded from an AST file.
402   bool hasFETokenInfoChangedSinceDeserialization() const {
403     return FEChangedAfterLoad;
404   }
405 
406   /// Note that the frontend token information for this identifier has
407   /// changed since it was loaded from an AST file.
408   void setFETokenInfoChangedSinceDeserialization() {
409     FEChangedAfterLoad = true;
410   }
411 
412   /// Determine whether the information for this identifier is out of
413   /// date with respect to the external source.
414   bool isOutOfDate() const { return OutOfDate; }
415 
416   /// Set whether the information for this identifier is out of
417   /// date with respect to the external source.
418   void setOutOfDate(bool OOD) {
419     OutOfDate = OOD;
420     if (OOD)
421       NeedsHandleIdentifier = true;
422     else
423       RecomputeNeedsHandleIdentifier();
424   }
425 
426   /// Determine whether this is the contextual keyword \c import.
427   bool isModulesImport() const { return IsModulesImport; }
428 
429   /// Set whether this identifier is the contextual keyword \c import.
430   void setModulesImport(bool I) {
431     IsModulesImport = I;
432     if (I)
433       NeedsHandleIdentifier = true;
434     else
435       RecomputeNeedsHandleIdentifier();
436   }
437 
438   /// Determine whether this is the mangled name of an OpenMP variant.
439   bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
440 
441   /// Set whether this is the mangled name of an OpenMP variant.
442   void setMangledOpenMPVariantName(bool I) { IsMangledOpenMPVariantName = I; }
443 
444   /// Return true if this identifier is an editor placeholder.
445   ///
446   /// Editor placeholders are produced by the code-completion engine and are
447   /// represented as characters between '<#' and '#>' in the source code. An
448   /// example of auto-completed call with a placeholder parameter is shown
449   /// below:
450   /// \code
451   ///   function(<#int x#>);
452   /// \endcode
453   bool isEditorPlaceholder() const {
454     return getName().startswith("<#") && getName().endswith("#>");
455   }
456 
457   /// Determine whether \p this is a name reserved for the implementation (C99
458   /// 7.1.3, C++ [lib.global.names]).
459   ReservedIdentifierStatus isReserved(const LangOptions &LangOpts) const;
460 
461   /// If the identifier is an "uglified" reserved name, return a cleaned form.
462   /// e.g. _Foo => Foo. Otherwise, just returns the name.
463   StringRef deuglifiedName() const;
464 
465   /// Provide less than operator for lexicographical sorting.
466   bool operator<(const IdentifierInfo &RHS) const {
467     return getName() < RHS.getName();
468   }
469 
470 private:
471   /// The Preprocessor::HandleIdentifier does several special (but rare)
472   /// things to identifiers of various sorts.  For example, it changes the
473   /// \c for keyword token from tok::identifier to tok::for.
474   ///
475   /// This method is very tied to the definition of HandleIdentifier.  Any
476   /// change to it should be reflected here.
477   void RecomputeNeedsHandleIdentifier() {
478     NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
479                             isExtensionToken() || isFutureCompatKeyword() ||
480                             isOutOfDate() || isModulesImport();
481   }
482 };
483 
484 /// An RAII object for [un]poisoning an identifier within a scope.
485 ///
486 /// \p II is allowed to be null, in which case objects of this type have
487 /// no effect.
488 class PoisonIdentifierRAIIObject {
489   IdentifierInfo *const II;
490   const bool OldValue;
491 
492 public:
493   PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
494     : II(II), OldValue(II ? II->isPoisoned() : false) {
495     if(II)
496       II->setIsPoisoned(NewValue);
497   }
498 
499   ~PoisonIdentifierRAIIObject() {
500     if(II)
501       II->setIsPoisoned(OldValue);
502   }
503 };
504 
/// An iterator that walks over all of the known identifiers
/// in the lookup table.
///
/// Since this iterator uses an abstract interface via virtual
/// functions, it uses an object-oriented interface rather than the
/// more standard C++ STL iterator interface. In this OO-style
/// iteration, the single function \c Next() provides dereference,
/// advance, and end-of-sequence checking in a single
/// operation. Subclasses of this iterator type will provide the
/// actual functionality.
///
/// The class is abstract (\c Next() is pure virtual), non-copyable, and can
/// only be default-constructed by subclasses.
class IdentifierIterator {
protected:
  // Only subclasses may construct an iterator.
  IdentifierIterator() = default;

public:
  IdentifierIterator(const IdentifierIterator &) = delete;
  IdentifierIterator &operator=(const IdentifierIterator &) = delete;

  // Virtual so that an iterator can be destroyed through a base pointer.
  virtual ~IdentifierIterator();

  /// Retrieve the next string in the identifier table and
  /// advances the iterator for the following string.
  ///
  /// \returns The next string in the identifier table. If there is
  /// no such string, returns an empty \c StringRef.
  virtual StringRef Next() = 0;
};
532 
/// Provides lookups to, and iteration over, IdentifierInfo objects.
///
/// This is an abstract interface: \c get() is pure virtual and must be
/// supplied by implementations, while \c getIdentifiers() is virtual with an
/// implementation defined outside this class body.
class IdentifierInfoLookup {
public:
  virtual ~IdentifierInfoLookup();

  /// Return the IdentifierInfo for the specified named identifier.
  ///
  /// Unlike the version in IdentifierTable, this returns a pointer instead
  /// of a reference.  If the pointer is null then the IdentifierInfo cannot
  /// be found.
  virtual IdentifierInfo* get(StringRef Name) = 0;

  /// Retrieve an iterator into the set of all identifiers
  /// known to this identifier lookup source.
  ///
  /// This routine provides access to all of the identifiers known to
  /// the identifier lookup, allowing access to the contents of the
  /// identifiers without introducing the overhead of constructing
  /// IdentifierInfo objects for each.
  ///
  /// \returns A new iterator into the set of known identifiers. The
  /// caller is responsible for deleting this iterator.
  virtual IdentifierIterator *getIdentifiers();
};
557 
558 /// Implements an efficient mapping from strings to IdentifierInfo nodes.
559 ///
560 /// This has no other purpose, but this is an extremely performance-critical
561 /// piece of the code, as each occurrence of every identifier goes through
562 /// here when lexed.
563 class IdentifierTable {
564   // Shark shows that using MallocAllocator is *much* slower than using this
565   // BumpPtrAllocator!
566   using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>;
567   HashTableTy HashTable;
568 
569   IdentifierInfoLookup* ExternalLookup;
570 
571 public:
572   /// Create the identifier table.
573   explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr);
574 
575   /// Create the identifier table, populating it with info about the
576   /// language keywords for the language specified by \p LangOpts.
577   explicit IdentifierTable(const LangOptions &LangOpts,
578                            IdentifierInfoLookup *ExternalLookup = nullptr);
579 
580   /// Set the external identifier lookup mechanism.
581   void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
582     ExternalLookup = IILookup;
583   }
584 
585   /// Retrieve the external identifier lookup object, if any.
586   IdentifierInfoLookup *getExternalIdentifierLookup() const {
587     return ExternalLookup;
588   }
589 
590   llvm::BumpPtrAllocator& getAllocator() {
591     return HashTable.getAllocator();
592   }
593 
594   /// Return the identifier token info for the specified named
595   /// identifier.
596   IdentifierInfo &get(StringRef Name) {
597     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
598 
599     IdentifierInfo *&II = Entry.second;
600     if (II) return *II;
601 
602     // No entry; if we have an external lookup, look there first.
603     if (ExternalLookup) {
604       II = ExternalLookup->get(Name);
605       if (II)
606         return *II;
607     }
608 
609     // Lookups failed, make a new IdentifierInfo.
610     void *Mem = getAllocator().Allocate<IdentifierInfo>();
611     II = new (Mem) IdentifierInfo();
612 
613     // Make sure getName() knows how to find the IdentifierInfo
614     // contents.
615     II->Entry = &Entry;
616 
617     return *II;
618   }
619 
620   IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
621     IdentifierInfo &II = get(Name);
622     II.TokenID = TokenCode;
623     assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
624     return II;
625   }
626 
627   /// Gets an IdentifierInfo for the given name without consulting
628   ///        external sources.
629   ///
630   /// This is a version of get() meant for external sources that want to
631   /// introduce or modify an identifier. If they called get(), they would
632   /// likely end up in a recursion.
633   IdentifierInfo &getOwn(StringRef Name) {
634     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
635 
636     IdentifierInfo *&II = Entry.second;
637     if (II)
638       return *II;
639 
640     // Lookups failed, make a new IdentifierInfo.
641     void *Mem = getAllocator().Allocate<IdentifierInfo>();
642     II = new (Mem) IdentifierInfo();
643 
644     // Make sure getName() knows how to find the IdentifierInfo
645     // contents.
646     II->Entry = &Entry;
647 
648     // If this is the 'import' contextual keyword, mark it as such.
649     if (Name.equals("import"))
650       II->setModulesImport(true);
651 
652     return *II;
653   }
654 
655   using iterator = HashTableTy::const_iterator;
656   using const_iterator = HashTableTy::const_iterator;
657 
658   iterator begin() const { return HashTable.begin(); }
659   iterator end() const   { return HashTable.end(); }
660   unsigned size() const  { return HashTable.size(); }
661 
662   iterator find(StringRef Name) const { return HashTable.find(Name); }
663 
664   /// Print some statistics to stderr that indicate how well the
665   /// hashing is doing.
666   void PrintStats() const;
667 
668   /// Populate the identifier table with info about the language keywords
669   /// for the language specified by \p LangOpts.
670   void AddKeywords(const LangOptions &LangOpts);
671 };
672 
/// A family of Objective-C methods.
///
/// These families have no inherent meaning in the language, but are
/// nonetheless central enough in the existing implementations to
/// merit direct AST support.  While, in theory, arbitrary methods can
/// be considered to form families, we focus here on the methods
/// involving allocation and retain-count management, as these are the
/// most "core" and the most likely to be useful to diverse clients
/// without extra information.
///
/// Both selectors and actual method declarations may be classified
/// into families.  Method families may impose additional restrictions
/// beyond their selector name; for example, a method called '_init'
/// that returns void is not considered to be in the 'init' family
/// (but would be if it returned 'id').  It is also possible to
/// explicitly change or remove a method's family.  Therefore the
/// method's family should be considered the single source of truth.
enum ObjCMethodFamily {
  /// No particular method family.
  OMF_None,

  // Selectors in these families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have
  // additional CamelCase "words" in their first selector chunk
  // following the family name.
  OMF_alloc,
  OMF_copy,
  OMF_init,
  OMF_mutableCopy,
  OMF_new,

  // These families are singletons consisting only of the nullary
  // selector with the given name.
  OMF_autorelease,
  OMF_dealloc,
  OMF_finalize,
  OMF_release,
  OMF_retain,
  OMF_retainCount,
  OMF_self,
  OMF_initialize,

  // performSelector families
  //
  // Keep this the last enumerator: the static_assert following
  // InvalidObjCMethodFamily uses it as the largest valid family value.
  OMF_performSelector
};

/// Enough bits to store any enumerator in ObjCMethodFamily or
/// InvalidObjCMethodFamily.
enum { ObjCMethodFamilyBitWidth = 4 };

/// An invalid value of ObjCMethodFamily.
enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 };

// Enforce the promise made on ObjCMethodFamilyBitWidth: every real family must
// fit in the bit width and stay distinct from the all-ones invalid value.
static_assert(static_cast<unsigned>(OMF_performSelector) <
                  static_cast<unsigned>(InvalidObjCMethodFamily),
              "ObjCMethodFamilyBitWidth is too small to distinguish every "
              "ObjCMethodFamily enumerator from InvalidObjCMethodFamily");
725 
/// A family of Objective-C methods.
///
/// These are families of methods whose result type is initially 'id', but
/// which are candidates for having the result type changed to 'instancetype'.
enum ObjCInstanceTypeFamily {
  OIT_None = 0,
  OIT_Array,
  OIT_Dictionary,
  OIT_Singleton,
  OIT_Init,
  OIT_ReturnsSelf
};
738 
/// The string-format family of a selector, as reported by
/// Selector::getStringFormatFamily().
///
/// NOTE(review): the enumerator names suggest NSString- and CFString-style
/// format strings; confirm against the classification code.
enum ObjCStringFormatFamily {
  SFF_None = 0,
  SFF_NSString,
  SFF_CFString
};
744 
745 /// Smart pointer class that efficiently represents Objective-C method
746 /// names.
747 ///
748 /// This class will either point to an IdentifierInfo or a
749 /// MultiKeywordSelector (which is private). This enables us to optimize
750 /// selectors that take no arguments and selectors that take 1 argument, which
751 /// accounts for 78% of all selectors in Cocoa.h.
752 class Selector {
753   friend class Diagnostic;
754   friend class SelectorTable; // only the SelectorTable can create these
755   friend class DeclarationName; // and the AST's DeclarationName.
756 
757   enum IdentifierInfoFlag {
758     // Empty selector = 0. Note that these enumeration values must
759     // correspond to the enumeration values of DeclarationName::StoredNameKind
760     ZeroArg  = 0x01,
761     OneArg   = 0x02,
762     MultiArg = 0x07,
763     ArgFlags = 0x07
764   };
765 
766   /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low
767   /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any
768   /// case IdentifierInfo and MultiKeywordSelector are already aligned to
769   /// 8 bytes even on 32 bits archs because of DeclarationName.
770   uintptr_t InfoPtr = 0;
771 
772   Selector(IdentifierInfo *II, unsigned nArgs) {
773     InfoPtr = reinterpret_cast<uintptr_t>(II);
774     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
775     assert(nArgs < 2 && "nArgs not equal to 0/1");
776     InfoPtr |= nArgs+1;
777   }
778 
779   Selector(MultiKeywordSelector *SI) {
780     InfoPtr = reinterpret_cast<uintptr_t>(SI);
781     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
782     InfoPtr |= MultiArg;
783   }
784 
785   IdentifierInfo *getAsIdentifierInfo() const {
786     if (getIdentifierInfoFlag() < MultiArg)
787       return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
788     return nullptr;
789   }
790 
791   MultiKeywordSelector *getMultiKeywordSelector() const {
792     return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
793   }
794 
795   unsigned getIdentifierInfoFlag() const {
796     return InfoPtr & ArgFlags;
797   }
798 
799   static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
800 
801   static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
802 
803 public:
804   /// The default ctor should only be used when creating data structures that
805   ///  will contain selectors.
806   Selector() = default;
807   explicit Selector(uintptr_t V) : InfoPtr(V) {}
808 
809   /// operator==/!= - Indicate whether the specified selectors are identical.
810   bool operator==(Selector RHS) const {
811     return InfoPtr == RHS.InfoPtr;
812   }
813   bool operator!=(Selector RHS) const {
814     return InfoPtr != RHS.InfoPtr;
815   }
816 
817   void *getAsOpaquePtr() const {
818     return reinterpret_cast<void*>(InfoPtr);
819   }
820 
821   /// Determine whether this is the empty selector.
822   bool isNull() const { return InfoPtr == 0; }
823 
824   // Predicates to identify the selector type.
825   bool isKeywordSelector() const {
826     return getIdentifierInfoFlag() != ZeroArg;
827   }
828 
829   bool isUnarySelector() const {
830     return getIdentifierInfoFlag() == ZeroArg;
831   }
832 
833   /// If this selector is the specific keyword selector described by Names.
834   bool isKeywordSelector(ArrayRef<StringRef> Names) const;
835 
  /// Whether this selector is the specific unary selector described by
  /// \p Name. Only declared in this header.
  bool isUnarySelector(StringRef Name) const;
838 
  /// The number of arguments of this selector. Only declared in this header.
  unsigned getNumArgs() const;
840 
  /// Retrieve the identifier at a given position in the selector.
  ///
  /// Note that the identifier pointer returned may be null. Clients that only
  /// care about the text of the identifier string, and not the specific,
  /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
  /// an empty string when the identifier pointer would be null.
  ///
  /// \param argIndex The index for which we want to retrieve the identifier.
  /// This index shall be less than \c getNumArgs() unless this is a keyword
  /// selector, in which case 0 is the only permissible value.
  /// NOTE(review): the exception presumably concerns selectors without
  /// arguments (see \c isUnarySelector()), for which no index could be less
  /// than \c getNumArgs(); confirm against the out-of-line definition.
  ///
  /// \returns the uniqued identifier for this slot, or null if this slot has
  /// no corresponding identifier.
  IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
855 
  /// Retrieve the name at a given position in the selector.
  ///
  /// \param argIndex The index for which we want to retrieve the name.
  /// This index shall be less than \c getNumArgs() unless this is a keyword
  /// selector, in which case 0 is the only permissible value.
  /// NOTE(review): the exception presumably concerns selectors without
  /// arguments (see \c isUnarySelector()); confirm against the out-of-line
  /// definition.
  ///
  /// \returns the name for this slot, which may be the empty string if no
  /// name was supplied.
  StringRef getNameForSlot(unsigned argIndex) const;
865 
  /// Derive the full selector name (e.g. "foo:bar:") and return it as a
  /// std::string.
  std::string getAsString() const;
869 
  /// Prints the full selector name (e.g. "foo:bar:") to \p OS.
  void print(llvm::raw_ostream &OS) const;
872 
  /// Dump this selector. Only declared in this header; presumably a debugging
  /// aid that prints the selector -- confirm against the definition.
  void dump() const;
874 
875   /// Derive the conventional family of this method.
876   ObjCMethodFamily getMethodFamily() const {
877     return getMethodFamilyImpl(*this);
878   }
879 
880   ObjCStringFormatFamily getStringFormatFamily() const {
881     return getStringFormatFamilyImpl(*this);
882   }
883 
884   static Selector getEmptyMarker() {
885     return Selector(uintptr_t(-1));
886   }
887 
888   static Selector getTombstoneMarker() {
889     return Selector(uintptr_t(-2));
890   }
891 
  /// Classify \p sel into an ObjCInstanceTypeFamily. Only declared in this
  /// header.
  static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
893 };
894 
895 /// This table allows us to fully hide how we implement
896 /// multi-keyword caching.
897 class SelectorTable {
898   // Actually a SelectorTableImpl
899   void *Impl;
900 
901 public:
902   SelectorTable();
903   SelectorTable(const SelectorTable &) = delete;
904   SelectorTable &operator=(const SelectorTable &) = delete;
905   ~SelectorTable();
906 
907   /// Can create any sort of selector.
908   ///
909   /// \p NumArgs indicates whether this is a no argument selector "foo", a
910   /// single argument selector "foo:" or multi-argument "foo:bar:".
911   Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
912 
913   Selector getUnarySelector(IdentifierInfo *ID) {
914     return Selector(ID, 1);
915   }
916 
917   Selector getNullarySelector(IdentifierInfo *ID) {
918     return Selector(ID, 0);
919   }
920 
921   /// Return the total amount of memory allocated for managing selectors.
922   size_t getTotalMemory() const;
923 
924   /// Return the default setter name for the given identifier.
925   ///
926   /// This is "set" + \p Name where the initial character of \p Name
927   /// has been capitalized.
928   static SmallString<64> constructSetterName(StringRef Name);
929 
930   /// Return the default setter selector for the given identifier.
931   ///
932   /// This is "set" + \p Name where the initial character of \p Name
933   /// has been capitalized.
934   static Selector constructSetterSelector(IdentifierTable &Idents,
935                                           SelectorTable &SelTable,
936                                           const IdentifierInfo *Name);
937 
938   /// Return the property name for the given setter selector.
939   static std::string getPropertyNameFromSetterSelector(Selector Sel);
940 };
941 
942 namespace detail {
943 
944 /// DeclarationNameExtra is used as a base of various uncommon special names.
945 /// This class is needed since DeclarationName has not enough space to store
946 /// the kind of every possible names. Therefore the kind of common names is
947 /// stored directly in DeclarationName, and the kind of uncommon names is
948 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because
949 /// DeclarationName needs the lower 3 bits to store the kind of common names.
950 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change
951 /// here is very likely to require changes in DeclarationName(Table).
952 class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
953   friend class clang::DeclarationName;
954   friend class clang::DeclarationNameTable;
955 
956 protected:
957   /// The kind of "extra" information stored in the DeclarationName. See
958   /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
959   /// are used. Note that DeclarationName depends on the numerical values
960   /// of the enumerators in this enum. See DeclarationName::StoredNameKind
961   /// for more info.
962   enum ExtraKind {
963     CXXDeductionGuideName,
964     CXXLiteralOperatorName,
965     CXXUsingDirective,
966     ObjCMultiArgSelector
967   };
968 
969   /// ExtraKindOrNumArgs has one of the following meaning:
970   ///  * The kind of an uncommon C++ special name. This DeclarationNameExtra
971   ///    is in this case in fact either a CXXDeductionGuideNameExtra or
972   ///    a CXXLiteralOperatorIdName.
973   ///
974   ///  * It may be also name common to C++ using-directives (CXXUsingDirective),
975   ///
976   ///  * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
977   ///    the number of arguments in the Objective-C selector, in which
978   ///    case the DeclarationNameExtra is also a MultiKeywordSelector.
979   unsigned ExtraKindOrNumArgs;
980 
981   DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
982   DeclarationNameExtra(unsigned NumArgs)
983       : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
984 
985   /// Return the corresponding ExtraKind.
986   ExtraKind getKind() const {
987     return static_cast<ExtraKind>(ExtraKindOrNumArgs >
988                                           (unsigned)ObjCMultiArgSelector
989                                       ? (unsigned)ObjCMultiArgSelector
990                                       : ExtraKindOrNumArgs);
991   }
992 
993   /// Return the number of arguments in an ObjC selector. Only valid when this
994   /// is indeed an ObjCMultiArgSelector.
995   unsigned getNumArgs() const {
996     assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
997            "getNumArgs called but this is not an ObjC selector!");
998     return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
999   }
1000 };
1001 
1002 } // namespace detail
1003 
1004 }  // namespace clang
1005 
1006 namespace llvm {
1007 
1008 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
1009 /// DenseSets.
1010 template <>
1011 struct DenseMapInfo<clang::Selector> {
1012   static clang::Selector getEmptyKey() {
1013     return clang::Selector::getEmptyMarker();
1014   }
1015 
1016   static clang::Selector getTombstoneKey() {
1017     return clang::Selector::getTombstoneMarker();
1018   }
1019 
1020   static unsigned getHashValue(clang::Selector S);
1021 
1022   static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
1023     return LHS == RHS;
1024   }
1025 };
1026 
1027 template<>
1028 struct PointerLikeTypeTraits<clang::Selector> {
1029   static const void *getAsVoidPointer(clang::Selector P) {
1030     return P.getAsOpaquePtr();
1031   }
1032 
1033   static clang::Selector getFromVoidPointer(const void *P) {
1034     return clang::Selector(reinterpret_cast<uintptr_t>(P));
1035   }
1036 
1037   static constexpr int NumLowBitsAvailable = 0;
1038 };
1039 
1040 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
1041 // are not guaranteed to be 8-byte aligned.
1042 template<>
1043 struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
1044   static void *getAsVoidPointer(clang::IdentifierInfo* P) {
1045     return P;
1046   }
1047 
1048   static clang::IdentifierInfo *getFromVoidPointer(void *P) {
1049     return static_cast<clang::IdentifierInfo*>(P);
1050   }
1051 
1052   static constexpr int NumLowBitsAvailable = 1;
1053 };
1054 
1055 template<>
1056 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
1057   static const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
1058     return P;
1059   }
1060 
1061   static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
1062     return static_cast<const clang::IdentifierInfo*>(P);
1063   }
1064 
1065   static constexpr int NumLowBitsAvailable = 1;
1066 };
1067 
1068 } // namespace llvm
1069 
1070 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
1071