1 //===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
11 /// clang::Selector interfaces.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
16 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17 
18 #include "clang/Basic/DiagnosticIDs.h"
19 #include "clang/Basic/LLVM.h"
20 #include "clang/Basic/TokenKinds.h"
21 #include "llvm/ADT/DenseMapInfo.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/ADT/StringMap.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Support/Allocator.h"
26 #include "llvm/Support/PointerLikeTypeTraits.h"
27 #include "llvm/Support/type_traits.h"
28 #include <cassert>
29 #include <cstddef>
30 #include <cstdint>
31 #include <cstring>
32 #include <string>
33 #include <utility>
34 
35 namespace clang {
36 
37 class DeclarationName;
38 class DeclarationNameTable;
39 class IdentifierInfo;
40 class LangOptions;
41 class MultiKeywordSelector;
42 class SourceLocation;
43 
/// Classification of an identifier with respect to the names reserved for the
/// implementation. This is the result type of IdentifierInfo::isReserved().
///
/// The enumerators are numbered consecutively, with NotReserved being zero.
enum class ReservedIdentifierStatus {
  NotReserved = 0,
  StartsWithUnderscoreAtGlobalScope = 1,
  StartsWithUnderscoreAndIsExternC = 2,
  StartsWithDoubleUnderscore = 3,
  StartsWithUnderscoreFollowedByCapitalLetter = 4,
  ContainsDoubleUnderscore = 5,
};
52 
/// Classification of an identifier used as a literal suffix. This is the
/// result type of IdentifierInfo::isReservedLiteralSuffixId().
///
/// The enumerators are numbered consecutively, with NotReserved being zero.
enum class ReservedLiteralSuffixIdStatus {
  NotReserved = 0,
  NotStartsWithUnderscore = 1,
  ContainsDoubleUnderscore = 2,
};
58 
59 /// Determine whether an identifier is reserved for use as a name at global
60 /// scope. Such identifiers might be implementation-specific global functions
61 /// or variables.
62 inline bool isReservedAtGlobalScope(ReservedIdentifierStatus Status) {
63   return Status != ReservedIdentifierStatus::NotReserved;
64 }
65 
66 /// Determine whether an identifier is reserved in all contexts. Such
67 /// identifiers might be implementation-specific keywords or macros, for
68 /// example.
69 inline bool isReservedInAllContexts(ReservedIdentifierStatus Status) {
70   return Status != ReservedIdentifierStatus::NotReserved &&
71          Status != ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope &&
72          Status != ReservedIdentifierStatus::StartsWithUnderscoreAndIsExternC;
73 }
74 
75 /// A simple pair of identifier info and location.
76 using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;
77 
78 /// IdentifierInfo and other related classes are aligned to
79 /// 8 bytes so that DeclarationName can use the lower 3 bits
80 /// of a pointer to one of these classes.
81 enum { IdentifierInfoAlignment = 8 };
82 
83 static constexpr int ObjCOrBuiltinIDBits = 16;
84 
85 /// The "layout" of ObjCOrBuiltinID is:
86 ///  - The first value (0) represents "not a special identifier".
87 ///  - The next (NUM_OBJC_KEYWORDS - 1) values represent ObjCKeywordKinds (not
88 ///    including objc_not_keyword).
89 ///  - The next (NUM_INTERESTING_IDENTIFIERS - 1) values represent
90 ///    InterestingIdentifierKinds (not including not_interesting).
91 ///  - The rest of the values represent builtin IDs (not including NotBuiltin).
92 static constexpr int FirstObjCKeywordID = 1;
93 static constexpr int LastObjCKeywordID =
94     FirstObjCKeywordID + tok::NUM_OBJC_KEYWORDS - 2;
95 static constexpr int FirstInterestingIdentifierID = LastObjCKeywordID + 1;
96 static constexpr int LastInterestingIdentifierID =
97     FirstInterestingIdentifierID + tok::NUM_INTERESTING_IDENTIFIERS - 2;
98 static constexpr int FirstBuiltinID = LastInterestingIdentifierID + 1;
99 
100 /// One of these records is kept for each identifier that
101 /// is lexed.  This contains information about whether the token was \#define'd,
102 /// is a language keyword, or if it is a front-end token of some sort (e.g. a
103 /// variable or function name).  The preprocessor keeps this information in a
104 /// set, and all tok::identifier tokens have a pointer to one of these.
105 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits.
106 class alignas(IdentifierInfoAlignment) IdentifierInfo {
107   friend class IdentifierTable;
108 
109   // Front-end token ID or tok::identifier.
110   unsigned TokenID : 9;
111 
112   // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
113   // First NUM_OBJC_KEYWORDS values are for Objective-C,
114   // the remaining values are for builtins.
115   unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits;
116 
117   // True if there is a #define for this.
118   unsigned HasMacro : 1;
119 
120   // True if there was a #define for this.
121   unsigned HadMacro : 1;
122 
123   // True if the identifier is a language extension.
124   unsigned IsExtension : 1;
125 
126   // True if the identifier is a keyword in a newer or proposed Standard.
127   unsigned IsFutureCompatKeyword : 1;
128 
129   // True if the identifier is poisoned.
130   unsigned IsPoisoned : 1;
131 
132   // True if the identifier is a C++ operator keyword.
133   unsigned IsCPPOperatorKeyword : 1;
134 
135   // Internal bit set by the member function RecomputeNeedsHandleIdentifier.
136   // See comment about RecomputeNeedsHandleIdentifier for more info.
137   unsigned NeedsHandleIdentifier : 1;
138 
139   // True if the identifier was loaded (at least partially) from an AST file.
140   unsigned IsFromAST : 1;
141 
142   // True if the identifier has changed from the definition
143   // loaded from an AST file.
144   unsigned ChangedAfterLoad : 1;
145 
146   // True if the identifier's frontend information has changed from the
147   // definition loaded from an AST file.
148   unsigned FEChangedAfterLoad : 1;
149 
150   // True if revertTokenIDToIdentifier was called.
151   unsigned RevertedTokenID : 1;
152 
153   // True if there may be additional information about
154   // this identifier stored externally.
155   unsigned OutOfDate : 1;
156 
157   // True if this is the 'import' contextual keyword.
158   unsigned IsModulesImport : 1;
159 
160   // True if this is a mangled OpenMP variant name.
161   unsigned IsMangledOpenMPVariantName : 1;
162 
163   // True if this is a deprecated macro.
164   unsigned IsDeprecatedMacro : 1;
165 
166   // True if this macro is unsafe in headers.
167   unsigned IsRestrictExpansion : 1;
168 
169   // True if this macro is final.
170   unsigned IsFinal : 1;
171 
172   // 22 bits left in a 64-bit word.
173 
174   // Managed by the language front-end.
175   void *FETokenInfo = nullptr;
176 
177   llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
178 
179   IdentifierInfo()
180       : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false),
181         HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false),
182         IsPoisoned(false), IsCPPOperatorKeyword(false),
183         NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false),
184         FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false),
185         IsModulesImport(false), IsMangledOpenMPVariantName(false),
186         IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}
187 
188 public:
189   IdentifierInfo(const IdentifierInfo &) = delete;
190   IdentifierInfo &operator=(const IdentifierInfo &) = delete;
191   IdentifierInfo(IdentifierInfo &&) = delete;
192   IdentifierInfo &operator=(IdentifierInfo &&) = delete;
193 
194   /// Return true if this is the identifier for the specified string.
195   ///
196   /// This is intended to be used for string literals only: II->isStr("foo").
197   template <std::size_t StrLen>
198   bool isStr(const char (&Str)[StrLen]) const {
199     return getLength() == StrLen-1 &&
200            memcmp(getNameStart(), Str, StrLen-1) == 0;
201   }
202 
203   /// Return true if this is the identifier for the specified StringRef.
204   bool isStr(llvm::StringRef Str) const {
205     llvm::StringRef ThisStr(getNameStart(), getLength());
206     return ThisStr == Str;
207   }
208 
209   /// Return the beginning of the actual null-terminated string for this
210   /// identifier.
211   const char *getNameStart() const { return Entry->getKeyData(); }
212 
213   /// Efficiently return the length of this identifier info.
214   unsigned getLength() const { return Entry->getKeyLength(); }
215 
216   /// Return the actual identifier string.
217   StringRef getName() const {
218     return StringRef(getNameStart(), getLength());
219   }
220 
221   /// Return true if this identifier is \#defined to some other value.
222   /// \note The current definition may be in a module and not currently visible.
223   bool hasMacroDefinition() const {
224     return HasMacro;
225   }
226   void setHasMacroDefinition(bool Val) {
227     if (HasMacro == Val) return;
228 
229     HasMacro = Val;
230     if (Val) {
231       NeedsHandleIdentifier = true;
232       HadMacro = true;
233     } else {
234       // If this is a final macro, make the deprecation and header unsafe bits
235       // stick around after the undefinition so they apply to any redefinitions.
236       if (!IsFinal) {
237         // Because calling the setters of these calls recomputes, just set them
238         // manually to avoid recomputing a bunch of times.
239         IsDeprecatedMacro = false;
240         IsRestrictExpansion = false;
241       }
242       RecomputeNeedsHandleIdentifier();
243     }
244   }
245   /// Returns true if this identifier was \#defined to some value at any
246   /// moment. In this case there should be an entry for the identifier in the
247   /// macro history table in Preprocessor.
248   bool hadMacroDefinition() const {
249     return HadMacro;
250   }
251 
252   bool isDeprecatedMacro() const { return IsDeprecatedMacro; }
253 
254   void setIsDeprecatedMacro(bool Val) {
255     if (IsDeprecatedMacro == Val)
256       return;
257     IsDeprecatedMacro = Val;
258     if (Val)
259       NeedsHandleIdentifier = true;
260     else
261       RecomputeNeedsHandleIdentifier();
262   }
263 
264   bool isRestrictExpansion() const { return IsRestrictExpansion; }
265 
266   void setIsRestrictExpansion(bool Val) {
267     if (IsRestrictExpansion == Val)
268       return;
269     IsRestrictExpansion = Val;
270     if (Val)
271       NeedsHandleIdentifier = true;
272     else
273       RecomputeNeedsHandleIdentifier();
274   }
275 
276   bool isFinal() const { return IsFinal; }
277 
278   void setIsFinal(bool Val) { IsFinal = Val; }
279 
280   /// If this is a source-language token (e.g. 'for'), this API
281   /// can be used to cause the lexer to map identifiers to source-language
282   /// tokens.
283   tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
284 
285   /// True if revertTokenIDToIdentifier() was called.
286   bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
287 
288   /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
289   /// compatibility.
290   ///
291   /// TokenID is normally read-only but there are 2 instances where we revert it
292   /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
293   /// using this method so we can inform serialization about it.
294   void revertTokenIDToIdentifier() {
295     assert(TokenID != tok::identifier && "Already at tok::identifier");
296     TokenID = tok::identifier;
297     RevertedTokenID = true;
298   }
299   void revertIdentifierToTokenID(tok::TokenKind TK) {
300     assert(TokenID == tok::identifier && "Should be at tok::identifier");
301     TokenID = TK;
302     RevertedTokenID = false;
303   }
304 
305   /// Return the preprocessor keyword ID for this identifier.
306   ///
307   /// For example, "define" will return tok::pp_define.
308   tok::PPKeywordKind getPPKeywordID() const;
309 
310   /// Return the Objective-C keyword ID for the this identifier.
311   ///
312   /// For example, 'class' will return tok::objc_class if ObjC is enabled.
313   tok::ObjCKeywordKind getObjCKeywordID() const {
314     static_assert(FirstObjCKeywordID == 1,
315                   "hard-coding this assumption to simplify code");
316     if (ObjCOrBuiltinID <= LastObjCKeywordID)
317       return tok::ObjCKeywordKind(ObjCOrBuiltinID);
318     else
319       return tok::objc_not_keyword;
320   }
321   void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
322 
323   /// Return a value indicating whether this is a builtin function.
324   ///
325   /// 0 is not-built-in. 1+ are specific builtin functions.
326   unsigned getBuiltinID() const {
327     if (ObjCOrBuiltinID >= FirstBuiltinID)
328       return 1 + (ObjCOrBuiltinID - FirstBuiltinID);
329     else
330       return 0;
331   }
332   void setBuiltinID(unsigned ID) {
333     assert(ID != 0);
334     ObjCOrBuiltinID = FirstBuiltinID + (ID - 1);
335     assert(getBuiltinID() == ID && "ID too large for field!");
336   }
337   void clearBuiltinID() { ObjCOrBuiltinID = 0; }
338 
339   tok::InterestingIdentifierKind getInterestingIdentifierID() const {
340     if (ObjCOrBuiltinID >= FirstInterestingIdentifierID &&
341         ObjCOrBuiltinID <= LastInterestingIdentifierID)
342       return tok::InterestingIdentifierKind(
343           1 + (ObjCOrBuiltinID - FirstInterestingIdentifierID));
344     else
345       return tok::not_interesting;
346   }
347   void setInterestingIdentifierID(unsigned ID) {
348     assert(ID != tok::not_interesting);
349     ObjCOrBuiltinID = FirstInterestingIdentifierID + (ID - 1);
350     assert(getInterestingIdentifierID() == ID && "ID too large for field!");
351   }
352 
353   unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
354   void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
355 
356   /// get/setExtension - Initialize information about whether or not this
357   /// language token is an extension.  This controls extension warnings, and is
358   /// only valid if a custom token ID is set.
359   bool isExtensionToken() const { return IsExtension; }
360   void setIsExtensionToken(bool Val) {
361     IsExtension = Val;
362     if (Val)
363       NeedsHandleIdentifier = true;
364     else
365       RecomputeNeedsHandleIdentifier();
366   }
367 
368   /// is/setIsFutureCompatKeyword - Initialize information about whether or not
369   /// this language token is a keyword in a newer or proposed Standard. This
370   /// controls compatibility warnings, and is only true when not parsing the
371   /// corresponding Standard. Once a compatibility problem has been diagnosed
372   /// with this keyword, the flag will be cleared.
373   bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
374   void setIsFutureCompatKeyword(bool Val) {
375     IsFutureCompatKeyword = Val;
376     if (Val)
377       NeedsHandleIdentifier = true;
378     else
379       RecomputeNeedsHandleIdentifier();
380   }
381 
382   /// setIsPoisoned - Mark this identifier as poisoned.  After poisoning, the
383   /// Preprocessor will emit an error every time this token is used.
384   void setIsPoisoned(bool Value = true) {
385     IsPoisoned = Value;
386     if (Value)
387       NeedsHandleIdentifier = true;
388     else
389       RecomputeNeedsHandleIdentifier();
390   }
391 
392   /// Return true if this token has been poisoned.
393   bool isPoisoned() const { return IsPoisoned; }
394 
395   /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
396   /// this identifier is a C++ alternate representation of an operator.
397   void setIsCPlusPlusOperatorKeyword(bool Val = true) {
398     IsCPPOperatorKeyword = Val;
399   }
400   bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
401 
402   /// Return true if this token is a keyword in the specified language.
403   bool isKeyword(const LangOptions &LangOpts) const;
404 
405   /// Return true if this token is a C++ keyword in the specified
406   /// language.
407   bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
408 
409   /// Get and set FETokenInfo. The language front-end is allowed to associate
410   /// arbitrary metadata with this token.
411   void *getFETokenInfo() const { return FETokenInfo; }
412   void setFETokenInfo(void *T) { FETokenInfo = T; }
413 
414   /// Return true if the Preprocessor::HandleIdentifier must be called
415   /// on a token of this identifier.
416   ///
417   /// If this returns false, we know that HandleIdentifier will not affect
418   /// the token.
419   bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
420 
421   /// Return true if the identifier in its current state was loaded
422   /// from an AST file.
423   bool isFromAST() const { return IsFromAST; }
424 
425   void setIsFromAST() { IsFromAST = true; }
426 
427   /// Determine whether this identifier has changed since it was loaded
428   /// from an AST file.
429   bool hasChangedSinceDeserialization() const {
430     return ChangedAfterLoad;
431   }
432 
433   /// Note that this identifier has changed since it was loaded from
434   /// an AST file.
435   void setChangedSinceDeserialization() {
436     ChangedAfterLoad = true;
437   }
438 
439   /// Determine whether the frontend token information for this
440   /// identifier has changed since it was loaded from an AST file.
441   bool hasFETokenInfoChangedSinceDeserialization() const {
442     return FEChangedAfterLoad;
443   }
444 
445   /// Note that the frontend token information for this identifier has
446   /// changed since it was loaded from an AST file.
447   void setFETokenInfoChangedSinceDeserialization() {
448     FEChangedAfterLoad = true;
449   }
450 
451   /// Determine whether the information for this identifier is out of
452   /// date with respect to the external source.
453   bool isOutOfDate() const { return OutOfDate; }
454 
455   /// Set whether the information for this identifier is out of
456   /// date with respect to the external source.
457   void setOutOfDate(bool OOD) {
458     OutOfDate = OOD;
459     if (OOD)
460       NeedsHandleIdentifier = true;
461     else
462       RecomputeNeedsHandleIdentifier();
463   }
464 
465   /// Determine whether this is the contextual keyword \c import.
466   bool isModulesImport() const { return IsModulesImport; }
467 
468   /// Set whether this identifier is the contextual keyword \c import.
469   void setModulesImport(bool I) {
470     IsModulesImport = I;
471     if (I)
472       NeedsHandleIdentifier = true;
473     else
474       RecomputeNeedsHandleIdentifier();
475   }
476 
477   /// Determine whether this is the mangled name of an OpenMP variant.
478   bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
479 
480   /// Set whether this is the mangled name of an OpenMP variant.
481   void setMangledOpenMPVariantName(bool I) { IsMangledOpenMPVariantName = I; }
482 
483   /// Return true if this identifier is an editor placeholder.
484   ///
485   /// Editor placeholders are produced by the code-completion engine and are
486   /// represented as characters between '<#' and '#>' in the source code. An
487   /// example of auto-completed call with a placeholder parameter is shown
488   /// below:
489   /// \code
490   ///   function(<#int x#>);
491   /// \endcode
492   bool isEditorPlaceholder() const {
493     return getName().startswith("<#") && getName().endswith("#>");
494   }
495 
496   /// Determine whether \p this is a name reserved for the implementation (C99
497   /// 7.1.3, C++ [lib.global.names]).
498   ReservedIdentifierStatus isReserved(const LangOptions &LangOpts) const;
499 
500   /// Determine whether \p this is a name reserved for future standardization or
501   /// the implementation (C++ [usrlit.suffix]).
502   ReservedLiteralSuffixIdStatus isReservedLiteralSuffixId() const;
503 
504   /// If the identifier is an "uglified" reserved name, return a cleaned form.
505   /// e.g. _Foo => Foo. Otherwise, just returns the name.
506   StringRef deuglifiedName() const;
507 
508   /// Provide less than operator for lexicographical sorting.
509   bool operator<(const IdentifierInfo &RHS) const {
510     return getName() < RHS.getName();
511   }
512 
513 private:
514   /// The Preprocessor::HandleIdentifier does several special (but rare)
515   /// things to identifiers of various sorts.  For example, it changes the
516   /// \c for keyword token from tok::identifier to tok::for.
517   ///
518   /// This method is very tied to the definition of HandleIdentifier.  Any
519   /// change to it should be reflected here.
520   void RecomputeNeedsHandleIdentifier() {
521     NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
522                             isExtensionToken() || isFutureCompatKeyword() ||
523                             isOutOfDate() || isModulesImport();
524   }
525 };
526 
527 /// An RAII object for [un]poisoning an identifier within a scope.
528 ///
529 /// \p II is allowed to be null, in which case objects of this type have
530 /// no effect.
531 class PoisonIdentifierRAIIObject {
532   IdentifierInfo *const II;
533   const bool OldValue;
534 
535 public:
536   PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
537     : II(II), OldValue(II ? II->isPoisoned() : false) {
538     if(II)
539       II->setIsPoisoned(NewValue);
540   }
541 
542   ~PoisonIdentifierRAIIObject() {
543     if(II)
544       II->setIsPoisoned(OldValue);
545   }
546 };
547 
548 /// An iterator that walks over all of the known identifiers
549 /// in the lookup table.
550 ///
551 /// Since this iterator uses an abstract interface via virtual
552 /// functions, it uses an object-oriented interface rather than the
553 /// more standard C++ STL iterator interface. In this OO-style
554 /// iteration, the single function \c Next() provides dereference,
555 /// advance, and end-of-sequence checking in a single
556 /// operation. Subclasses of this iterator type will provide the
557 /// actual functionality.
558 class IdentifierIterator {
559 protected:
560   IdentifierIterator() = default;
561 
562 public:
563   IdentifierIterator(const IdentifierIterator &) = delete;
564   IdentifierIterator &operator=(const IdentifierIterator &) = delete;
565 
566   virtual ~IdentifierIterator();
567 
568   /// Retrieve the next string in the identifier table and
569   /// advances the iterator for the following string.
570   ///
571   /// \returns The next string in the identifier table. If there is
572   /// no such string, returns an empty \c StringRef.
573   virtual StringRef Next() = 0;
574 };
575 
/// Provides lookups to, and iteration over, IdentifierInfo objects.
577 class IdentifierInfoLookup {
578 public:
579   virtual ~IdentifierInfoLookup();
580 
581   /// Return the IdentifierInfo for the specified named identifier.
582   ///
583   /// Unlike the version in IdentifierTable, this returns a pointer instead
584   /// of a reference.  If the pointer is null then the IdentifierInfo cannot
585   /// be found.
586   virtual IdentifierInfo* get(StringRef Name) = 0;
587 
588   /// Retrieve an iterator into the set of all identifiers
589   /// known to this identifier lookup source.
590   ///
591   /// This routine provides access to all of the identifiers known to
592   /// the identifier lookup, allowing access to the contents of the
593   /// identifiers without introducing the overhead of constructing
594   /// IdentifierInfo objects for each.
595   ///
596   /// \returns A new iterator into the set of known identifiers. The
597   /// caller is responsible for deleting this iterator.
598   virtual IdentifierIterator *getIdentifiers();
599 };
600 
601 /// Implements an efficient mapping from strings to IdentifierInfo nodes.
602 ///
603 /// This has no other purpose, but this is an extremely performance-critical
604 /// piece of the code, as each occurrence of every identifier goes through
605 /// here when lexed.
606 class IdentifierTable {
607   // Shark shows that using MallocAllocator is *much* slower than using this
608   // BumpPtrAllocator!
609   using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>;
610   HashTableTy HashTable;
611 
612   IdentifierInfoLookup* ExternalLookup;
613 
614 public:
615   /// Create the identifier table.
616   explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr);
617 
618   /// Create the identifier table, populating it with info about the
619   /// language keywords for the language specified by \p LangOpts.
620   explicit IdentifierTable(const LangOptions &LangOpts,
621                            IdentifierInfoLookup *ExternalLookup = nullptr);
622 
623   /// Set the external identifier lookup mechanism.
624   void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
625     ExternalLookup = IILookup;
626   }
627 
628   /// Retrieve the external identifier lookup object, if any.
629   IdentifierInfoLookup *getExternalIdentifierLookup() const {
630     return ExternalLookup;
631   }
632 
633   llvm::BumpPtrAllocator& getAllocator() {
634     return HashTable.getAllocator();
635   }
636 
637   /// Return the identifier token info for the specified named
638   /// identifier.
639   IdentifierInfo &get(StringRef Name) {
640     auto &Entry = *HashTable.try_emplace(Name, nullptr).first;
641 
642     IdentifierInfo *&II = Entry.second;
643     if (II) return *II;
644 
645     // No entry; if we have an external lookup, look there first.
646     if (ExternalLookup) {
647       II = ExternalLookup->get(Name);
648       if (II)
649         return *II;
650     }
651 
652     // Lookups failed, make a new IdentifierInfo.
653     void *Mem = getAllocator().Allocate<IdentifierInfo>();
654     II = new (Mem) IdentifierInfo();
655 
656     // Make sure getName() knows how to find the IdentifierInfo
657     // contents.
658     II->Entry = &Entry;
659 
660     return *II;
661   }
662 
663   IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
664     IdentifierInfo &II = get(Name);
665     II.TokenID = TokenCode;
666     assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
667     return II;
668   }
669 
670   /// Gets an IdentifierInfo for the given name without consulting
671   ///        external sources.
672   ///
673   /// This is a version of get() meant for external sources that want to
674   /// introduce or modify an identifier. If they called get(), they would
675   /// likely end up in a recursion.
676   IdentifierInfo &getOwn(StringRef Name) {
677     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
678 
679     IdentifierInfo *&II = Entry.second;
680     if (II)
681       return *II;
682 
683     // Lookups failed, make a new IdentifierInfo.
684     void *Mem = getAllocator().Allocate<IdentifierInfo>();
685     II = new (Mem) IdentifierInfo();
686 
687     // Make sure getName() knows how to find the IdentifierInfo
688     // contents.
689     II->Entry = &Entry;
690 
691     // If this is the 'import' contextual keyword, mark it as such.
692     if (Name.equals("import"))
693       II->setModulesImport(true);
694 
695     return *II;
696   }
697 
698   using iterator = HashTableTy::const_iterator;
699   using const_iterator = HashTableTy::const_iterator;
700 
701   iterator begin() const { return HashTable.begin(); }
702   iterator end() const   { return HashTable.end(); }
703   unsigned size() const  { return HashTable.size(); }
704 
705   iterator find(StringRef Name) const { return HashTable.find(Name); }
706 
707   /// Print some statistics to stderr that indicate how well the
708   /// hashing is doing.
709   void PrintStats() const;
710 
711   /// Populate the identifier table with info about the language keywords
712   /// for the language specified by \p LangOpts.
713   void AddKeywords(const LangOptions &LangOpts);
714 
715   /// Returns the correct diagnostic to issue for a future-compat diagnostic
716   /// warning. Note, this function assumes the identifier passed has already
717   /// been determined to be a future compatible keyword.
718   diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
719                                      const LangOptions &LangOpts);
720 };
721 
722 /// A family of Objective-C methods.
723 ///
724 /// These families have no inherent meaning in the language, but are
725 /// nonetheless central enough in the existing implementations to
726 /// merit direct AST support.  While, in theory, arbitrary methods can
727 /// be considered to form families, we focus here on the methods
728 /// involving allocation and retain-count management, as these are the
729 /// most "core" and the most likely to be useful to diverse clients
730 /// without extra information.
731 ///
732 /// Both selectors and actual method declarations may be classified
733 /// into families.  Method families may impose additional restrictions
734 /// beyond their selector name; for example, a method called '_init'
735 /// that returns void is not considered to be in the 'init' family
736 /// (but would be if it returned 'id').  It is also possible to
737 /// explicitly change or remove a method's family.  Therefore the
738 /// method's family should be considered the single source of truth.
enum ObjCMethodFamily {
  /// No particular method family.
  OMF_None = 0,

  // Families whose selectors may have arbitrary arity, may be written with
  // arbitrary leading underscores, and may have additional CamelCase "words"
  // in their first selector chunk following the family name.
  OMF_alloc = 1,
  OMF_copy = 2,
  OMF_init = 3,
  OMF_mutableCopy = 4,
  OMF_new = 5,

  // Singleton families: each consists only of the nullary selector with the
  // given name.
  OMF_autorelease = 6,
  OMF_dealloc = 7,
  OMF_finalize = 8,
  OMF_release = 9,
  OMF_retain = 10,
  OMF_retainCount = 11,
  OMF_self = 12,
  OMF_initialize = 13,

  // performSelector families
  OMF_performSelector = 14
};
767 
/// Number of bits that is enough to store any enumerator in ObjCMethodFamily
/// or InvalidObjCMethodFamily.
enum { ObjCMethodFamilyBitWidth = 4 };

/// An invalid value of ObjCMethodFamily: the all-ones pattern of a field that
/// is ObjCMethodFamilyBitWidth bits wide (15 for the current width of 4).
enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 };
774 
775 /// A family of Objective-C methods.
776 ///
/// These are families of methods whose result type is initially 'id', but
/// which are candidates for the result type to be changed to 'instancetype'.
enum ObjCInstanceTypeFamily {
  OIT_None = 0,
  OIT_Array = 1,
  OIT_Dictionary = 2,
  OIT_Singleton = 3,
  OIT_Init = 4,
  OIT_ReturnsSelf = 5
};
787 
/// String-format family of an Objective-C selector; SFF_None is the zero
/// value.
enum ObjCStringFormatFamily {
  SFF_None = 0,
  SFF_NSString = 1,
  SFF_CFString = 2
};
793 
794 /// Smart pointer class that efficiently represents Objective-C method
795 /// names.
796 ///
797 /// This class will either point to an IdentifierInfo or a
798 /// MultiKeywordSelector (which is private). This enables us to optimize
799 /// selectors that take no arguments and selectors that take 1 argument, which
800 /// accounts for 78% of all selectors in Cocoa.h.
801 class Selector {
802   friend class Diagnostic;
803   friend class SelectorTable; // only the SelectorTable can create these
804   friend class DeclarationName; // and the AST's DeclarationName.
805 
806   enum IdentifierInfoFlag {
807     // Empty selector = 0. Note that these enumeration values must
808     // correspond to the enumeration values of DeclarationName::StoredNameKind
809     ZeroArg  = 0x01,
810     OneArg   = 0x02,
811     MultiArg = 0x07,
812     ArgFlags = 0x07
813   };
814 
815   /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low
816   /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any
817   /// case IdentifierInfo and MultiKeywordSelector are already aligned to
818   /// 8 bytes even on 32 bits archs because of DeclarationName.
819   uintptr_t InfoPtr = 0;
820 
821   Selector(IdentifierInfo *II, unsigned nArgs) {
822     InfoPtr = reinterpret_cast<uintptr_t>(II);
823     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
824     assert(nArgs < 2 && "nArgs not equal to 0/1");
825     InfoPtr |= nArgs+1;
826   }
827 
828   Selector(MultiKeywordSelector *SI) {
829     InfoPtr = reinterpret_cast<uintptr_t>(SI);
830     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
831     InfoPtr |= MultiArg;
832   }
833 
834   IdentifierInfo *getAsIdentifierInfo() const {
835     if (getIdentifierInfoFlag() < MultiArg)
836       return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
837     return nullptr;
838   }
839 
840   MultiKeywordSelector *getMultiKeywordSelector() const {
841     return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
842   }
843 
844   unsigned getIdentifierInfoFlag() const {
845     return InfoPtr & ArgFlags;
846   }
847 
848   static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
849 
850   static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
851 
852 public:
853   /// The default ctor should only be used when creating data structures that
854   ///  will contain selectors.
855   Selector() = default;
856   explicit Selector(uintptr_t V) : InfoPtr(V) {}
857 
858   /// operator==/!= - Indicate whether the specified selectors are identical.
859   bool operator==(Selector RHS) const {
860     return InfoPtr == RHS.InfoPtr;
861   }
862   bool operator!=(Selector RHS) const {
863     return InfoPtr != RHS.InfoPtr;
864   }
865 
866   void *getAsOpaquePtr() const {
867     return reinterpret_cast<void*>(InfoPtr);
868   }
869 
870   /// Determine whether this is the empty selector.
871   bool isNull() const { return InfoPtr == 0; }
872 
873   // Predicates to identify the selector type.
874   bool isKeywordSelector() const {
875     return getIdentifierInfoFlag() != ZeroArg;
876   }
877 
878   bool isUnarySelector() const {
879     return getIdentifierInfoFlag() == ZeroArg;
880   }
881 
882   /// If this selector is the specific keyword selector described by Names.
883   bool isKeywordSelector(ArrayRef<StringRef> Names) const;
884 
885   /// If this selector is the specific unary selector described by Name.
886   bool isUnarySelector(StringRef Name) const;
887 
888   unsigned getNumArgs() const;
889 
890   /// Retrieve the identifier at a given position in the selector.
891   ///
892   /// Note that the identifier pointer returned may be NULL. Clients that only
893   /// care about the text of the identifier string, and not the specific,
894   /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
895   /// an empty string when the identifier pointer would be NULL.
896   ///
897   /// \param argIndex The index for which we want to retrieve the identifier.
898   /// This index shall be less than \c getNumArgs() unless this is a keyword
899   /// selector, in which case 0 is the only permissible value.
900   ///
901   /// \returns the uniqued identifier for this slot, or NULL if this slot has
902   /// no corresponding identifier.
903   IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
904 
905   /// Retrieve the name at a given position in the selector.
906   ///
907   /// \param argIndex The index for which we want to retrieve the name.
908   /// This index shall be less than \c getNumArgs() unless this is a keyword
909   /// selector, in which case 0 is the only permissible value.
910   ///
911   /// \returns the name for this slot, which may be the empty string if no
912   /// name was supplied.
913   StringRef getNameForSlot(unsigned argIndex) const;
914 
915   /// Derive the full selector name (e.g. "foo:bar:") and return
916   /// it as an std::string.
917   std::string getAsString() const;
918 
919   /// Prints the full selector name (e.g. "foo:bar:").
920   void print(llvm::raw_ostream &OS) const;
921 
922   void dump() const;
923 
924   /// Derive the conventional family of this method.
925   ObjCMethodFamily getMethodFamily() const {
926     return getMethodFamilyImpl(*this);
927   }
928 
929   ObjCStringFormatFamily getStringFormatFamily() const {
930     return getStringFormatFamilyImpl(*this);
931   }
932 
933   static Selector getEmptyMarker() {
934     return Selector(uintptr_t(-1));
935   }
936 
937   static Selector getTombstoneMarker() {
938     return Selector(uintptr_t(-2));
939   }
940 
941   static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
942 };
943 
944 /// This table allows us to fully hide how we implement
945 /// multi-keyword caching.
946 class SelectorTable {
947   // Actually a SelectorTableImpl
948   void *Impl;
949 
950 public:
951   SelectorTable();
952   SelectorTable(const SelectorTable &) = delete;
953   SelectorTable &operator=(const SelectorTable &) = delete;
954   ~SelectorTable();
955 
956   /// Can create any sort of selector.
957   ///
958   /// \p NumArgs indicates whether this is a no argument selector "foo", a
959   /// single argument selector "foo:" or multi-argument "foo:bar:".
960   Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
961 
962   Selector getUnarySelector(IdentifierInfo *ID) {
963     return Selector(ID, 1);
964   }
965 
966   Selector getNullarySelector(IdentifierInfo *ID) {
967     return Selector(ID, 0);
968   }
969 
970   /// Return the total amount of memory allocated for managing selectors.
971   size_t getTotalMemory() const;
972 
973   /// Return the default setter name for the given identifier.
974   ///
975   /// This is "set" + \p Name where the initial character of \p Name
976   /// has been capitalized.
977   static SmallString<64> constructSetterName(StringRef Name);
978 
979   /// Return the default setter selector for the given identifier.
980   ///
981   /// This is "set" + \p Name where the initial character of \p Name
982   /// has been capitalized.
983   static Selector constructSetterSelector(IdentifierTable &Idents,
984                                           SelectorTable &SelTable,
985                                           const IdentifierInfo *Name);
986 
987   /// Return the property name for the given setter selector.
988   static std::string getPropertyNameFromSetterSelector(Selector Sel);
989 };
990 
991 namespace detail {
992 
993 /// DeclarationNameExtra is used as a base of various uncommon special names.
994 /// This class is needed since DeclarationName has not enough space to store
995 /// the kind of every possible names. Therefore the kind of common names is
996 /// stored directly in DeclarationName, and the kind of uncommon names is
997 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because
998 /// DeclarationName needs the lower 3 bits to store the kind of common names.
999 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change
1000 /// here is very likely to require changes in DeclarationName(Table).
1001 class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
1002   friend class clang::DeclarationName;
1003   friend class clang::DeclarationNameTable;
1004 
1005 protected:
1006   /// The kind of "extra" information stored in the DeclarationName. See
1007   /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
1008   /// are used. Note that DeclarationName depends on the numerical values
1009   /// of the enumerators in this enum. See DeclarationName::StoredNameKind
1010   /// for more info.
1011   enum ExtraKind {
1012     CXXDeductionGuideName,
1013     CXXLiteralOperatorName,
1014     CXXUsingDirective,
1015     ObjCMultiArgSelector
1016   };
1017 
1018   /// ExtraKindOrNumArgs has one of the following meaning:
1019   ///  * The kind of an uncommon C++ special name. This DeclarationNameExtra
1020   ///    is in this case in fact either a CXXDeductionGuideNameExtra or
1021   ///    a CXXLiteralOperatorIdName.
1022   ///
1023   ///  * It may be also name common to C++ using-directives (CXXUsingDirective),
1024   ///
1025   ///  * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
1026   ///    the number of arguments in the Objective-C selector, in which
1027   ///    case the DeclarationNameExtra is also a MultiKeywordSelector.
1028   unsigned ExtraKindOrNumArgs;
1029 
1030   DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
1031   DeclarationNameExtra(unsigned NumArgs)
1032       : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
1033 
1034   /// Return the corresponding ExtraKind.
1035   ExtraKind getKind() const {
1036     return static_cast<ExtraKind>(ExtraKindOrNumArgs >
1037                                           (unsigned)ObjCMultiArgSelector
1038                                       ? (unsigned)ObjCMultiArgSelector
1039                                       : ExtraKindOrNumArgs);
1040   }
1041 
1042   /// Return the number of arguments in an ObjC selector. Only valid when this
1043   /// is indeed an ObjCMultiArgSelector.
1044   unsigned getNumArgs() const {
1045     assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
1046            "getNumArgs called but this is not an ObjC selector!");
1047     return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
1048   }
1049 };
1050 
1051 } // namespace detail
1052 
1053 }  // namespace clang
1054 
1055 namespace llvm {
1056 
1057 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
1058 /// DenseSets.
1059 template <>
1060 struct DenseMapInfo<clang::Selector> {
1061   static clang::Selector getEmptyKey() {
1062     return clang::Selector::getEmptyMarker();
1063   }
1064 
1065   static clang::Selector getTombstoneKey() {
1066     return clang::Selector::getTombstoneMarker();
1067   }
1068 
1069   static unsigned getHashValue(clang::Selector S);
1070 
1071   static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
1072     return LHS == RHS;
1073   }
1074 };
1075 
1076 template<>
1077 struct PointerLikeTypeTraits<clang::Selector> {
1078   static const void *getAsVoidPointer(clang::Selector P) {
1079     return P.getAsOpaquePtr();
1080   }
1081 
1082   static clang::Selector getFromVoidPointer(const void *P) {
1083     return clang::Selector(reinterpret_cast<uintptr_t>(P));
1084   }
1085 
1086   static constexpr int NumLowBitsAvailable = 0;
1087 };
1088 
1089 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
1090 // are not guaranteed to be 8-byte aligned.
1091 template<>
1092 struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
1093   static void *getAsVoidPointer(clang::IdentifierInfo* P) {
1094     return P;
1095   }
1096 
1097   static clang::IdentifierInfo *getFromVoidPointer(void *P) {
1098     return static_cast<clang::IdentifierInfo*>(P);
1099   }
1100 
1101   static constexpr int NumLowBitsAvailable = 1;
1102 };
1103 
1104 template<>
1105 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
1106   static const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
1107     return P;
1108   }
1109 
1110   static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
1111     return static_cast<const clang::IdentifierInfo*>(P);
1112   }
1113 
1114   static constexpr int NumLowBitsAvailable = 1;
1115 };
1116 
1117 } // namespace llvm
1118 
1119 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
1120