1 //===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
11 /// clang::Selector interfaces.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
16 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17 
18 #include "clang/Basic/DiagnosticIDs.h"
19 #include "clang/Basic/LLVM.h"
20 #include "clang/Basic/TokenKinds.h"
21 #include "llvm/ADT/DenseMapInfo.h"
22 #include "llvm/ADT/FoldingSet.h"
23 #include "llvm/ADT/PointerIntPair.h"
24 #include "llvm/ADT/PointerUnion.h"
25 #include "llvm/ADT/SmallString.h"
26 #include "llvm/ADT/StringMap.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/Support/Allocator.h"
29 #include "llvm/Support/PointerLikeTypeTraits.h"
30 #include "llvm/Support/type_traits.h"
31 #include <cassert>
32 #include <cstddef>
33 #include <cstdint>
34 #include <cstring>
35 #include <string>
36 #include <utility>
37 
38 namespace clang {
39 
40 class DeclarationName;
41 class DeclarationNameTable;
42 class IdentifierInfo;
43 class LangOptions;
44 class MultiKeywordSelector;
45 class SourceLocation;
46 
/// Classifies whether, and for what reason, an identifier is reserved.
///
/// This is the result type of IdentifierInfo::isReserved(). NotReserved is
/// pinned to 0; the predicates isReservedAtGlobalScope() and
/// isReservedInAllContexts() are written in terms of specific enumerators, so
/// adding an enumerator here requires revisiting them.
enum class ReservedIdentifierStatus {
  NotReserved = 0,
  StartsWithUnderscoreAtGlobalScope,
  StartsWithUnderscoreAndIsExternC,
  StartsWithDoubleUnderscore,
  StartsWithUnderscoreFollowedByCapitalLetter,
  ContainsDoubleUnderscore,
};
55 
/// Classifies whether, and for what reason, a literal suffix identifier is
/// reserved.
///
/// This is the result type of IdentifierInfo::isReservedLiteralSuffixId().
/// NotReserved is pinned to 0.
enum class ReservedLiteralSuffixIdStatus {
  NotReserved = 0,
  NotStartsWithUnderscore,
  ContainsDoubleUnderscore,
};
61 
62 /// Determine whether an identifier is reserved for use as a name at global
63 /// scope. Such identifiers might be implementation-specific global functions
64 /// or variables.
isReservedAtGlobalScope(ReservedIdentifierStatus Status)65 inline bool isReservedAtGlobalScope(ReservedIdentifierStatus Status) {
66   return Status != ReservedIdentifierStatus::NotReserved;
67 }
68 
69 /// Determine whether an identifier is reserved in all contexts. Such
70 /// identifiers might be implementation-specific keywords or macros, for
71 /// example.
isReservedInAllContexts(ReservedIdentifierStatus Status)72 inline bool isReservedInAllContexts(ReservedIdentifierStatus Status) {
73   return Status != ReservedIdentifierStatus::NotReserved &&
74          Status != ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope &&
75          Status != ReservedIdentifierStatus::StartsWithUnderscoreAndIsExternC;
76 }
77 
/// A simple pair of identifier info and location: \c first is the
/// IdentifierInfo pointer, \c second is the SourceLocation.
using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;
80 
/// IdentifierInfo and other related classes are aligned to
/// 8 bytes so that DeclarationName can use the lower 3 bits
/// of a pointer to one of these classes.
///
/// This value is the argument of the \c alignas specifier on those classes.
enum { IdentifierInfoAlignment = 8 };
85 
/// Width, in bits, of the IdentifierInfo::ObjCOrBuiltinID bit-field.
static constexpr int ObjCOrBuiltinIDBits = 16;

/// The "layout" of ObjCOrBuiltinID is:
///  - The first value (0) represents "not a special identifier".
///  - The next (NUM_OBJC_KEYWORDS - 1) values represent ObjCKeywordKinds (not
///    including objc_not_keyword).
///  - The next (NUM_INTERESTING_IDENTIFIERS - 1) values represent
///    InterestingIdentifierKinds (not including not_interesting).
///  - The rest of the values represent builtin IDs (not including NotBuiltin).
///
/// Each Last* bound is inclusive. A range holding (N - 1) values that starts
/// at First therefore ends at First + (N - 1) - 1, which is where the "- 2"
/// below comes from.
static constexpr int FirstObjCKeywordID = 1;
static constexpr int LastObjCKeywordID =
    FirstObjCKeywordID + tok::NUM_OBJC_KEYWORDS - 2;
static constexpr int FirstInterestingIdentifierID = LastObjCKeywordID + 1;
static constexpr int LastInterestingIdentifierID =
    FirstInterestingIdentifierID + tok::NUM_INTERESTING_IDENTIFIERS - 2;
static constexpr int FirstBuiltinID = LastInterestingIdentifierID + 1;
102 
103 /// One of these records is kept for each identifier that
104 /// is lexed.  This contains information about whether the token was \#define'd,
105 /// is a language keyword, or if it is a front-end token of some sort (e.g. a
106 /// variable or function name).  The preprocessor keeps this information in a
107 /// set, and all tok::identifier tokens have a pointer to one of these.
108 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits.
alignas(IdentifierInfoAlignment)109 class alignas(IdentifierInfoAlignment) IdentifierInfo {
110   friend class IdentifierTable;
111 
112   // Front-end token ID or tok::identifier.
113   LLVM_PREFERRED_TYPE(tok::TokenKind)
114   unsigned TokenID : 9;
115 
116   // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
117   // First NUM_OBJC_KEYWORDS values are for Objective-C,
118   // the remaining values are for builtins.
119   unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits;
120 
121   // True if there is a #define for this.
122   LLVM_PREFERRED_TYPE(bool)
123   unsigned HasMacro : 1;
124 
125   // True if there was a #define for this.
126   LLVM_PREFERRED_TYPE(bool)
127   unsigned HadMacro : 1;
128 
129   // True if the identifier is a language extension.
130   LLVM_PREFERRED_TYPE(bool)
131   unsigned IsExtension : 1;
132 
133   // True if the identifier is a keyword in a newer or proposed Standard.
134   LLVM_PREFERRED_TYPE(bool)
135   unsigned IsFutureCompatKeyword : 1;
136 
137   // True if the identifier is poisoned.
138   LLVM_PREFERRED_TYPE(bool)
139   unsigned IsPoisoned : 1;
140 
141   // True if the identifier is a C++ operator keyword.
142   LLVM_PREFERRED_TYPE(bool)
143   unsigned IsCPPOperatorKeyword : 1;
144 
145   // Internal bit set by the member function RecomputeNeedsHandleIdentifier.
146   // See comment about RecomputeNeedsHandleIdentifier for more info.
147   LLVM_PREFERRED_TYPE(bool)
148   unsigned NeedsHandleIdentifier : 1;
149 
150   // True if the identifier was loaded (at least partially) from an AST file.
151   LLVM_PREFERRED_TYPE(bool)
152   unsigned IsFromAST : 1;
153 
154   // True if the identifier has changed from the definition
155   // loaded from an AST file.
156   LLVM_PREFERRED_TYPE(bool)
157   unsigned ChangedAfterLoad : 1;
158 
159   // True if the identifier's frontend information has changed from the
160   // definition loaded from an AST file.
161   LLVM_PREFERRED_TYPE(bool)
162   unsigned FEChangedAfterLoad : 1;
163 
164   // True if revertTokenIDToIdentifier was called.
165   LLVM_PREFERRED_TYPE(bool)
166   unsigned RevertedTokenID : 1;
167 
168   // True if there may be additional information about
169   // this identifier stored externally.
170   LLVM_PREFERRED_TYPE(bool)
171   unsigned OutOfDate : 1;
172 
173   // True if this is the 'import' contextual keyword.
174   LLVM_PREFERRED_TYPE(bool)
175   unsigned IsModulesImport : 1;
176 
177   // True if this is a mangled OpenMP variant name.
178   LLVM_PREFERRED_TYPE(bool)
179   unsigned IsMangledOpenMPVariantName : 1;
180 
181   // True if this is a deprecated macro.
182   LLVM_PREFERRED_TYPE(bool)
183   unsigned IsDeprecatedMacro : 1;
184 
185   // True if this macro is unsafe in headers.
186   LLVM_PREFERRED_TYPE(bool)
187   unsigned IsRestrictExpansion : 1;
188 
189   // True if this macro is final.
190   LLVM_PREFERRED_TYPE(bool)
191   unsigned IsFinal : 1;
192 
193   // 22 bits left in a 64-bit word.
194 
195   // Managed by the language front-end.
196   void *FETokenInfo = nullptr;
197 
198   llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
199 
200   IdentifierInfo()
201       : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false),
202         HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false),
203         IsPoisoned(false), IsCPPOperatorKeyword(false),
204         NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false),
205         FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false),
206         IsModulesImport(false), IsMangledOpenMPVariantName(false),
207         IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}
208 
209 public:
210   IdentifierInfo(const IdentifierInfo &) = delete;
211   IdentifierInfo &operator=(const IdentifierInfo &) = delete;
212   IdentifierInfo(IdentifierInfo &&) = delete;
213   IdentifierInfo &operator=(IdentifierInfo &&) = delete;
214 
215   /// Return true if this is the identifier for the specified string.
216   ///
217   /// This is intended to be used for string literals only: II->isStr("foo").
218   template <std::size_t StrLen>
219   bool isStr(const char (&Str)[StrLen]) const {
220     return getLength() == StrLen-1 &&
221            memcmp(getNameStart(), Str, StrLen-1) == 0;
222   }
223 
224   /// Return true if this is the identifier for the specified StringRef.
225   bool isStr(llvm::StringRef Str) const {
226     llvm::StringRef ThisStr(getNameStart(), getLength());
227     return ThisStr == Str;
228   }
229 
230   /// Return the beginning of the actual null-terminated string for this
231   /// identifier.
232   const char *getNameStart() const { return Entry->getKeyData(); }
233 
234   /// Efficiently return the length of this identifier info.
235   unsigned getLength() const { return Entry->getKeyLength(); }
236 
237   /// Return the actual identifier string.
238   StringRef getName() const {
239     return StringRef(getNameStart(), getLength());
240   }
241 
242   /// Return true if this identifier is \#defined to some other value.
243   /// \note The current definition may be in a module and not currently visible.
244   bool hasMacroDefinition() const {
245     return HasMacro;
246   }
247   void setHasMacroDefinition(bool Val) {
248     if (HasMacro == Val) return;
249 
250     HasMacro = Val;
251     if (Val) {
252       NeedsHandleIdentifier = true;
253       HadMacro = true;
254     } else {
255       // If this is a final macro, make the deprecation and header unsafe bits
256       // stick around after the undefinition so they apply to any redefinitions.
257       if (!IsFinal) {
258         // Because calling the setters of these calls recomputes, just set them
259         // manually to avoid recomputing a bunch of times.
260         IsDeprecatedMacro = false;
261         IsRestrictExpansion = false;
262       }
263       RecomputeNeedsHandleIdentifier();
264     }
265   }
266   /// Returns true if this identifier was \#defined to some value at any
267   /// moment. In this case there should be an entry for the identifier in the
268   /// macro history table in Preprocessor.
269   bool hadMacroDefinition() const {
270     return HadMacro;
271   }
272 
273   bool isDeprecatedMacro() const { return IsDeprecatedMacro; }
274 
275   void setIsDeprecatedMacro(bool Val) {
276     if (IsDeprecatedMacro == Val)
277       return;
278     IsDeprecatedMacro = Val;
279     if (Val)
280       NeedsHandleIdentifier = true;
281     else
282       RecomputeNeedsHandleIdentifier();
283   }
284 
285   bool isRestrictExpansion() const { return IsRestrictExpansion; }
286 
287   void setIsRestrictExpansion(bool Val) {
288     if (IsRestrictExpansion == Val)
289       return;
290     IsRestrictExpansion = Val;
291     if (Val)
292       NeedsHandleIdentifier = true;
293     else
294       RecomputeNeedsHandleIdentifier();
295   }
296 
297   bool isFinal() const { return IsFinal; }
298 
299   void setIsFinal(bool Val) { IsFinal = Val; }
300 
301   /// If this is a source-language token (e.g. 'for'), this API
302   /// can be used to cause the lexer to map identifiers to source-language
303   /// tokens.
304   tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
305 
306   /// True if revertTokenIDToIdentifier() was called.
307   bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
308 
309   /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
310   /// compatibility.
311   ///
312   /// TokenID is normally read-only but there are 2 instances where we revert it
313   /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
314   /// using this method so we can inform serialization about it.
315   void revertTokenIDToIdentifier() {
316     assert(TokenID != tok::identifier && "Already at tok::identifier");
317     TokenID = tok::identifier;
318     RevertedTokenID = true;
319   }
320   void revertIdentifierToTokenID(tok::TokenKind TK) {
321     assert(TokenID == tok::identifier && "Should be at tok::identifier");
322     TokenID = TK;
323     RevertedTokenID = false;
324   }
325 
326   /// Return the preprocessor keyword ID for this identifier.
327   ///
328   /// For example, "define" will return tok::pp_define.
329   tok::PPKeywordKind getPPKeywordID() const;
330 
331   /// Return the Objective-C keyword ID for the this identifier.
332   ///
333   /// For example, 'class' will return tok::objc_class if ObjC is enabled.
334   tok::ObjCKeywordKind getObjCKeywordID() const {
335     static_assert(FirstObjCKeywordID == 1,
336                   "hard-coding this assumption to simplify code");
337     if (ObjCOrBuiltinID <= LastObjCKeywordID)
338       return tok::ObjCKeywordKind(ObjCOrBuiltinID);
339     else
340       return tok::objc_not_keyword;
341   }
342   void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
343 
344   /// Return a value indicating whether this is a builtin function.
345   ///
346   /// 0 is not-built-in. 1+ are specific builtin functions.
347   unsigned getBuiltinID() const {
348     if (ObjCOrBuiltinID >= FirstBuiltinID)
349       return 1 + (ObjCOrBuiltinID - FirstBuiltinID);
350     else
351       return 0;
352   }
353   void setBuiltinID(unsigned ID) {
354     assert(ID != 0);
355     ObjCOrBuiltinID = FirstBuiltinID + (ID - 1);
356     assert(getBuiltinID() == ID && "ID too large for field!");
357   }
358   void clearBuiltinID() { ObjCOrBuiltinID = 0; }
359 
360   tok::InterestingIdentifierKind getInterestingIdentifierID() const {
361     if (ObjCOrBuiltinID >= FirstInterestingIdentifierID &&
362         ObjCOrBuiltinID <= LastInterestingIdentifierID)
363       return tok::InterestingIdentifierKind(
364           1 + (ObjCOrBuiltinID - FirstInterestingIdentifierID));
365     else
366       return tok::not_interesting;
367   }
368   void setInterestingIdentifierID(unsigned ID) {
369     assert(ID != tok::not_interesting);
370     ObjCOrBuiltinID = FirstInterestingIdentifierID + (ID - 1);
371     assert(getInterestingIdentifierID() == ID && "ID too large for field!");
372   }
373 
374   unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
375   void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
376 
377   /// get/setExtension - Initialize information about whether or not this
378   /// language token is an extension.  This controls extension warnings, and is
379   /// only valid if a custom token ID is set.
380   bool isExtensionToken() const { return IsExtension; }
381   void setIsExtensionToken(bool Val) {
382     IsExtension = Val;
383     if (Val)
384       NeedsHandleIdentifier = true;
385     else
386       RecomputeNeedsHandleIdentifier();
387   }
388 
389   /// is/setIsFutureCompatKeyword - Initialize information about whether or not
390   /// this language token is a keyword in a newer or proposed Standard. This
391   /// controls compatibility warnings, and is only true when not parsing the
392   /// corresponding Standard. Once a compatibility problem has been diagnosed
393   /// with this keyword, the flag will be cleared.
394   bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
395   void setIsFutureCompatKeyword(bool Val) {
396     IsFutureCompatKeyword = Val;
397     if (Val)
398       NeedsHandleIdentifier = true;
399     else
400       RecomputeNeedsHandleIdentifier();
401   }
402 
403   /// setIsPoisoned - Mark this identifier as poisoned.  After poisoning, the
404   /// Preprocessor will emit an error every time this token is used.
405   void setIsPoisoned(bool Value = true) {
406     IsPoisoned = Value;
407     if (Value)
408       NeedsHandleIdentifier = true;
409     else
410       RecomputeNeedsHandleIdentifier();
411   }
412 
413   /// Return true if this token has been poisoned.
414   bool isPoisoned() const { return IsPoisoned; }
415 
416   /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
417   /// this identifier is a C++ alternate representation of an operator.
418   void setIsCPlusPlusOperatorKeyword(bool Val = true) {
419     IsCPPOperatorKeyword = Val;
420   }
421   bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
422 
423   /// Return true if this token is a keyword in the specified language.
424   bool isKeyword(const LangOptions &LangOpts) const;
425 
426   /// Return true if this token is a C++ keyword in the specified
427   /// language.
428   bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
429 
430   /// Get and set FETokenInfo. The language front-end is allowed to associate
431   /// arbitrary metadata with this token.
432   void *getFETokenInfo() const { return FETokenInfo; }
433   void setFETokenInfo(void *T) { FETokenInfo = T; }
434 
435   /// Return true if the Preprocessor::HandleIdentifier must be called
436   /// on a token of this identifier.
437   ///
438   /// If this returns false, we know that HandleIdentifier will not affect
439   /// the token.
440   bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
441 
442   /// Return true if the identifier in its current state was loaded
443   /// from an AST file.
444   bool isFromAST() const { return IsFromAST; }
445 
446   void setIsFromAST() { IsFromAST = true; }
447 
448   /// Determine whether this identifier has changed since it was loaded
449   /// from an AST file.
450   bool hasChangedSinceDeserialization() const {
451     return ChangedAfterLoad;
452   }
453 
454   /// Note that this identifier has changed since it was loaded from
455   /// an AST file.
456   void setChangedSinceDeserialization() {
457     ChangedAfterLoad = true;
458   }
459 
460   /// Determine whether the frontend token information for this
461   /// identifier has changed since it was loaded from an AST file.
462   bool hasFETokenInfoChangedSinceDeserialization() const {
463     return FEChangedAfterLoad;
464   }
465 
466   /// Note that the frontend token information for this identifier has
467   /// changed since it was loaded from an AST file.
468   void setFETokenInfoChangedSinceDeserialization() {
469     FEChangedAfterLoad = true;
470   }
471 
472   /// Determine whether the information for this identifier is out of
473   /// date with respect to the external source.
474   bool isOutOfDate() const { return OutOfDate; }
475 
476   /// Set whether the information for this identifier is out of
477   /// date with respect to the external source.
478   void setOutOfDate(bool OOD) {
479     OutOfDate = OOD;
480     if (OOD)
481       NeedsHandleIdentifier = true;
482     else
483       RecomputeNeedsHandleIdentifier();
484   }
485 
486   /// Determine whether this is the contextual keyword \c import.
487   bool isModulesImport() const { return IsModulesImport; }
488 
489   /// Set whether this identifier is the contextual keyword \c import.
490   void setModulesImport(bool I) {
491     IsModulesImport = I;
492     if (I)
493       NeedsHandleIdentifier = true;
494     else
495       RecomputeNeedsHandleIdentifier();
496   }
497 
498   /// Determine whether this is the mangled name of an OpenMP variant.
499   bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
500 
501   /// Set whether this is the mangled name of an OpenMP variant.
502   void setMangledOpenMPVariantName(bool I) { IsMangledOpenMPVariantName = I; }
503 
504   /// Return true if this identifier is an editor placeholder.
505   ///
506   /// Editor placeholders are produced by the code-completion engine and are
507   /// represented as characters between '<#' and '#>' in the source code. An
508   /// example of auto-completed call with a placeholder parameter is shown
509   /// below:
510   /// \code
511   ///   function(<#int x#>);
512   /// \endcode
513   bool isEditorPlaceholder() const {
514     return getName().starts_with("<#") && getName().ends_with("#>");
515   }
516 
517   /// Determine whether \p this is a name reserved for the implementation (C99
518   /// 7.1.3, C++ [lib.global.names]).
519   ReservedIdentifierStatus isReserved(const LangOptions &LangOpts) const;
520 
521   /// Determine whether \p this is a name reserved for future standardization or
522   /// the implementation (C++ [usrlit.suffix]).
523   ReservedLiteralSuffixIdStatus isReservedLiteralSuffixId() const;
524 
525   /// If the identifier is an "uglified" reserved name, return a cleaned form.
526   /// e.g. _Foo => Foo. Otherwise, just returns the name.
527   StringRef deuglifiedName() const;
528   bool isPlaceholder() const {
529     return getLength() == 1 && getNameStart()[0] == '_';
530   }
531 
532   /// Provide less than operator for lexicographical sorting.
533   bool operator<(const IdentifierInfo &RHS) const {
534     return getName() < RHS.getName();
535   }
536 
537 private:
538   /// The Preprocessor::HandleIdentifier does several special (but rare)
539   /// things to identifiers of various sorts.  For example, it changes the
540   /// \c for keyword token from tok::identifier to tok::for.
541   ///
542   /// This method is very tied to the definition of HandleIdentifier.  Any
543   /// change to it should be reflected here.
544   void RecomputeNeedsHandleIdentifier() {
545     NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
546                             isExtensionToken() || isFutureCompatKeyword() ||
547                             isOutOfDate() || isModulesImport();
548   }
549 };
550 
551 /// An RAII object for [un]poisoning an identifier within a scope.
552 ///
553 /// \p II is allowed to be null, in which case objects of this type have
554 /// no effect.
555 class PoisonIdentifierRAIIObject {
556   IdentifierInfo *const II;
557   const bool OldValue;
558 
559 public:
PoisonIdentifierRAIIObject(IdentifierInfo * II,bool NewValue)560   PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
561     : II(II), OldValue(II ? II->isPoisoned() : false) {
562     if(II)
563       II->setIsPoisoned(NewValue);
564   }
565 
~PoisonIdentifierRAIIObject()566   ~PoisonIdentifierRAIIObject() {
567     if(II)
568       II->setIsPoisoned(OldValue);
569   }
570 };
571 
/// An iterator that walks over all of the known identifiers
/// in the lookup table.
///
/// Since this iterator uses an abstract interface via virtual
/// functions, it uses an object-oriented interface rather than the
/// more standard C++ STL iterator interface. In this OO-style
/// iteration, the single function \c Next() provides dereference,
/// advance, and end-of-sequence checking in a single
/// operation. Subclasses of this iterator type will provide the
/// actual functionality.
class IdentifierIterator {
protected:
  // Protected so that only subclasses can construct an iterator.
  IdentifierIterator() = default;

public:
  // Iterators cannot be copied or copy-assigned.
  IdentifierIterator(const IdentifierIterator &) = delete;
  IdentifierIterator &operator=(const IdentifierIterator &) = delete;

  // Virtual so subclasses can be destroyed through a base pointer; only
  // declared here.
  virtual ~IdentifierIterator();

  /// Retrieve the next string in the identifier table and
  /// advances the iterator for the following string.
  ///
  /// \returns The next string in the identifier table. If there is
  /// no such string, returns an empty \c StringRef.
  virtual StringRef Next() = 0;
};
599 
/// Provides lookups to, and iteration over, IdentifierInfo objects.
///
/// This is an abstract interface: get() is pure virtual and must be supplied
/// by subclasses, while getIdentifiers() is virtual but not pure.
class IdentifierInfoLookup {
public:
  // Virtual so implementations can be destroyed through a base pointer; only
  // declared here.
  virtual ~IdentifierInfoLookup();

  /// Return the IdentifierInfo for the specified named identifier.
  ///
  /// Unlike the version in IdentifierTable, this returns a pointer instead
  /// of a reference.  If the pointer is null then the IdentifierInfo cannot
  /// be found.
  virtual IdentifierInfo* get(StringRef Name) = 0;

  /// Retrieve an iterator into the set of all identifiers
  /// known to this identifier lookup source.
  ///
  /// This routine provides access to all of the identifiers known to
  /// the identifier lookup, allowing access to the contents of the
  /// identifiers without introducing the overhead of constructing
  /// IdentifierInfo objects for each.
  ///
  /// \returns A new iterator into the set of known identifiers. The
  /// caller is responsible for deleting this iterator.
  virtual IdentifierIterator *getIdentifiers();
};
624 
625 /// Implements an efficient mapping from strings to IdentifierInfo nodes.
626 ///
627 /// This has no other purpose, but this is an extremely performance-critical
628 /// piece of the code, as each occurrence of every identifier goes through
629 /// here when lexed.
630 class IdentifierTable {
631   // Shark shows that using MallocAllocator is *much* slower than using this
632   // BumpPtrAllocator!
633   using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>;
634   HashTableTy HashTable;
635 
636   IdentifierInfoLookup* ExternalLookup;
637 
638 public:
639   /// Create the identifier table.
640   explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr);
641 
642   /// Create the identifier table, populating it with info about the
643   /// language keywords for the language specified by \p LangOpts.
644   explicit IdentifierTable(const LangOptions &LangOpts,
645                            IdentifierInfoLookup *ExternalLookup = nullptr);
646 
647   /// Set the external identifier lookup mechanism.
setExternalIdentifierLookup(IdentifierInfoLookup * IILookup)648   void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
649     ExternalLookup = IILookup;
650   }
651 
652   /// Retrieve the external identifier lookup object, if any.
getExternalIdentifierLookup()653   IdentifierInfoLookup *getExternalIdentifierLookup() const {
654     return ExternalLookup;
655   }
656 
getAllocator()657   llvm::BumpPtrAllocator& getAllocator() {
658     return HashTable.getAllocator();
659   }
660 
661   /// Return the identifier token info for the specified named
662   /// identifier.
get(StringRef Name)663   IdentifierInfo &get(StringRef Name) {
664     auto &Entry = *HashTable.try_emplace(Name, nullptr).first;
665 
666     IdentifierInfo *&II = Entry.second;
667     if (II) return *II;
668 
669     // No entry; if we have an external lookup, look there first.
670     if (ExternalLookup) {
671       II = ExternalLookup->get(Name);
672       if (II)
673         return *II;
674     }
675 
676     // Lookups failed, make a new IdentifierInfo.
677     void *Mem = getAllocator().Allocate<IdentifierInfo>();
678     II = new (Mem) IdentifierInfo();
679 
680     // Make sure getName() knows how to find the IdentifierInfo
681     // contents.
682     II->Entry = &Entry;
683 
684     return *II;
685   }
686 
get(StringRef Name,tok::TokenKind TokenCode)687   IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
688     IdentifierInfo &II = get(Name);
689     II.TokenID = TokenCode;
690     assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
691     return II;
692   }
693 
694   /// Gets an IdentifierInfo for the given name without consulting
695   ///        external sources.
696   ///
697   /// This is a version of get() meant for external sources that want to
698   /// introduce or modify an identifier. If they called get(), they would
699   /// likely end up in a recursion.
getOwn(StringRef Name)700   IdentifierInfo &getOwn(StringRef Name) {
701     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
702 
703     IdentifierInfo *&II = Entry.second;
704     if (II)
705       return *II;
706 
707     // Lookups failed, make a new IdentifierInfo.
708     void *Mem = getAllocator().Allocate<IdentifierInfo>();
709     II = new (Mem) IdentifierInfo();
710 
711     // Make sure getName() knows how to find the IdentifierInfo
712     // contents.
713     II->Entry = &Entry;
714 
715     // If this is the 'import' contextual keyword, mark it as such.
716     if (Name.equals("import"))
717       II->setModulesImport(true);
718 
719     return *II;
720   }
721 
722   using iterator = HashTableTy::const_iterator;
723   using const_iterator = HashTableTy::const_iterator;
724 
begin()725   iterator begin() const { return HashTable.begin(); }
end()726   iterator end() const   { return HashTable.end(); }
size()727   unsigned size() const  { return HashTable.size(); }
728 
find(StringRef Name)729   iterator find(StringRef Name) const { return HashTable.find(Name); }
730 
731   /// Print some statistics to stderr that indicate how well the
732   /// hashing is doing.
733   void PrintStats() const;
734 
735   /// Populate the identifier table with info about the language keywords
736   /// for the language specified by \p LangOpts.
737   void AddKeywords(const LangOptions &LangOpts);
738 
739   /// Returns the correct diagnostic to issue for a future-compat diagnostic
740   /// warning. Note, this function assumes the identifier passed has already
741   /// been determined to be a future compatible keyword.
742   diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
743                                      const LangOptions &LangOpts);
744 };
745 
/// A family of Objective-C methods.
///
/// These families have no inherent meaning in the language, but are
/// nonetheless central enough in the existing implementations to
/// merit direct AST support.  While, in theory, arbitrary methods can
/// be considered to form families, we focus here on the methods
/// involving allocation and retain-count management, as these are the
/// most "core" and the most likely to be useful to diverse clients
/// without extra information.
///
/// Both selectors and actual method declarations may be classified
/// into families.  Method families may impose additional restrictions
/// beyond their selector name; for example, a method called '_init'
/// that returns void is not considered to be in the 'init' family
/// (but would be if it returned 'id').  It is also possible to
/// explicitly change or remove a method's family.  Therefore the
/// method's family should be considered the single source of truth.
///
/// Every enumerator must fit in ObjCMethodFamilyBitWidth bits and stay
/// distinct from InvalidObjCMethodFamily; revisit both when adding a family.
enum ObjCMethodFamily {
  /// No particular method family.
  OMF_None,

  // Selectors in these families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have
  // additional CamelCase "words" in their first selector chunk
  // following the family name.
  OMF_alloc,
  OMF_copy,
  OMF_init,
  OMF_mutableCopy,
  OMF_new,

  // These families are singletons consisting only of the nullary
  // selector with the given name.
  OMF_autorelease,
  OMF_dealloc,
  OMF_finalize,
  OMF_release,
  OMF_retain,
  OMF_retainCount,
  OMF_self,
  OMF_initialize,

  // performSelector families
  OMF_performSelector
};
791 
/// Enough bits to store any enumerator in ObjCMethodFamily or
/// InvalidObjCMethodFamily.
///
/// ObjCMethodFamily currently runs from OMF_None (0) to OMF_performSelector
/// (14), and InvalidObjCMethodFamily is 15, so 4 bits suffice.
enum { ObjCMethodFamilyBitWidth = 4 };

/// An invalid value of ObjCMethodFamily: all ObjCMethodFamilyBitWidth bits
/// set.
enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 };
798 
/// A family of Objective-C methods.
///
/// These are families of methods whose result type is initially 'id', but
/// which are candidates for the result type to be changed to 'instancetype'.
enum ObjCInstanceTypeFamily {
  OIT_None,
  OIT_Array,
  OIT_Dictionary,
  OIT_Singleton,
  OIT_Init,
  OIT_ReturnsSelf
};
811 
/// The classification returned by Selector::getStringFormatFamily().
///
/// NOTE(review): judging by the enumerator names this presumably
/// distinguishes NSString-style from CFString-style format methods, with
/// SFF_None meaning "neither" -- confirm against getStringFormatFamilyImpl.
enum ObjCStringFormatFamily {
  SFF_None,
  SFF_NSString,
  SFF_CFString
};
817 
818 namespace detail {
819 
820 /// DeclarationNameExtra is used as a base of various uncommon special names.
821 /// This class is needed since DeclarationName has not enough space to store
822 /// the kind of every possible names. Therefore the kind of common names is
823 /// stored directly in DeclarationName, and the kind of uncommon names is
824 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because
825 /// DeclarationName needs the lower 3 bits to store the kind of common names.
826 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change
827 /// here is very likely to require changes in DeclarationName(Table).
alignas(IdentifierInfoAlignment)828 class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
829   friend class clang::DeclarationName;
830   friend class clang::DeclarationNameTable;
831 
832 protected:
833   /// The kind of "extra" information stored in the DeclarationName. See
834   /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
835   /// are used. Note that DeclarationName depends on the numerical values
836   /// of the enumerators in this enum. See DeclarationName::StoredNameKind
837   /// for more info.
838   enum ExtraKind {
839     CXXDeductionGuideName,
840     CXXLiteralOperatorName,
841     CXXUsingDirective,
842     ObjCMultiArgSelector
843   };
844 
845   /// ExtraKindOrNumArgs has one of the following meaning:
846   ///  * The kind of an uncommon C++ special name. This DeclarationNameExtra
847   ///    is in this case in fact either a CXXDeductionGuideNameExtra or
848   ///    a CXXLiteralOperatorIdName.
849   ///
850   ///  * It may be also name common to C++ using-directives (CXXUsingDirective),
851   ///
852   ///  * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
853   ///    the number of arguments in the Objective-C selector, in which
854   ///    case the DeclarationNameExtra is also a MultiKeywordSelector.
855   unsigned ExtraKindOrNumArgs;
856 
857   DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
858   DeclarationNameExtra(unsigned NumArgs)
859       : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
860 
861   /// Return the corresponding ExtraKind.
862   ExtraKind getKind() const {
863     return static_cast<ExtraKind>(ExtraKindOrNumArgs >
864                                           (unsigned)ObjCMultiArgSelector
865                                       ? (unsigned)ObjCMultiArgSelector
866                                       : ExtraKindOrNumArgs);
867   }
868 
869   /// Return the number of arguments in an ObjC selector. Only valid when this
870   /// is indeed an ObjCMultiArgSelector.
871   unsigned getNumArgs() const {
872     assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
873            "getNumArgs called but this is not an ObjC selector!");
874     return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
875   }
876 };
877 
878 } // namespace detail
879 
880 /// One of these variable length records is kept for each
881 /// selector containing more than one keyword. We use a folding set
882 /// to unique aggregate names (keyword selectors in ObjC parlance). Access to
883 /// this class is provided strictly through Selector.
alignas(IdentifierInfoAlignment)884 class alignas(IdentifierInfoAlignment) MultiKeywordSelector
885     : public detail::DeclarationNameExtra,
886       public llvm::FoldingSetNode {
887   MultiKeywordSelector(unsigned nKeys) : DeclarationNameExtra(nKeys) {}
888 
889 public:
890   // Constructor for keyword selectors.
891   MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV)
892       : DeclarationNameExtra(nKeys) {
893     assert((nKeys > 1) && "not a multi-keyword selector");
894 
895     // Fill in the trailing keyword array.
896     IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this + 1);
897     for (unsigned i = 0; i != nKeys; ++i)
898       KeyInfo[i] = IIV[i];
899   }
900 
901   // getName - Derive the full selector name and return it.
902   std::string getName() const;
903 
904   using DeclarationNameExtra::getNumArgs;
905 
906   using keyword_iterator = IdentifierInfo *const *;
907 
908   keyword_iterator keyword_begin() const {
909     return reinterpret_cast<keyword_iterator>(this + 1);
910   }
911 
912   keyword_iterator keyword_end() const {
913     return keyword_begin() + getNumArgs();
914   }
915 
916   IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
917     assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index");
918     return keyword_begin()[i];
919   }
920 
921   static void Profile(llvm::FoldingSetNodeID &ID, keyword_iterator ArgTys,
922                       unsigned NumArgs) {
923     ID.AddInteger(NumArgs);
924     for (unsigned i = 0; i != NumArgs; ++i)
925       ID.AddPointer(ArgTys[i]);
926   }
927 
928   void Profile(llvm::FoldingSetNodeID &ID) {
929     Profile(ID, keyword_begin(), getNumArgs());
930   }
931 };
932 
933 /// Smart pointer class that efficiently represents Objective-C method
934 /// names.
935 ///
936 /// This class will either point to an IdentifierInfo or a
937 /// MultiKeywordSelector (which is private). This enables us to optimize
938 /// selectors that take no arguments and selectors that take 1 argument, which
939 /// accounts for 78% of all selectors in Cocoa.h.
940 class Selector {
941   friend class Diagnostic;
942   friend class SelectorTable; // only the SelectorTable can create these
943   friend class DeclarationName; // and the AST's DeclarationName.
944 
945   enum IdentifierInfoFlag {
946     // Empty selector = 0. Note that these enumeration values must
947     // correspond to the enumeration values of DeclarationName::StoredNameKind
948     ZeroArg = 0x01,
949     OneArg = 0x02,
950     // IMPORTANT NOTE: see comments in InfoPtr (below) about this enumerator
951     // value.
952     MultiArg = 0x07,
953   };
954 
955   /// IMPORTANT NOTE: the order of the types in this PointerUnion are
956   /// important! The DeclarationName class has bidirectional conversion
957   /// to/from Selector through an opaque pointer (void *) which corresponds
958   /// to this PointerIntPair. The discriminator bit from the PointerUnion
959   /// corresponds to the high bit in the MultiArg enumerator. So while this
960   /// PointerIntPair only has two bits for the integer (and we mask off the
961   /// high bit in `MultiArg` when it is used), that discrimator bit is
962   /// still necessary for the opaque conversion. The discriminator bit
963   /// from the PointerUnion and the two integer bits from the
964   /// PointerIntPair are also exposed via the DeclarationName::StoredNameKind
965   /// enumeration; see the comments in DeclarationName.h for more details.
966   /// Do not reorder or add any arguments to this template
967   /// without thoroughly understanding how tightly coupled these classes are.
968   llvm::PointerIntPair<
969       llvm::PointerUnion<IdentifierInfo *, MultiKeywordSelector *>, 2>
970       InfoPtr;
971 
Selector(IdentifierInfo * II,unsigned nArgs)972   Selector(IdentifierInfo *II, unsigned nArgs) {
973     assert(nArgs < 2 && "nArgs not equal to 0/1");
974     InfoPtr.setPointerAndInt(II, nArgs + 1);
975   }
976 
Selector(MultiKeywordSelector * SI)977   Selector(MultiKeywordSelector *SI) {
978     // IMPORTANT NOTE: we mask off the upper bit of this value because we only
979     // reserve two bits for the integer in the PointerIntPair. See the comments
980     // in `InfoPtr` for more details.
981     InfoPtr.setPointerAndInt(SI, MultiArg & 0b11);
982   }
983 
getAsIdentifierInfo()984   IdentifierInfo *getAsIdentifierInfo() const {
985     return InfoPtr.getPointer().dyn_cast<IdentifierInfo *>();
986   }
987 
getMultiKeywordSelector()988   MultiKeywordSelector *getMultiKeywordSelector() const {
989     return InfoPtr.getPointer().get<MultiKeywordSelector *>();
990   }
991 
getIdentifierInfoFlag()992   unsigned getIdentifierInfoFlag() const {
993     unsigned new_flags = InfoPtr.getInt();
994     // IMPORTANT NOTE: We have to reconstitute this data rather than use the
995     // value directly from the PointerIntPair. See the comments in `InfoPtr`
996     // for more details.
997     if (InfoPtr.getPointer().is<MultiKeywordSelector *>())
998       new_flags |= MultiArg;
999     return new_flags;
1000   }
1001 
1002   static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
1003 
1004   static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
1005 
1006 public:
1007   /// The default ctor should only be used when creating data structures that
1008   ///  will contain selectors.
1009   Selector() = default;
Selector(uintptr_t V)1010   explicit Selector(uintptr_t V) {
1011     InfoPtr.setFromOpaqueValue(reinterpret_cast<void *>(V));
1012   }
1013 
1014   /// operator==/!= - Indicate whether the specified selectors are identical.
1015   bool operator==(Selector RHS) const {
1016     return InfoPtr.getOpaqueValue() == RHS.InfoPtr.getOpaqueValue();
1017   }
1018   bool operator!=(Selector RHS) const {
1019     return InfoPtr.getOpaqueValue() != RHS.InfoPtr.getOpaqueValue();
1020   }
1021 
getAsOpaquePtr()1022   void *getAsOpaquePtr() const { return InfoPtr.getOpaqueValue(); }
1023 
1024   /// Determine whether this is the empty selector.
isNull()1025   bool isNull() const { return InfoPtr.getOpaqueValue() == nullptr; }
1026 
1027   // Predicates to identify the selector type.
isKeywordSelector()1028   bool isKeywordSelector() const { return InfoPtr.getInt() != ZeroArg; }
1029 
isUnarySelector()1030   bool isUnarySelector() const { return InfoPtr.getInt() == ZeroArg; }
1031 
1032   /// If this selector is the specific keyword selector described by Names.
1033   bool isKeywordSelector(ArrayRef<StringRef> Names) const;
1034 
1035   /// If this selector is the specific unary selector described by Name.
1036   bool isUnarySelector(StringRef Name) const;
1037 
1038   unsigned getNumArgs() const;
1039 
1040   /// Retrieve the identifier at a given position in the selector.
1041   ///
1042   /// Note that the identifier pointer returned may be NULL. Clients that only
1043   /// care about the text of the identifier string, and not the specific,
1044   /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
1045   /// an empty string when the identifier pointer would be NULL.
1046   ///
1047   /// \param argIndex The index for which we want to retrieve the identifier.
1048   /// This index shall be less than \c getNumArgs() unless this is a keyword
1049   /// selector, in which case 0 is the only permissible value.
1050   ///
1051   /// \returns the uniqued identifier for this slot, or NULL if this slot has
1052   /// no corresponding identifier.
1053   IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
1054 
1055   /// Retrieve the name at a given position in the selector.
1056   ///
1057   /// \param argIndex The index for which we want to retrieve the name.
1058   /// This index shall be less than \c getNumArgs() unless this is a keyword
1059   /// selector, in which case 0 is the only permissible value.
1060   ///
1061   /// \returns the name for this slot, which may be the empty string if no
1062   /// name was supplied.
1063   StringRef getNameForSlot(unsigned argIndex) const;
1064 
1065   /// Derive the full selector name (e.g. "foo:bar:") and return
1066   /// it as an std::string.
1067   std::string getAsString() const;
1068 
1069   /// Prints the full selector name (e.g. "foo:bar:").
1070   void print(llvm::raw_ostream &OS) const;
1071 
1072   void dump() const;
1073 
1074   /// Derive the conventional family of this method.
getMethodFamily()1075   ObjCMethodFamily getMethodFamily() const {
1076     return getMethodFamilyImpl(*this);
1077   }
1078 
getStringFormatFamily()1079   ObjCStringFormatFamily getStringFormatFamily() const {
1080     return getStringFormatFamilyImpl(*this);
1081   }
1082 
getEmptyMarker()1083   static Selector getEmptyMarker() {
1084     return Selector(uintptr_t(-1));
1085   }
1086 
getTombstoneMarker()1087   static Selector getTombstoneMarker() {
1088     return Selector(uintptr_t(-2));
1089   }
1090 
1091   static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
1092 };
1093 
/// This table allows us to fully hide how we implement
/// multi-keyword caching.
///
/// The table is not copyable. Its state lives behind the opaque \c Impl
/// pointer.
class SelectorTable {
  // Actually a SelectorTableImpl
  void *Impl;

public:
  SelectorTable();
  // Copying is disabled.
  SelectorTable(const SelectorTable &) = delete;
  SelectorTable &operator=(const SelectorTable &) = delete;
  ~SelectorTable();

  /// Can create any sort of selector.
  ///
  /// \p NumArgs indicates whether this is a no argument selector "foo", a
  /// single argument selector "foo:" or multi-argument "foo:bar:".
  Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);

  /// Create a selector from \p ID that takes exactly one argument.
  ///
  /// Note the naming mismatch with Selector: the result is stored with the
  /// OneArg flag, so Selector::isUnarySelector() is false for it and
  /// Selector::isKeywordSelector() is true.
  Selector getUnarySelector(IdentifierInfo *ID) {
    return Selector(ID, 1);
  }

  /// Create a selector from \p ID that takes no arguments.
  ///
  /// The result is stored with the ZeroArg flag, which is what
  /// Selector::isUnarySelector() tests for.
  Selector getNullarySelector(IdentifierInfo *ID) {
    return Selector(ID, 0);
  }

  /// Return the total amount of memory allocated for managing selectors.
  size_t getTotalMemory() const;

  /// Return the default setter name for the given identifier.
  ///
  /// This is "set" + \p Name where the initial character of \p Name
  /// has been capitalized.
  static SmallString<64> constructSetterName(StringRef Name);

  /// Return the default setter selector for the given identifier.
  ///
  /// This is "set" + \p Name where the initial character of \p Name
  /// has been capitalized.
  static Selector constructSetterSelector(IdentifierTable &Idents,
                                          SelectorTable &SelTable,
                                          const IdentifierInfo *Name);

  /// Return the property name for the given setter selector.
  static std::string getPropertyNameFromSetterSelector(Selector Sel);
};
1140 
1141 }  // namespace clang
1142 
1143 namespace llvm {
1144 
/// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
/// DenseSets.
///
/// The empty and tombstone keys are the two marker selectors provided by
/// clang::Selector, and key equality is Selector's own operator==.
template <>
struct DenseMapInfo<clang::Selector> {
  /// The key that marks an empty bucket.
  static clang::Selector getEmptyKey() {
    return clang::Selector::getEmptyMarker();
  }

  /// The key that marks a bucket whose entry was erased.
  static clang::Selector getTombstoneKey() {
    return clang::Selector::getTombstoneMarker();
  }

  /// Hash of \p S; defined out of line.
  static unsigned getHashValue(clang::Selector S);

  /// Two keys are equal when the selectors compare equal.
  static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
    return LHS == RHS;
  }
};
1163 
/// Lets a Selector be treated as a pointer-like type by round-tripping it
/// through its opaque pointer value.
template<>
struct PointerLikeTypeTraits<clang::Selector> {
  /// Return the opaque pointer value of \p P.
  static const void *getAsVoidPointer(clang::Selector P) {
    return P.getAsOpaquePtr();
  }

  /// Rebuild a Selector from an opaque value previously obtained from
  /// getAsVoidPointer().
  static clang::Selector getFromVoidPointer(const void *P) {
    return clang::Selector(reinterpret_cast<uintptr_t>(P));
  }

  // No low bits are advertised as free. NOTE(review): presumably because
  // Selector already uses the low bits of its pointer for its own flags --
  // confirm before raising this value.
  static constexpr int NumLowBitsAvailable = 0;
};
1176 
1177 } // namespace llvm
1178 
1179 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
1180