1 //===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
11 /// clang::Selector interfaces.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
16 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17 
18 #include "clang/Basic/DiagnosticIDs.h"
19 #include "clang/Basic/LLVM.h"
20 #include "clang/Basic/TokenKinds.h"
21 #include "llvm/ADT/DenseMapInfo.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/ADT/StringMap.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Support/Allocator.h"
26 #include "llvm/Support/PointerLikeTypeTraits.h"
27 #include "llvm/Support/type_traits.h"
28 #include <cassert>
29 #include <cstddef>
30 #include <cstdint>
31 #include <cstring>
32 #include <string>
33 #include <utility>
34 
35 namespace clang {
36 
37 class DeclarationName;
38 class DeclarationNameTable;
39 class IdentifierInfo;
40 class LangOptions;
41 class MultiKeywordSelector;
42 class SourceLocation;
43 
/// The ways in which an identifier can be a name reserved for the
/// implementation; NotReserved means it is not reserved at all.
enum class ReservedIdentifierStatus {
  NotReserved = 0,
  StartsWithUnderscoreAtGlobalScope,
  StartsWithUnderscoreAndIsExternC,
  StartsWithDoubleUnderscore,
  StartsWithUnderscoreFollowedByCapitalLetter,
  ContainsDoubleUnderscore,
};

/// Tell whether \p Status describes an identifier that is reserved for use as
/// a name at global scope (for instance an implementation-specific global
/// function or variable). Every status except NotReserved qualifies.
inline bool isReservedAtGlobalScope(ReservedIdentifierStatus Status) {
  return !(Status == ReservedIdentifierStatus::NotReserved);
}

/// Tell whether \p Status describes an identifier that is reserved in every
/// context (for instance an implementation-specific keyword or macro).
///
/// The two statuses that only apply at global scope / to extern "C" names are
/// not reserved everywhere, and neither is NotReserved.
inline bool isReservedInAllContexts(ReservedIdentifierStatus Status) {
  switch (Status) {
  case ReservedIdentifierStatus::NotReserved:
  case ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope:
  case ReservedIdentifierStatus::StartsWithUnderscoreAndIsExternC:
    return false;
  default:
    return true;
  }
}
68 
69 /// A simple pair of identifier info and location.
70 using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;
71 
72 /// IdentifierInfo and other related classes are aligned to
73 /// 8 bytes so that DeclarationName can use the lower 3 bits
74 /// of a pointer to one of these classes.
75 enum { IdentifierInfoAlignment = 8 };
76 
77 static constexpr int ObjCOrBuiltinIDBits = 16;
78 
79 /// One of these records is kept for each identifier that
80 /// is lexed.  This contains information about whether the token was \#define'd,
81 /// is a language keyword, or if it is a front-end token of some sort (e.g. a
82 /// variable or function name).  The preprocessor keeps this information in a
83 /// set, and all tok::identifier tokens have a pointer to one of these.
84 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits.
alignas(IdentifierInfoAlignment)85 class alignas(IdentifierInfoAlignment) IdentifierInfo {
86   friend class IdentifierTable;
87 
88   // Front-end token ID or tok::identifier.
89   unsigned TokenID : 9;
90 
91   // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
92   // First NUM_OBJC_KEYWORDS values are for Objective-C,
93   // the remaining values are for builtins.
94   unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits;
95 
96   // True if there is a #define for this.
97   unsigned HasMacro : 1;
98 
99   // True if there was a #define for this.
100   unsigned HadMacro : 1;
101 
102   // True if the identifier is a language extension.
103   unsigned IsExtension : 1;
104 
105   // True if the identifier is a keyword in a newer or proposed Standard.
106   unsigned IsFutureCompatKeyword : 1;
107 
108   // True if the identifier is poisoned.
109   unsigned IsPoisoned : 1;
110 
111   // True if the identifier is a C++ operator keyword.
112   unsigned IsCPPOperatorKeyword : 1;
113 
114   // Internal bit set by the member function RecomputeNeedsHandleIdentifier.
115   // See comment about RecomputeNeedsHandleIdentifier for more info.
116   unsigned NeedsHandleIdentifier : 1;
117 
118   // True if the identifier was loaded (at least partially) from an AST file.
119   unsigned IsFromAST : 1;
120 
121   // True if the identifier has changed from the definition
122   // loaded from an AST file.
123   unsigned ChangedAfterLoad : 1;
124 
125   // True if the identifier's frontend information has changed from the
126   // definition loaded from an AST file.
127   unsigned FEChangedAfterLoad : 1;
128 
129   // True if revertTokenIDToIdentifier was called.
130   unsigned RevertedTokenID : 1;
131 
132   // True if there may be additional information about
133   // this identifier stored externally.
134   unsigned OutOfDate : 1;
135 
136   // True if this is the 'import' contextual keyword.
137   unsigned IsModulesImport : 1;
138 
139   // True if this is a mangled OpenMP variant name.
140   unsigned IsMangledOpenMPVariantName : 1;
141 
142   // True if this is a deprecated macro.
143   unsigned IsDeprecatedMacro : 1;
144 
145   // True if this macro is unsafe in headers.
146   unsigned IsRestrictExpansion : 1;
147 
148   // True if this macro is final.
149   unsigned IsFinal : 1;
150 
151   // 22 bits left in a 64-bit word.
152 
153   // Managed by the language front-end.
154   void *FETokenInfo = nullptr;
155 
156   llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
157 
158   IdentifierInfo()
159       : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false),
160         HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false),
161         IsPoisoned(false), IsCPPOperatorKeyword(false),
162         NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false),
163         FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false),
164         IsModulesImport(false), IsMangledOpenMPVariantName(false),
165         IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}
166 
167 public:
168   IdentifierInfo(const IdentifierInfo &) = delete;
169   IdentifierInfo &operator=(const IdentifierInfo &) = delete;
170   IdentifierInfo(IdentifierInfo &&) = delete;
171   IdentifierInfo &operator=(IdentifierInfo &&) = delete;
172 
173   /// Return true if this is the identifier for the specified string.
174   ///
175   /// This is intended to be used for string literals only: II->isStr("foo").
176   template <std::size_t StrLen>
177   bool isStr(const char (&Str)[StrLen]) const {
178     return getLength() == StrLen-1 &&
179            memcmp(getNameStart(), Str, StrLen-1) == 0;
180   }
181 
182   /// Return true if this is the identifier for the specified StringRef.
183   bool isStr(llvm::StringRef Str) const {
184     llvm::StringRef ThisStr(getNameStart(), getLength());
185     return ThisStr == Str;
186   }
187 
188   /// Return the beginning of the actual null-terminated string for this
189   /// identifier.
190   const char *getNameStart() const { return Entry->getKeyData(); }
191 
192   /// Efficiently return the length of this identifier info.
193   unsigned getLength() const { return Entry->getKeyLength(); }
194 
195   /// Return the actual identifier string.
196   StringRef getName() const {
197     return StringRef(getNameStart(), getLength());
198   }
199 
200   /// Return true if this identifier is \#defined to some other value.
201   /// \note The current definition may be in a module and not currently visible.
202   bool hasMacroDefinition() const {
203     return HasMacro;
204   }
205   void setHasMacroDefinition(bool Val) {
206     if (HasMacro == Val) return;
207 
208     HasMacro = Val;
209     if (Val) {
210       NeedsHandleIdentifier = true;
211       HadMacro = true;
212     } else {
213       // If this is a final macro, make the deprecation and header unsafe bits
214       // stick around after the undefinition so they apply to any redefinitions.
215       if (!IsFinal) {
216         // Because calling the setters of these calls recomputes, just set them
217         // manually to avoid recomputing a bunch of times.
218         IsDeprecatedMacro = false;
219         IsRestrictExpansion = false;
220       }
221       RecomputeNeedsHandleIdentifier();
222     }
223   }
224   /// Returns true if this identifier was \#defined to some value at any
225   /// moment. In this case there should be an entry for the identifier in the
226   /// macro history table in Preprocessor.
227   bool hadMacroDefinition() const {
228     return HadMacro;
229   }
230 
231   bool isDeprecatedMacro() const { return IsDeprecatedMacro; }
232 
233   void setIsDeprecatedMacro(bool Val) {
234     if (IsDeprecatedMacro == Val)
235       return;
236     IsDeprecatedMacro = Val;
237     if (Val)
238       NeedsHandleIdentifier = true;
239     else
240       RecomputeNeedsHandleIdentifier();
241   }
242 
243   bool isRestrictExpansion() const { return IsRestrictExpansion; }
244 
245   void setIsRestrictExpansion(bool Val) {
246     if (IsRestrictExpansion == Val)
247       return;
248     IsRestrictExpansion = Val;
249     if (Val)
250       NeedsHandleIdentifier = true;
251     else
252       RecomputeNeedsHandleIdentifier();
253   }
254 
255   bool isFinal() const { return IsFinal; }
256 
257   void setIsFinal(bool Val) { IsFinal = Val; }
258 
259   /// If this is a source-language token (e.g. 'for'), this API
260   /// can be used to cause the lexer to map identifiers to source-language
261   /// tokens.
262   tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
263 
264   /// True if revertTokenIDToIdentifier() was called.
265   bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
266 
267   /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
268   /// compatibility.
269   ///
270   /// TokenID is normally read-only but there are 2 instances where we revert it
271   /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
272   /// using this method so we can inform serialization about it.
273   void revertTokenIDToIdentifier() {
274     assert(TokenID != tok::identifier && "Already at tok::identifier");
275     TokenID = tok::identifier;
276     RevertedTokenID = true;
277   }
278   void revertIdentifierToTokenID(tok::TokenKind TK) {
279     assert(TokenID == tok::identifier && "Should be at tok::identifier");
280     TokenID = TK;
281     RevertedTokenID = false;
282   }
283 
284   /// Return the preprocessor keyword ID for this identifier.
285   ///
286   /// For example, "define" will return tok::pp_define.
287   tok::PPKeywordKind getPPKeywordID() const;
288 
289   /// Return the Objective-C keyword ID for the this identifier.
290   ///
291   /// For example, 'class' will return tok::objc_class if ObjC is enabled.
292   tok::ObjCKeywordKind getObjCKeywordID() const {
293     if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
294       return tok::ObjCKeywordKind(ObjCOrBuiltinID);
295     else
296       return tok::objc_not_keyword;
297   }
298   void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
299 
300   /// Return a value indicating whether this is a builtin function.
301   ///
302   /// 0 is not-built-in. 1+ are specific builtin functions.
303   unsigned getBuiltinID() const {
304     if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
305       return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
306     else
307       return 0;
308   }
309   void setBuiltinID(unsigned ID) {
310     ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
311     assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
312            && "ID too large for field!");
313   }
314 
315   unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
316   void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
317 
318   /// get/setExtension - Initialize information about whether or not this
319   /// language token is an extension.  This controls extension warnings, and is
320   /// only valid if a custom token ID is set.
321   bool isExtensionToken() const { return IsExtension; }
322   void setIsExtensionToken(bool Val) {
323     IsExtension = Val;
324     if (Val)
325       NeedsHandleIdentifier = true;
326     else
327       RecomputeNeedsHandleIdentifier();
328   }
329 
330   /// is/setIsFutureCompatKeyword - Initialize information about whether or not
331   /// this language token is a keyword in a newer or proposed Standard. This
332   /// controls compatibility warnings, and is only true when not parsing the
333   /// corresponding Standard. Once a compatibility problem has been diagnosed
334   /// with this keyword, the flag will be cleared.
335   bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
336   void setIsFutureCompatKeyword(bool Val) {
337     IsFutureCompatKeyword = Val;
338     if (Val)
339       NeedsHandleIdentifier = true;
340     else
341       RecomputeNeedsHandleIdentifier();
342   }
343 
344   /// setIsPoisoned - Mark this identifier as poisoned.  After poisoning, the
345   /// Preprocessor will emit an error every time this token is used.
346   void setIsPoisoned(bool Value = true) {
347     IsPoisoned = Value;
348     if (Value)
349       NeedsHandleIdentifier = true;
350     else
351       RecomputeNeedsHandleIdentifier();
352   }
353 
354   /// Return true if this token has been poisoned.
355   bool isPoisoned() const { return IsPoisoned; }
356 
357   /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
358   /// this identifier is a C++ alternate representation of an operator.
359   void setIsCPlusPlusOperatorKeyword(bool Val = true) {
360     IsCPPOperatorKeyword = Val;
361   }
362   bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
363 
364   /// Return true if this token is a keyword in the specified language.
365   bool isKeyword(const LangOptions &LangOpts) const;
366 
367   /// Return true if this token is a C++ keyword in the specified
368   /// language.
369   bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
370 
371   /// Get and set FETokenInfo. The language front-end is allowed to associate
372   /// arbitrary metadata with this token.
373   void *getFETokenInfo() const { return FETokenInfo; }
374   void setFETokenInfo(void *T) { FETokenInfo = T; }
375 
376   /// Return true if the Preprocessor::HandleIdentifier must be called
377   /// on a token of this identifier.
378   ///
379   /// If this returns false, we know that HandleIdentifier will not affect
380   /// the token.
381   bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
382 
383   /// Return true if the identifier in its current state was loaded
384   /// from an AST file.
385   bool isFromAST() const { return IsFromAST; }
386 
387   void setIsFromAST() { IsFromAST = true; }
388 
389   /// Determine whether this identifier has changed since it was loaded
390   /// from an AST file.
391   bool hasChangedSinceDeserialization() const {
392     return ChangedAfterLoad;
393   }
394 
395   /// Note that this identifier has changed since it was loaded from
396   /// an AST file.
397   void setChangedSinceDeserialization() {
398     ChangedAfterLoad = true;
399   }
400 
401   /// Determine whether the frontend token information for this
402   /// identifier has changed since it was loaded from an AST file.
403   bool hasFETokenInfoChangedSinceDeserialization() const {
404     return FEChangedAfterLoad;
405   }
406 
407   /// Note that the frontend token information for this identifier has
408   /// changed since it was loaded from an AST file.
409   void setFETokenInfoChangedSinceDeserialization() {
410     FEChangedAfterLoad = true;
411   }
412 
413   /// Determine whether the information for this identifier is out of
414   /// date with respect to the external source.
415   bool isOutOfDate() const { return OutOfDate; }
416 
417   /// Set whether the information for this identifier is out of
418   /// date with respect to the external source.
419   void setOutOfDate(bool OOD) {
420     OutOfDate = OOD;
421     if (OOD)
422       NeedsHandleIdentifier = true;
423     else
424       RecomputeNeedsHandleIdentifier();
425   }
426 
427   /// Determine whether this is the contextual keyword \c import.
428   bool isModulesImport() const { return IsModulesImport; }
429 
430   /// Set whether this identifier is the contextual keyword \c import.
431   void setModulesImport(bool I) {
432     IsModulesImport = I;
433     if (I)
434       NeedsHandleIdentifier = true;
435     else
436       RecomputeNeedsHandleIdentifier();
437   }
438 
439   /// Determine whether this is the mangled name of an OpenMP variant.
440   bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
441 
442   /// Set whether this is the mangled name of an OpenMP variant.
443   void setMangledOpenMPVariantName(bool I) { IsMangledOpenMPVariantName = I; }
444 
445   /// Return true if this identifier is an editor placeholder.
446   ///
447   /// Editor placeholders are produced by the code-completion engine and are
448   /// represented as characters between '<#' and '#>' in the source code. An
449   /// example of auto-completed call with a placeholder parameter is shown
450   /// below:
451   /// \code
452   ///   function(<#int x#>);
453   /// \endcode
454   bool isEditorPlaceholder() const {
455     return getName().startswith("<#") && getName().endswith("#>");
456   }
457 
458   /// Determine whether \p this is a name reserved for the implementation (C99
459   /// 7.1.3, C++ [lib.global.names]).
460   ReservedIdentifierStatus isReserved(const LangOptions &LangOpts) const;
461 
462   /// If the identifier is an "uglified" reserved name, return a cleaned form.
463   /// e.g. _Foo => Foo. Otherwise, just returns the name.
464   StringRef deuglifiedName() const;
465 
466   /// Provide less than operator for lexicographical sorting.
467   bool operator<(const IdentifierInfo &RHS) const {
468     return getName() < RHS.getName();
469   }
470 
471 private:
472   /// The Preprocessor::HandleIdentifier does several special (but rare)
473   /// things to identifiers of various sorts.  For example, it changes the
474   /// \c for keyword token from tok::identifier to tok::for.
475   ///
476   /// This method is very tied to the definition of HandleIdentifier.  Any
477   /// change to it should be reflected here.
478   void RecomputeNeedsHandleIdentifier() {
479     NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
480                             isExtensionToken() || isFutureCompatKeyword() ||
481                             isOutOfDate() || isModulesImport();
482   }
483 };
484 
485 /// An RAII object for [un]poisoning an identifier within a scope.
486 ///
487 /// \p II is allowed to be null, in which case objects of this type have
488 /// no effect.
489 class PoisonIdentifierRAIIObject {
490   IdentifierInfo *const II;
491   const bool OldValue;
492 
493 public:
PoisonIdentifierRAIIObject(IdentifierInfo * II,bool NewValue)494   PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
495     : II(II), OldValue(II ? II->isPoisoned() : false) {
496     if(II)
497       II->setIsPoisoned(NewValue);
498   }
499 
~PoisonIdentifierRAIIObject()500   ~PoisonIdentifierRAIIObject() {
501     if(II)
502       II->setIsPoisoned(OldValue);
503   }
504 };
505 
506 /// An iterator that walks over all of the known identifiers
507 /// in the lookup table.
508 ///
509 /// Since this iterator uses an abstract interface via virtual
510 /// functions, it uses an object-oriented interface rather than the
511 /// more standard C++ STL iterator interface. In this OO-style
512 /// iteration, the single function \c Next() provides dereference,
513 /// advance, and end-of-sequence checking in a single
514 /// operation. Subclasses of this iterator type will provide the
515 /// actual functionality.
class IdentifierIterator {
protected:
  // Construction is restricted to subclasses; this base class declares only
  // the interface.
  IdentifierIterator() = default;

public:
  // Iterators cannot be copied or copy-assigned.
  IdentifierIterator(const IdentifierIterator &) = delete;
  IdentifierIterator &operator=(const IdentifierIterator &) = delete;

  // Virtual destructor (defined out of line), so a concrete iterator can be
  // destroyed through a pointer to this base class.
  virtual ~IdentifierIterator();

  /// Retrieve the next string in the identifier table and
  /// advances the iterator for the following string.
  ///
  /// \returns The next string in the identifier table. If there is
  /// no such string, returns an empty \c StringRef.
  virtual StringRef Next() = 0;
};
533 
/// Provides lookups to, and iteration over, IdentifierInfo objects.
class IdentifierInfoLookup {
public:
  // Virtual destructor (defined out of line) for this polymorphic interface.
  virtual ~IdentifierInfoLookup();

  /// Return the IdentifierInfo for the specified named identifier.
  ///
  /// Unlike the version in IdentifierTable, this returns a pointer instead
  /// of a reference.  If the pointer is null then the IdentifierInfo cannot
  /// be found.
  ///
  /// \param Name The spelling of the identifier to look up.
  virtual IdentifierInfo* get(StringRef Name) = 0;

  /// Retrieve an iterator into the set of all identifiers
  /// known to this identifier lookup source.
  ///
  /// This routine provides access to all of the identifiers known to
  /// the identifier lookup, allowing access to the contents of the
  /// identifiers without introducing the overhead of constructing
  /// IdentifierInfo objects for each.
  ///
  /// Unlike get(), this is not pure virtual; its default implementation is
  /// defined out of line and is not visible in this header.
  ///
  /// \returns A new iterator into the set of known identifiers. The
  /// caller is responsible for deleting this iterator.
  virtual IdentifierIterator *getIdentifiers();
};
558 
559 /// Implements an efficient mapping from strings to IdentifierInfo nodes.
560 ///
561 /// This has no other purpose, but this is an extremely performance-critical
562 /// piece of the code, as each occurrence of every identifier goes through
563 /// here when lexed.
564 class IdentifierTable {
565   // Shark shows that using MallocAllocator is *much* slower than using this
566   // BumpPtrAllocator!
567   using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>;
568   HashTableTy HashTable;
569 
570   IdentifierInfoLookup* ExternalLookup;
571 
572 public:
573   /// Create the identifier table.
574   explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr);
575 
576   /// Create the identifier table, populating it with info about the
577   /// language keywords for the language specified by \p LangOpts.
578   explicit IdentifierTable(const LangOptions &LangOpts,
579                            IdentifierInfoLookup *ExternalLookup = nullptr);
580 
581   /// Set the external identifier lookup mechanism.
setExternalIdentifierLookup(IdentifierInfoLookup * IILookup)582   void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
583     ExternalLookup = IILookup;
584   }
585 
586   /// Retrieve the external identifier lookup object, if any.
getExternalIdentifierLookup()587   IdentifierInfoLookup *getExternalIdentifierLookup() const {
588     return ExternalLookup;
589   }
590 
getAllocator()591   llvm::BumpPtrAllocator& getAllocator() {
592     return HashTable.getAllocator();
593   }
594 
595   /// Return the identifier token info for the specified named
596   /// identifier.
get(StringRef Name)597   IdentifierInfo &get(StringRef Name) {
598     auto &Entry = *HashTable.try_emplace(Name, nullptr).first;
599 
600     IdentifierInfo *&II = Entry.second;
601     if (II) return *II;
602 
603     // No entry; if we have an external lookup, look there first.
604     if (ExternalLookup) {
605       II = ExternalLookup->get(Name);
606       if (II)
607         return *II;
608     }
609 
610     // Lookups failed, make a new IdentifierInfo.
611     void *Mem = getAllocator().Allocate<IdentifierInfo>();
612     II = new (Mem) IdentifierInfo();
613 
614     // Make sure getName() knows how to find the IdentifierInfo
615     // contents.
616     II->Entry = &Entry;
617 
618     return *II;
619   }
620 
get(StringRef Name,tok::TokenKind TokenCode)621   IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
622     IdentifierInfo &II = get(Name);
623     II.TokenID = TokenCode;
624     assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
625     return II;
626   }
627 
628   /// Gets an IdentifierInfo for the given name without consulting
629   ///        external sources.
630   ///
631   /// This is a version of get() meant for external sources that want to
632   /// introduce or modify an identifier. If they called get(), they would
633   /// likely end up in a recursion.
getOwn(StringRef Name)634   IdentifierInfo &getOwn(StringRef Name) {
635     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
636 
637     IdentifierInfo *&II = Entry.second;
638     if (II)
639       return *II;
640 
641     // Lookups failed, make a new IdentifierInfo.
642     void *Mem = getAllocator().Allocate<IdentifierInfo>();
643     II = new (Mem) IdentifierInfo();
644 
645     // Make sure getName() knows how to find the IdentifierInfo
646     // contents.
647     II->Entry = &Entry;
648 
649     // If this is the 'import' contextual keyword, mark it as such.
650     if (Name.equals("import"))
651       II->setModulesImport(true);
652 
653     return *II;
654   }
655 
656   using iterator = HashTableTy::const_iterator;
657   using const_iterator = HashTableTy::const_iterator;
658 
begin()659   iterator begin() const { return HashTable.begin(); }
end()660   iterator end() const   { return HashTable.end(); }
size()661   unsigned size() const  { return HashTable.size(); }
662 
find(StringRef Name)663   iterator find(StringRef Name) const { return HashTable.find(Name); }
664 
665   /// Print some statistics to stderr that indicate how well the
666   /// hashing is doing.
667   void PrintStats() const;
668 
669   /// Populate the identifier table with info about the language keywords
670   /// for the language specified by \p LangOpts.
671   void AddKeywords(const LangOptions &LangOpts);
672 
673   /// Returns the correct diagnostic to issue for a future-compat diagnostic
674   /// warning. Note, this function assumes the identifier passed has already
675   /// been determined to be a future compatible keyword.
676   diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
677                                      const LangOptions &LangOpts);
678 };
679 
680 /// A family of Objective-C methods.
681 ///
682 /// These families have no inherent meaning in the language, but are
683 /// nonetheless central enough in the existing implementations to
684 /// merit direct AST support.  While, in theory, arbitrary methods can
685 /// be considered to form families, we focus here on the methods
686 /// involving allocation and retain-count management, as these are the
687 /// most "core" and the most likely to be useful to diverse clients
688 /// without extra information.
689 ///
690 /// Both selectors and actual method declarations may be classified
691 /// into families.  Method families may impose additional restrictions
692 /// beyond their selector name; for example, a method called '_init'
693 /// that returns void is not considered to be in the 'init' family
694 /// (but would be if it returned 'id').  It is also possible to
695 /// explicitly change or remove a method's family.  Therefore the
696 /// method's family should be considered the single source of truth.
enum ObjCMethodFamily {
  /// No particular method family.
  OMF_None,

  // Selectors in these families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have
  // additional CamelCase "words" in their first selector chunk
  // following the family name.
  OMF_alloc,
  OMF_copy,
  OMF_init,
  OMF_mutableCopy,
  OMF_new,

  // These families are singletons consisting only of the nullary
  // selector with the given name.
  OMF_autorelease,
  OMF_dealloc,
  OMF_finalize,
  OMF_release,
  OMF_retain,
  OMF_retainCount,
  OMF_self,
  OMF_initialize,

  // performSelector families
  //
  // NOTE: this must remain the last enumerator; the static_assert below uses
  // it to check that every family fits in ObjCMethodFamilyBitWidth bits.
  OMF_performSelector
};

/// Enough bits to store any enumerator in ObjCMethodFamily or
/// InvalidObjCMethodFamily.
enum { ObjCMethodFamilyBitWidth = 4 };

/// An invalid value of ObjCMethodFamily: the all-ones pattern of
/// ObjCMethodFamilyBitWidth bits.
enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 };

// Enforce the documented guarantee at compile time: the largest real family
// must stay strictly below the invalid marker, otherwise a newly added family
// would be indistinguishable from InvalidObjCMethodFamily (or would not fit in
// ObjCMethodFamilyBitWidth bits at all).
static_assert(static_cast<int>(OMF_performSelector) <
                  static_cast<int>(InvalidObjCMethodFamily),
              "ObjCMethodFamilyBitWidth is too small to keep every "
              "ObjCMethodFamily enumerator distinct from "
              "InvalidObjCMethodFamily");
732 
733 /// A family of Objective-C methods.
734 ///
/// These are families of methods whose result type is initially 'id', but
/// which are candidates for having the result type changed to 'instancetype'.
enum ObjCInstanceTypeFamily {
  // No family; by position this is the zero value of the enumeration.
  OIT_None,
  // NOTE(review): the remaining enumerators name the individual families;
  // their exact matching rules are implemented outside this header and
  // cannot be confirmed from here.
  OIT_Array,
  OIT_Dictionary,
  OIT_Singleton,
  OIT_Init,
  OIT_ReturnsSelf
};
745 
/// String-format family of a selector; this is the result type of
/// Selector::getStringFormatFamilyImpl().
enum ObjCStringFormatFamily {
  // No family; by position this is the zero value of the enumeration.
  SFF_None,
  // NOTE(review): presumably the NSString- and CFString-style formatting
  // families respectively - confirm against the implementation.
  SFF_NSString,
  SFF_CFString
};
751 
752 /// Smart pointer class that efficiently represents Objective-C method
753 /// names.
754 ///
755 /// This class will either point to an IdentifierInfo or a
756 /// MultiKeywordSelector (which is private). This enables us to optimize
757 /// selectors that take no arguments and selectors that take 1 argument, which
758 /// accounts for 78% of all selectors in Cocoa.h.
759 class Selector {
760   friend class Diagnostic;
761   friend class SelectorTable; // only the SelectorTable can create these
762   friend class DeclarationName; // and the AST's DeclarationName.
763 
764   enum IdentifierInfoFlag {
765     // Empty selector = 0. Note that these enumeration values must
766     // correspond to the enumeration values of DeclarationName::StoredNameKind
767     ZeroArg  = 0x01,
768     OneArg   = 0x02,
769     MultiArg = 0x07,
770     ArgFlags = 0x07
771   };
772 
773   /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low
774   /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any
775   /// case IdentifierInfo and MultiKeywordSelector are already aligned to
776   /// 8 bytes even on 32 bits archs because of DeclarationName.
777   uintptr_t InfoPtr = 0;
778 
/// Build a zero- or one-argument selector from \p II.  The argument count is
/// stored in the low bits of the pointer as nArgs + 1, i.e. ZeroArg or OneArg.
Selector(IdentifierInfo *II, unsigned nArgs)
    : InfoPtr(reinterpret_cast<uintptr_t>(II)) {
  // The low bits must be free before the flag is merged in.
  assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
  assert(nArgs < 2 && "nArgs not equal to 0/1");
  InfoPtr |= nArgs+1;
}
785 
/// Build a multi-keyword selector from \p SI, tagging the low bits of the
/// stored pointer with MultiArg.
Selector(MultiKeywordSelector *SI) {
  InfoPtr = reinterpret_cast<uintptr_t>(SI);
  // The low bits must be free before the flag is merged in.  The message
  // names the type that is actually being checked here.
  assert((InfoPtr & ArgFlags) == 0 &&
         "Insufficiently aligned MultiKeywordSelector");
  InfoPtr |= MultiArg;
}
791 
getAsIdentifierInfo()792   IdentifierInfo *getAsIdentifierInfo() const {
793     if (getIdentifierInfoFlag() < MultiArg)
794       return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
795     return nullptr;
796   }
797 
getMultiKeywordSelector()798   MultiKeywordSelector *getMultiKeywordSelector() const {
799     return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
800   }
801 
getIdentifierInfoFlag()802   unsigned getIdentifierInfoFlag() const {
803     return InfoPtr & ArgFlags;
804   }
805 
806   static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
807 
808   static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
809 
810 public:
811   /// The default ctor should only be used when creating data structures that
812   ///  will contain selectors.
813   Selector() = default;
Selector(uintptr_t V)814   explicit Selector(uintptr_t V) : InfoPtr(V) {}
815 
816   /// operator==/!= - Indicate whether the specified selectors are identical.
817   bool operator==(Selector RHS) const {
818     return InfoPtr == RHS.InfoPtr;
819   }
820   bool operator!=(Selector RHS) const {
821     return InfoPtr != RHS.InfoPtr;
822   }
823 
getAsOpaquePtr()824   void *getAsOpaquePtr() const {
825     return reinterpret_cast<void*>(InfoPtr);
826   }
827 
828   /// Determine whether this is the empty selector.
isNull()829   bool isNull() const { return InfoPtr == 0; }
830 
831   // Predicates to identify the selector type.
isKeywordSelector()832   bool isKeywordSelector() const {
833     return getIdentifierInfoFlag() != ZeroArg;
834   }
835 
isUnarySelector()836   bool isUnarySelector() const {
837     return getIdentifierInfoFlag() == ZeroArg;
838   }
839 
840   /// If this selector is the specific keyword selector described by Names.
841   bool isKeywordSelector(ArrayRef<StringRef> Names) const;
842 
843   /// If this selector is the specific unary selector described by Name.
844   bool isUnarySelector(StringRef Name) const;
845 
846   unsigned getNumArgs() const;
847 
848   /// Retrieve the identifier at a given position in the selector.
849   ///
850   /// Note that the identifier pointer returned may be NULL. Clients that only
851   /// care about the text of the identifier string, and not the specific,
852   /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
853   /// an empty string when the identifier pointer would be NULL.
854   ///
855   /// \param argIndex The index for which we want to retrieve the identifier.
856   /// This index shall be less than \c getNumArgs() unless this is a keyword
857   /// selector, in which case 0 is the only permissible value.
858   ///
859   /// \returns the uniqued identifier for this slot, or NULL if this slot has
860   /// no corresponding identifier.
861   IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
862 
863   /// Retrieve the name at a given position in the selector.
864   ///
865   /// \param argIndex The index for which we want to retrieve the name.
866   /// This index shall be less than \c getNumArgs() unless this is a keyword
867   /// selector, in which case 0 is the only permissible value.
868   ///
869   /// \returns the name for this slot, which may be the empty string if no
870   /// name was supplied.
871   StringRef getNameForSlot(unsigned argIndex) const;
872 
873   /// Derive the full selector name (e.g. "foo:bar:") and return
874   /// it as an std::string.
875   std::string getAsString() const;
876 
877   /// Prints the full selector name (e.g. "foo:bar:").
878   void print(llvm::raw_ostream &OS) const;
879 
880   void dump() const;
881 
882   /// Derive the conventional family of this method.
getMethodFamily()883   ObjCMethodFamily getMethodFamily() const {
884     return getMethodFamilyImpl(*this);
885   }
886 
getStringFormatFamily()887   ObjCStringFormatFamily getStringFormatFamily() const {
888     return getStringFormatFamilyImpl(*this);
889   }
890 
getEmptyMarker()891   static Selector getEmptyMarker() {
892     return Selector(uintptr_t(-1));
893   }
894 
getTombstoneMarker()895   static Selector getTombstoneMarker() {
896     return Selector(uintptr_t(-2));
897   }
898 
899   static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
900 };
901 
902 /// This table allows us to fully hide how we implement
903 /// multi-keyword caching.
904 class SelectorTable {
905   // Actually a SelectorTableImpl
906   void *Impl;
907 
908 public:
909   SelectorTable();
910   SelectorTable(const SelectorTable &) = delete;
911   SelectorTable &operator=(const SelectorTable &) = delete;
912   ~SelectorTable();
913 
914   /// Can create any sort of selector.
915   ///
916   /// \p NumArgs indicates whether this is a no argument selector "foo", a
917   /// single argument selector "foo:" or multi-argument "foo:bar:".
918   Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
919 
getUnarySelector(IdentifierInfo * ID)920   Selector getUnarySelector(IdentifierInfo *ID) {
921     return Selector(ID, 1);
922   }
923 
getNullarySelector(IdentifierInfo * ID)924   Selector getNullarySelector(IdentifierInfo *ID) {
925     return Selector(ID, 0);
926   }
927 
928   /// Return the total amount of memory allocated for managing selectors.
929   size_t getTotalMemory() const;
930 
931   /// Return the default setter name for the given identifier.
932   ///
933   /// This is "set" + \p Name where the initial character of \p Name
934   /// has been capitalized.
935   static SmallString<64> constructSetterName(StringRef Name);
936 
937   /// Return the default setter selector for the given identifier.
938   ///
939   /// This is "set" + \p Name where the initial character of \p Name
940   /// has been capitalized.
941   static Selector constructSetterSelector(IdentifierTable &Idents,
942                                           SelectorTable &SelTable,
943                                           const IdentifierInfo *Name);
944 
945   /// Return the property name for the given setter selector.
946   static std::string getPropertyNameFromSetterSelector(Selector Sel);
947 };
948 
949 namespace detail {
950 
951 /// DeclarationNameExtra is used as a base of various uncommon special names.
952 /// This class is needed since DeclarationName has not enough space to store
953 /// the kind of every possible names. Therefore the kind of common names is
954 /// stored directly in DeclarationName, and the kind of uncommon names is
955 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because
956 /// DeclarationName needs the lower 3 bits to store the kind of common names.
957 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change
958 /// here is very likely to require changes in DeclarationName(Table).
alignas(IdentifierInfoAlignment)959 class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
960   friend class clang::DeclarationName;
961   friend class clang::DeclarationNameTable;
962 
963 protected:
964   /// The kind of "extra" information stored in the DeclarationName. See
965   /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
966   /// are used. Note that DeclarationName depends on the numerical values
967   /// of the enumerators in this enum. See DeclarationName::StoredNameKind
968   /// for more info.
969   enum ExtraKind {
970     CXXDeductionGuideName,
971     CXXLiteralOperatorName,
972     CXXUsingDirective,
973     ObjCMultiArgSelector
974   };
975 
976   /// ExtraKindOrNumArgs has one of the following meaning:
977   ///  * The kind of an uncommon C++ special name. This DeclarationNameExtra
978   ///    is in this case in fact either a CXXDeductionGuideNameExtra or
979   ///    a CXXLiteralOperatorIdName.
980   ///
981   ///  * It may be also name common to C++ using-directives (CXXUsingDirective),
982   ///
983   ///  * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
984   ///    the number of arguments in the Objective-C selector, in which
985   ///    case the DeclarationNameExtra is also a MultiKeywordSelector.
986   unsigned ExtraKindOrNumArgs;
987 
988   DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
989   DeclarationNameExtra(unsigned NumArgs)
990       : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
991 
992   /// Return the corresponding ExtraKind.
993   ExtraKind getKind() const {
994     return static_cast<ExtraKind>(ExtraKindOrNumArgs >
995                                           (unsigned)ObjCMultiArgSelector
996                                       ? (unsigned)ObjCMultiArgSelector
997                                       : ExtraKindOrNumArgs);
998   }
999 
1000   /// Return the number of arguments in an ObjC selector. Only valid when this
1001   /// is indeed an ObjCMultiArgSelector.
1002   unsigned getNumArgs() const {
1003     assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
1004            "getNumArgs called but this is not an ObjC selector!");
1005     return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
1006   }
1007 };
1008 
1009 } // namespace detail
1010 
1011 }  // namespace clang
1012 
1013 namespace llvm {
1014 
1015 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
1016 /// DenseSets.
1017 template <>
1018 struct DenseMapInfo<clang::Selector> {
1019   static clang::Selector getEmptyKey() {
1020     return clang::Selector::getEmptyMarker();
1021   }
1022 
1023   static clang::Selector getTombstoneKey() {
1024     return clang::Selector::getTombstoneMarker();
1025   }
1026 
1027   static unsigned getHashValue(clang::Selector S);
1028 
1029   static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
1030     return LHS == RHS;
1031   }
1032 };
1033 
1034 template<>
1035 struct PointerLikeTypeTraits<clang::Selector> {
1036   static const void *getAsVoidPointer(clang::Selector P) {
1037     return P.getAsOpaquePtr();
1038   }
1039 
1040   static clang::Selector getFromVoidPointer(const void *P) {
1041     return clang::Selector(reinterpret_cast<uintptr_t>(P));
1042   }
1043 
1044   static constexpr int NumLowBitsAvailable = 0;
1045 };
1046 
1047 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
1048 // are not guaranteed to be 8-byte aligned.
1049 template<>
1050 struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
1051   static void *getAsVoidPointer(clang::IdentifierInfo* P) {
1052     return P;
1053   }
1054 
1055   static clang::IdentifierInfo *getFromVoidPointer(void *P) {
1056     return static_cast<clang::IdentifierInfo*>(P);
1057   }
1058 
1059   static constexpr int NumLowBitsAvailable = 1;
1060 };
1061 
1062 template<>
1063 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
1064   static const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
1065     return P;
1066   }
1067 
1068   static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
1069     return static_cast<const clang::IdentifierInfo*>(P);
1070   }
1071 
1072   static constexpr int NumLowBitsAvailable = 1;
1073 };
1074 
1075 } // namespace llvm
1076 
1077 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
1078