1 //===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
11 /// clang::Selector interfaces.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
16 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17
18 #include "clang/Basic/DiagnosticIDs.h"
19 #include "clang/Basic/LLVM.h"
20 #include "clang/Basic/TokenKinds.h"
21 #include "llvm/ADT/DenseMapInfo.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/ADT/StringMap.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Support/Allocator.h"
26 #include "llvm/Support/PointerLikeTypeTraits.h"
27 #include "llvm/Support/type_traits.h"
28 #include <cassert>
29 #include <cstddef>
30 #include <cstdint>
31 #include <cstring>
32 #include <string>
33 #include <utility>
34
35 namespace clang {
36
37 class DeclarationName;
38 class DeclarationNameTable;
39 class IdentifierInfo;
40 class LangOptions;
41 class MultiKeywordSelector;
42 class SourceLocation;
43
/// Classifies why (if at all) an identifier is considered reserved.
///
/// IdentifierInfo::isReserved() returns one of these values. NotReserved is
/// the only value for which isReservedAtGlobalScope() returns false. The two
/// context-dependent kinds, StartsWithUnderscoreAtGlobalScope and
/// StartsWithUnderscoreAndIsExternC, are additionally rejected by
/// isReservedInAllContexts().
enum class ReservedIdentifierStatus {
  NotReserved = 0,
  StartsWithUnderscoreAtGlobalScope,
  StartsWithUnderscoreAndIsExternC,
  StartsWithDoubleUnderscore,
  StartsWithUnderscoreFollowedByCapitalLetter,
  ContainsDoubleUnderscore,
};
52
53 /// Determine whether an identifier is reserved for use as a name at global
54 /// scope. Such identifiers might be implementation-specific global functions
55 /// or variables.
isReservedAtGlobalScope(ReservedIdentifierStatus Status)56 inline bool isReservedAtGlobalScope(ReservedIdentifierStatus Status) {
57 return Status != ReservedIdentifierStatus::NotReserved;
58 }
59
60 /// Determine whether an identifier is reserved in all contexts. Such
61 /// identifiers might be implementation-specific keywords or macros, for
62 /// example.
isReservedInAllContexts(ReservedIdentifierStatus Status)63 inline bool isReservedInAllContexts(ReservedIdentifierStatus Status) {
64 return Status != ReservedIdentifierStatus::NotReserved &&
65 Status != ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope &&
66 Status != ReservedIdentifierStatus::StartsWithUnderscoreAndIsExternC;
67 }
68
/// A simple pair of identifier info and location.
using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;

/// IdentifierInfo and other related classes are aligned to
/// 8 bytes so that DeclarationName can use the lower 3 bits
/// of a pointer to one of these classes.
enum { IdentifierInfoAlignment = 8 };

/// Width, in bits, of the IdentifierInfo::ObjCOrBuiltinID bit-field, which
/// stores either an Objective-C keyword ID or a builtin ID.
static constexpr int ObjCOrBuiltinIDBits = 16;
78
79 /// One of these records is kept for each identifier that
80 /// is lexed. This contains information about whether the token was \#define'd,
81 /// is a language keyword, or if it is a front-end token of some sort (e.g. a
82 /// variable or function name). The preprocessor keeps this information in a
83 /// set, and all tok::identifier tokens have a pointer to one of these.
84 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits.
alignas(IdentifierInfoAlignment)85 class alignas(IdentifierInfoAlignment) IdentifierInfo {
86 friend class IdentifierTable;
87
88 // Front-end token ID or tok::identifier.
89 unsigned TokenID : 9;
90
91 // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
92 // First NUM_OBJC_KEYWORDS values are for Objective-C,
93 // the remaining values are for builtins.
94 unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits;
95
96 // True if there is a #define for this.
97 unsigned HasMacro : 1;
98
99 // True if there was a #define for this.
100 unsigned HadMacro : 1;
101
102 // True if the identifier is a language extension.
103 unsigned IsExtension : 1;
104
105 // True if the identifier is a keyword in a newer or proposed Standard.
106 unsigned IsFutureCompatKeyword : 1;
107
108 // True if the identifier is poisoned.
109 unsigned IsPoisoned : 1;
110
111 // True if the identifier is a C++ operator keyword.
112 unsigned IsCPPOperatorKeyword : 1;
113
114 // Internal bit set by the member function RecomputeNeedsHandleIdentifier.
115 // See comment about RecomputeNeedsHandleIdentifier for more info.
116 unsigned NeedsHandleIdentifier : 1;
117
118 // True if the identifier was loaded (at least partially) from an AST file.
119 unsigned IsFromAST : 1;
120
121 // True if the identifier has changed from the definition
122 // loaded from an AST file.
123 unsigned ChangedAfterLoad : 1;
124
125 // True if the identifier's frontend information has changed from the
126 // definition loaded from an AST file.
127 unsigned FEChangedAfterLoad : 1;
128
129 // True if revertTokenIDToIdentifier was called.
130 unsigned RevertedTokenID : 1;
131
132 // True if there may be additional information about
133 // this identifier stored externally.
134 unsigned OutOfDate : 1;
135
136 // True if this is the 'import' contextual keyword.
137 unsigned IsModulesImport : 1;
138
139 // True if this is a mangled OpenMP variant name.
140 unsigned IsMangledOpenMPVariantName : 1;
141
142 // True if this is a deprecated macro.
143 unsigned IsDeprecatedMacro : 1;
144
145 // True if this macro is unsafe in headers.
146 unsigned IsRestrictExpansion : 1;
147
148 // True if this macro is final.
149 unsigned IsFinal : 1;
150
151 // 22 bits left in a 64-bit word.
152
153 // Managed by the language front-end.
154 void *FETokenInfo = nullptr;
155
156 llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
157
158 IdentifierInfo()
159 : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false),
160 HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false),
161 IsPoisoned(false), IsCPPOperatorKeyword(false),
162 NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false),
163 FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false),
164 IsModulesImport(false), IsMangledOpenMPVariantName(false),
165 IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}
166
167 public:
168 IdentifierInfo(const IdentifierInfo &) = delete;
169 IdentifierInfo &operator=(const IdentifierInfo &) = delete;
170 IdentifierInfo(IdentifierInfo &&) = delete;
171 IdentifierInfo &operator=(IdentifierInfo &&) = delete;
172
173 /// Return true if this is the identifier for the specified string.
174 ///
175 /// This is intended to be used for string literals only: II->isStr("foo").
176 template <std::size_t StrLen>
177 bool isStr(const char (&Str)[StrLen]) const {
178 return getLength() == StrLen-1 &&
179 memcmp(getNameStart(), Str, StrLen-1) == 0;
180 }
181
182 /// Return true if this is the identifier for the specified StringRef.
183 bool isStr(llvm::StringRef Str) const {
184 llvm::StringRef ThisStr(getNameStart(), getLength());
185 return ThisStr == Str;
186 }
187
188 /// Return the beginning of the actual null-terminated string for this
189 /// identifier.
190 const char *getNameStart() const { return Entry->getKeyData(); }
191
192 /// Efficiently return the length of this identifier info.
193 unsigned getLength() const { return Entry->getKeyLength(); }
194
195 /// Return the actual identifier string.
196 StringRef getName() const {
197 return StringRef(getNameStart(), getLength());
198 }
199
200 /// Return true if this identifier is \#defined to some other value.
201 /// \note The current definition may be in a module and not currently visible.
202 bool hasMacroDefinition() const {
203 return HasMacro;
204 }
205 void setHasMacroDefinition(bool Val) {
206 if (HasMacro == Val) return;
207
208 HasMacro = Val;
209 if (Val) {
210 NeedsHandleIdentifier = true;
211 HadMacro = true;
212 } else {
213 // If this is a final macro, make the deprecation and header unsafe bits
214 // stick around after the undefinition so they apply to any redefinitions.
215 if (!IsFinal) {
216 // Because calling the setters of these calls recomputes, just set them
217 // manually to avoid recomputing a bunch of times.
218 IsDeprecatedMacro = false;
219 IsRestrictExpansion = false;
220 }
221 RecomputeNeedsHandleIdentifier();
222 }
223 }
224 /// Returns true if this identifier was \#defined to some value at any
225 /// moment. In this case there should be an entry for the identifier in the
226 /// macro history table in Preprocessor.
227 bool hadMacroDefinition() const {
228 return HadMacro;
229 }
230
231 bool isDeprecatedMacro() const { return IsDeprecatedMacro; }
232
233 void setIsDeprecatedMacro(bool Val) {
234 if (IsDeprecatedMacro == Val)
235 return;
236 IsDeprecatedMacro = Val;
237 if (Val)
238 NeedsHandleIdentifier = true;
239 else
240 RecomputeNeedsHandleIdentifier();
241 }
242
243 bool isRestrictExpansion() const { return IsRestrictExpansion; }
244
245 void setIsRestrictExpansion(bool Val) {
246 if (IsRestrictExpansion == Val)
247 return;
248 IsRestrictExpansion = Val;
249 if (Val)
250 NeedsHandleIdentifier = true;
251 else
252 RecomputeNeedsHandleIdentifier();
253 }
254
255 bool isFinal() const { return IsFinal; }
256
257 void setIsFinal(bool Val) { IsFinal = Val; }
258
259 /// If this is a source-language token (e.g. 'for'), this API
260 /// can be used to cause the lexer to map identifiers to source-language
261 /// tokens.
262 tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
263
264 /// True if revertTokenIDToIdentifier() was called.
265 bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
266
267 /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
268 /// compatibility.
269 ///
270 /// TokenID is normally read-only but there are 2 instances where we revert it
271 /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
272 /// using this method so we can inform serialization about it.
273 void revertTokenIDToIdentifier() {
274 assert(TokenID != tok::identifier && "Already at tok::identifier");
275 TokenID = tok::identifier;
276 RevertedTokenID = true;
277 }
278 void revertIdentifierToTokenID(tok::TokenKind TK) {
279 assert(TokenID == tok::identifier && "Should be at tok::identifier");
280 TokenID = TK;
281 RevertedTokenID = false;
282 }
283
284 /// Return the preprocessor keyword ID for this identifier.
285 ///
286 /// For example, "define" will return tok::pp_define.
287 tok::PPKeywordKind getPPKeywordID() const;
288
289 /// Return the Objective-C keyword ID for the this identifier.
290 ///
291 /// For example, 'class' will return tok::objc_class if ObjC is enabled.
292 tok::ObjCKeywordKind getObjCKeywordID() const {
293 if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
294 return tok::ObjCKeywordKind(ObjCOrBuiltinID);
295 else
296 return tok::objc_not_keyword;
297 }
298 void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
299
300 /// Return a value indicating whether this is a builtin function.
301 ///
302 /// 0 is not-built-in. 1+ are specific builtin functions.
303 unsigned getBuiltinID() const {
304 if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
305 return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
306 else
307 return 0;
308 }
309 void setBuiltinID(unsigned ID) {
310 ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
311 assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
312 && "ID too large for field!");
313 }
314
315 unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
316 void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
317
318 /// get/setExtension - Initialize information about whether or not this
319 /// language token is an extension. This controls extension warnings, and is
320 /// only valid if a custom token ID is set.
321 bool isExtensionToken() const { return IsExtension; }
322 void setIsExtensionToken(bool Val) {
323 IsExtension = Val;
324 if (Val)
325 NeedsHandleIdentifier = true;
326 else
327 RecomputeNeedsHandleIdentifier();
328 }
329
330 /// is/setIsFutureCompatKeyword - Initialize information about whether or not
331 /// this language token is a keyword in a newer or proposed Standard. This
332 /// controls compatibility warnings, and is only true when not parsing the
333 /// corresponding Standard. Once a compatibility problem has been diagnosed
334 /// with this keyword, the flag will be cleared.
335 bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
336 void setIsFutureCompatKeyword(bool Val) {
337 IsFutureCompatKeyword = Val;
338 if (Val)
339 NeedsHandleIdentifier = true;
340 else
341 RecomputeNeedsHandleIdentifier();
342 }
343
344 /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the
345 /// Preprocessor will emit an error every time this token is used.
346 void setIsPoisoned(bool Value = true) {
347 IsPoisoned = Value;
348 if (Value)
349 NeedsHandleIdentifier = true;
350 else
351 RecomputeNeedsHandleIdentifier();
352 }
353
354 /// Return true if this token has been poisoned.
355 bool isPoisoned() const { return IsPoisoned; }
356
357 /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
358 /// this identifier is a C++ alternate representation of an operator.
359 void setIsCPlusPlusOperatorKeyword(bool Val = true) {
360 IsCPPOperatorKeyword = Val;
361 }
362 bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
363
364 /// Return true if this token is a keyword in the specified language.
365 bool isKeyword(const LangOptions &LangOpts) const;
366
367 /// Return true if this token is a C++ keyword in the specified
368 /// language.
369 bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
370
371 /// Get and set FETokenInfo. The language front-end is allowed to associate
372 /// arbitrary metadata with this token.
373 void *getFETokenInfo() const { return FETokenInfo; }
374 void setFETokenInfo(void *T) { FETokenInfo = T; }
375
376 /// Return true if the Preprocessor::HandleIdentifier must be called
377 /// on a token of this identifier.
378 ///
379 /// If this returns false, we know that HandleIdentifier will not affect
380 /// the token.
381 bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
382
383 /// Return true if the identifier in its current state was loaded
384 /// from an AST file.
385 bool isFromAST() const { return IsFromAST; }
386
387 void setIsFromAST() { IsFromAST = true; }
388
389 /// Determine whether this identifier has changed since it was loaded
390 /// from an AST file.
391 bool hasChangedSinceDeserialization() const {
392 return ChangedAfterLoad;
393 }
394
395 /// Note that this identifier has changed since it was loaded from
396 /// an AST file.
397 void setChangedSinceDeserialization() {
398 ChangedAfterLoad = true;
399 }
400
401 /// Determine whether the frontend token information for this
402 /// identifier has changed since it was loaded from an AST file.
403 bool hasFETokenInfoChangedSinceDeserialization() const {
404 return FEChangedAfterLoad;
405 }
406
407 /// Note that the frontend token information for this identifier has
408 /// changed since it was loaded from an AST file.
409 void setFETokenInfoChangedSinceDeserialization() {
410 FEChangedAfterLoad = true;
411 }
412
413 /// Determine whether the information for this identifier is out of
414 /// date with respect to the external source.
415 bool isOutOfDate() const { return OutOfDate; }
416
417 /// Set whether the information for this identifier is out of
418 /// date with respect to the external source.
419 void setOutOfDate(bool OOD) {
420 OutOfDate = OOD;
421 if (OOD)
422 NeedsHandleIdentifier = true;
423 else
424 RecomputeNeedsHandleIdentifier();
425 }
426
427 /// Determine whether this is the contextual keyword \c import.
428 bool isModulesImport() const { return IsModulesImport; }
429
430 /// Set whether this identifier is the contextual keyword \c import.
431 void setModulesImport(bool I) {
432 IsModulesImport = I;
433 if (I)
434 NeedsHandleIdentifier = true;
435 else
436 RecomputeNeedsHandleIdentifier();
437 }
438
439 /// Determine whether this is the mangled name of an OpenMP variant.
440 bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
441
442 /// Set whether this is the mangled name of an OpenMP variant.
443 void setMangledOpenMPVariantName(bool I) { IsMangledOpenMPVariantName = I; }
444
445 /// Return true if this identifier is an editor placeholder.
446 ///
447 /// Editor placeholders are produced by the code-completion engine and are
448 /// represented as characters between '<#' and '#>' in the source code. An
449 /// example of auto-completed call with a placeholder parameter is shown
450 /// below:
451 /// \code
452 /// function(<#int x#>);
453 /// \endcode
454 bool isEditorPlaceholder() const {
455 return getName().startswith("<#") && getName().endswith("#>");
456 }
457
458 /// Determine whether \p this is a name reserved for the implementation (C99
459 /// 7.1.3, C++ [lib.global.names]).
460 ReservedIdentifierStatus isReserved(const LangOptions &LangOpts) const;
461
462 /// If the identifier is an "uglified" reserved name, return a cleaned form.
463 /// e.g. _Foo => Foo. Otherwise, just returns the name.
464 StringRef deuglifiedName() const;
465
466 /// Provide less than operator for lexicographical sorting.
467 bool operator<(const IdentifierInfo &RHS) const {
468 return getName() < RHS.getName();
469 }
470
471 private:
472 /// The Preprocessor::HandleIdentifier does several special (but rare)
473 /// things to identifiers of various sorts. For example, it changes the
474 /// \c for keyword token from tok::identifier to tok::for.
475 ///
476 /// This method is very tied to the definition of HandleIdentifier. Any
477 /// change to it should be reflected here.
478 void RecomputeNeedsHandleIdentifier() {
479 NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
480 isExtensionToken() || isFutureCompatKeyword() ||
481 isOutOfDate() || isModulesImport();
482 }
483 };
484
485 /// An RAII object for [un]poisoning an identifier within a scope.
486 ///
487 /// \p II is allowed to be null, in which case objects of this type have
488 /// no effect.
489 class PoisonIdentifierRAIIObject {
490 IdentifierInfo *const II;
491 const bool OldValue;
492
493 public:
PoisonIdentifierRAIIObject(IdentifierInfo * II,bool NewValue)494 PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
495 : II(II), OldValue(II ? II->isPoisoned() : false) {
496 if(II)
497 II->setIsPoisoned(NewValue);
498 }
499
~PoisonIdentifierRAIIObject()500 ~PoisonIdentifierRAIIObject() {
501 if(II)
502 II->setIsPoisoned(OldValue);
503 }
504 };
505
/// An iterator that walks over all of the known identifiers
/// in the lookup table.
///
/// Since this iterator uses an abstract interface via virtual
/// functions, it uses an object-oriented interface rather than the
/// more standard C++ STL iterator interface. In this OO-style
/// iteration, the single function \c Next() provides dereference,
/// advance, and end-of-sequence checking in a single
/// operation. Subclasses of this iterator type will provide the
/// actual functionality.
class IdentifierIterator {
protected:
  // Construction is restricted to subclasses; the class itself is abstract.
  IdentifierIterator() = default;

public:
  // Iterators are not copyable.
  IdentifierIterator(const IdentifierIterator &) = delete;
  IdentifierIterator &operator=(const IdentifierIterator &) = delete;

  virtual ~IdentifierIterator();

  /// Retrieve the next string in the identifier table and
  /// advance the iterator to the following string.
  ///
  /// \returns The next string in the identifier table. If there is
  /// no such string, returns an empty \c StringRef.
  virtual StringRef Next() = 0;
};
533
/// Provides lookups to, and iteration over, IdentifierInfo objects.
///
/// IdentifierTable consults an object of this type (its "external lookup")
/// when a name is not yet present in its own hash table.
class IdentifierInfoLookup {
public:
  virtual ~IdentifierInfoLookup();

  /// Return the IdentifierInfo for the specified named identifier.
  ///
  /// Unlike the version in IdentifierTable, this returns a pointer instead
  /// of a reference. If the pointer is null then the IdentifierInfo cannot
  /// be found.
  virtual IdentifierInfo* get(StringRef Name) = 0;

  /// Retrieve an iterator into the set of all identifiers
  /// known to this identifier lookup source.
  ///
  /// This routine provides access to all of the identifiers known to
  /// the identifier lookup, allowing access to the contents of the
  /// identifiers without introducing the overhead of constructing
  /// IdentifierInfo objects for each.
  ///
  /// \returns A new iterator into the set of known identifiers. The
  /// caller is responsible for deleting this iterator.
  virtual IdentifierIterator *getIdentifiers();
};
558
559 /// Implements an efficient mapping from strings to IdentifierInfo nodes.
560 ///
561 /// This has no other purpose, but this is an extremely performance-critical
562 /// piece of the code, as each occurrence of every identifier goes through
563 /// here when lexed.
564 class IdentifierTable {
565 // Shark shows that using MallocAllocator is *much* slower than using this
566 // BumpPtrAllocator!
567 using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>;
568 HashTableTy HashTable;
569
570 IdentifierInfoLookup* ExternalLookup;
571
572 public:
573 /// Create the identifier table.
574 explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr);
575
576 /// Create the identifier table, populating it with info about the
577 /// language keywords for the language specified by \p LangOpts.
578 explicit IdentifierTable(const LangOptions &LangOpts,
579 IdentifierInfoLookup *ExternalLookup = nullptr);
580
581 /// Set the external identifier lookup mechanism.
setExternalIdentifierLookup(IdentifierInfoLookup * IILookup)582 void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
583 ExternalLookup = IILookup;
584 }
585
586 /// Retrieve the external identifier lookup object, if any.
getExternalIdentifierLookup()587 IdentifierInfoLookup *getExternalIdentifierLookup() const {
588 return ExternalLookup;
589 }
590
getAllocator()591 llvm::BumpPtrAllocator& getAllocator() {
592 return HashTable.getAllocator();
593 }
594
595 /// Return the identifier token info for the specified named
596 /// identifier.
get(StringRef Name)597 IdentifierInfo &get(StringRef Name) {
598 auto &Entry = *HashTable.try_emplace(Name, nullptr).first;
599
600 IdentifierInfo *&II = Entry.second;
601 if (II) return *II;
602
603 // No entry; if we have an external lookup, look there first.
604 if (ExternalLookup) {
605 II = ExternalLookup->get(Name);
606 if (II)
607 return *II;
608 }
609
610 // Lookups failed, make a new IdentifierInfo.
611 void *Mem = getAllocator().Allocate<IdentifierInfo>();
612 II = new (Mem) IdentifierInfo();
613
614 // Make sure getName() knows how to find the IdentifierInfo
615 // contents.
616 II->Entry = &Entry;
617
618 return *II;
619 }
620
get(StringRef Name,tok::TokenKind TokenCode)621 IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
622 IdentifierInfo &II = get(Name);
623 II.TokenID = TokenCode;
624 assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
625 return II;
626 }
627
628 /// Gets an IdentifierInfo for the given name without consulting
629 /// external sources.
630 ///
631 /// This is a version of get() meant for external sources that want to
632 /// introduce or modify an identifier. If they called get(), they would
633 /// likely end up in a recursion.
getOwn(StringRef Name)634 IdentifierInfo &getOwn(StringRef Name) {
635 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
636
637 IdentifierInfo *&II = Entry.second;
638 if (II)
639 return *II;
640
641 // Lookups failed, make a new IdentifierInfo.
642 void *Mem = getAllocator().Allocate<IdentifierInfo>();
643 II = new (Mem) IdentifierInfo();
644
645 // Make sure getName() knows how to find the IdentifierInfo
646 // contents.
647 II->Entry = &Entry;
648
649 // If this is the 'import' contextual keyword, mark it as such.
650 if (Name.equals("import"))
651 II->setModulesImport(true);
652
653 return *II;
654 }
655
656 using iterator = HashTableTy::const_iterator;
657 using const_iterator = HashTableTy::const_iterator;
658
begin()659 iterator begin() const { return HashTable.begin(); }
end()660 iterator end() const { return HashTable.end(); }
size()661 unsigned size() const { return HashTable.size(); }
662
find(StringRef Name)663 iterator find(StringRef Name) const { return HashTable.find(Name); }
664
665 /// Print some statistics to stderr that indicate how well the
666 /// hashing is doing.
667 void PrintStats() const;
668
669 /// Populate the identifier table with info about the language keywords
670 /// for the language specified by \p LangOpts.
671 void AddKeywords(const LangOptions &LangOpts);
672
673 /// Returns the correct diagnostic to issue for a future-compat diagnostic
674 /// warning. Note, this function assumes the identifier passed has already
675 /// been determined to be a future compatible keyword.
676 diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
677 const LangOptions &LangOpts);
678 };
679
/// A family of Objective-C methods.
///
/// These families have no inherent meaning in the language, but are
/// nonetheless central enough in the existing implementations to
/// merit direct AST support.  While, in theory, arbitrary methods can
/// be considered to form families, we focus here on the methods
/// involving allocation and retain-count management, as these are the
/// most "core" and the most likely to be useful to diverse clients
/// without extra information.
///
/// Both selectors and actual method declarations may be classified
/// into families.  Method families may impose additional restrictions
/// beyond their selector name; for example, a method called '_init'
/// that returns void is not considered to be in the 'init' family
/// (but would be if it returned 'id').  It is also possible to
/// explicitly change or remove a method's family.  Therefore the
/// method's family should be considered the single source of truth.
enum ObjCMethodFamily {
  /// No particular method family.
  OMF_None,

  // Selectors in these families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have
  // additional CamelCase "words" in their first selector chunk
  // following the family name.
  OMF_alloc,
  OMF_copy,
  OMF_init,
  OMF_mutableCopy,
  OMF_new,

  // These families are singletons consisting only of the nullary
  // selector with the given name.
  OMF_autorelease,
  OMF_dealloc,
  OMF_finalize,
  OMF_release,
  OMF_retain,
  OMF_retainCount,
  OMF_self,
  OMF_initialize,

  // performSelector families
  //
  // NOTE: this must remain the last enumerator; the static_assert below
  // uses it to check ObjCMethodFamilyBitWidth.
  OMF_performSelector
};

/// Enough bits to store any enumerator in ObjCMethodFamily or
/// InvalidObjCMethodFamily.
enum { ObjCMethodFamilyBitWidth = 4 };

/// An invalid value of ObjCMethodFamily.
///
/// This is the all-ones pattern of an ObjCMethodFamilyBitWidth-bit field.
enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 };

// Enforce the invariant documented on ObjCMethodFamilyBitWidth: every real
// family must fit in the bit width and stay distinct from the invalid
// sentinel. Growing the enum past that limit then fails to compile instead
// of silently colliding with InvalidObjCMethodFamily.
static_assert(static_cast<unsigned>(OMF_performSelector) <
                  static_cast<unsigned>(InvalidObjCMethodFamily),
              "ObjCMethodFamilyBitWidth is too small for ObjCMethodFamily");
732
/// A family of Objective-C methods.
///
/// These are families of methods whose result type is initially 'id', but
/// which are candidates for having the result type changed to
/// 'instancetype'. Selector::getInstTypeMethodFamily() returns one of these
/// values.
enum ObjCInstanceTypeFamily {
  OIT_None,
  OIT_Array,
  OIT_Dictionary,
  OIT_Singleton,
  OIT_Init,
  OIT_ReturnsSelf
};
745
/// Classification returned by Selector::getStringFormatFamily().
///
/// NOTE(review): the enumerator names suggest NSString- and CFString-style
/// format methods; confirm against getStringFormatFamilyImpl().
enum ObjCStringFormatFamily {
  SFF_None,
  SFF_NSString,
  SFF_CFString
};
751
752 /// Smart pointer class that efficiently represents Objective-C method
753 /// names.
754 ///
755 /// This class will either point to an IdentifierInfo or a
756 /// MultiKeywordSelector (which is private). This enables us to optimize
757 /// selectors that take no arguments and selectors that take 1 argument, which
758 /// accounts for 78% of all selectors in Cocoa.h.
759 class Selector {
760 friend class Diagnostic;
761 friend class SelectorTable; // only the SelectorTable can create these
762 friend class DeclarationName; // and the AST's DeclarationName.
763
764 enum IdentifierInfoFlag {
765 // Empty selector = 0. Note that these enumeration values must
766 // correspond to the enumeration values of DeclarationName::StoredNameKind
767 ZeroArg = 0x01,
768 OneArg = 0x02,
769 MultiArg = 0x07,
770 ArgFlags = 0x07
771 };
772
773 /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low
774 /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any
775 /// case IdentifierInfo and MultiKeywordSelector are already aligned to
776 /// 8 bytes even on 32 bits archs because of DeclarationName.
777 uintptr_t InfoPtr = 0;
778
Selector(IdentifierInfo * II,unsigned nArgs)779 Selector(IdentifierInfo *II, unsigned nArgs) {
780 InfoPtr = reinterpret_cast<uintptr_t>(II);
781 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
782 assert(nArgs < 2 && "nArgs not equal to 0/1");
783 InfoPtr |= nArgs+1;
784 }
785
Selector(MultiKeywordSelector * SI)786 Selector(MultiKeywordSelector *SI) {
787 InfoPtr = reinterpret_cast<uintptr_t>(SI);
788 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
789 InfoPtr |= MultiArg;
790 }
791
getAsIdentifierInfo()792 IdentifierInfo *getAsIdentifierInfo() const {
793 if (getIdentifierInfoFlag() < MultiArg)
794 return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
795 return nullptr;
796 }
797
getMultiKeywordSelector()798 MultiKeywordSelector *getMultiKeywordSelector() const {
799 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
800 }
801
getIdentifierInfoFlag()802 unsigned getIdentifierInfoFlag() const {
803 return InfoPtr & ArgFlags;
804 }
805
806 static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
807
808 static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
809
810 public:
811 /// The default ctor should only be used when creating data structures that
812 /// will contain selectors.
813 Selector() = default;
Selector(uintptr_t V)814 explicit Selector(uintptr_t V) : InfoPtr(V) {}
815
816 /// operator==/!= - Indicate whether the specified selectors are identical.
817 bool operator==(Selector RHS) const {
818 return InfoPtr == RHS.InfoPtr;
819 }
820 bool operator!=(Selector RHS) const {
821 return InfoPtr != RHS.InfoPtr;
822 }
823
getAsOpaquePtr()824 void *getAsOpaquePtr() const {
825 return reinterpret_cast<void*>(InfoPtr);
826 }
827
828 /// Determine whether this is the empty selector.
isNull()829 bool isNull() const { return InfoPtr == 0; }
830
831 // Predicates to identify the selector type.
isKeywordSelector()832 bool isKeywordSelector() const {
833 return getIdentifierInfoFlag() != ZeroArg;
834 }
835
isUnarySelector()836 bool isUnarySelector() const {
837 return getIdentifierInfoFlag() == ZeroArg;
838 }
839
840 /// If this selector is the specific keyword selector described by Names.
841 bool isKeywordSelector(ArrayRef<StringRef> Names) const;
842
843 /// If this selector is the specific unary selector described by Name.
844 bool isUnarySelector(StringRef Name) const;
845
846 unsigned getNumArgs() const;
847
848 /// Retrieve the identifier at a given position in the selector.
849 ///
850 /// Note that the identifier pointer returned may be NULL. Clients that only
851 /// care about the text of the identifier string, and not the specific,
852 /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
853 /// an empty string when the identifier pointer would be NULL.
854 ///
855 /// \param argIndex The index for which we want to retrieve the identifier.
856 /// This index shall be less than \c getNumArgs() unless this is a keyword
857 /// selector, in which case 0 is the only permissible value.
858 ///
859 /// \returns the uniqued identifier for this slot, or NULL if this slot has
860 /// no corresponding identifier.
861 IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
862
863 /// Retrieve the name at a given position in the selector.
864 ///
865 /// \param argIndex The index for which we want to retrieve the name.
/// This index shall be less than \c getNumArgs() unless this is a unary
/// selector, in which case 0 is the only permissible value.
868 ///
869 /// \returns the name for this slot, which may be the empty string if no
870 /// name was supplied.
871 StringRef getNameForSlot(unsigned argIndex) const;
872
873 /// Derive the full selector name (e.g. "foo:bar:") and return
874 /// it as an std::string.
875 std::string getAsString() const;
876
877 /// Prints the full selector name (e.g. "foo:bar:").
878 void print(llvm::raw_ostream &OS) const;
879
880 void dump() const;
881
882 /// Derive the conventional family of this method.
getMethodFamily()883 ObjCMethodFamily getMethodFamily() const {
884 return getMethodFamilyImpl(*this);
885 }
886
getStringFormatFamily()887 ObjCStringFormatFamily getStringFormatFamily() const {
888 return getStringFormatFamilyImpl(*this);
889 }
890
getEmptyMarker()891 static Selector getEmptyMarker() {
892 return Selector(uintptr_t(-1));
893 }
894
getTombstoneMarker()895 static Selector getTombstoneMarker() {
896 return Selector(uintptr_t(-2));
897 }
898
899 static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
900 };
901
902 /// This table allows us to fully hide how we implement
903 /// multi-keyword caching.
904 class SelectorTable {
905 // Actually a SelectorTableImpl
906 void *Impl;
907
908 public:
909 SelectorTable();
910 SelectorTable(const SelectorTable &) = delete;
911 SelectorTable &operator=(const SelectorTable &) = delete;
912 ~SelectorTable();
913
914 /// Can create any sort of selector.
915 ///
916 /// \p NumArgs indicates whether this is a no argument selector "foo", a
917 /// single argument selector "foo:" or multi-argument "foo:bar:".
918 Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
919
getUnarySelector(IdentifierInfo * ID)920 Selector getUnarySelector(IdentifierInfo *ID) {
921 return Selector(ID, 1);
922 }
923
getNullarySelector(IdentifierInfo * ID)924 Selector getNullarySelector(IdentifierInfo *ID) {
925 return Selector(ID, 0);
926 }
927
928 /// Return the total amount of memory allocated for managing selectors.
929 size_t getTotalMemory() const;
930
931 /// Return the default setter name for the given identifier.
932 ///
933 /// This is "set" + \p Name where the initial character of \p Name
934 /// has been capitalized.
935 static SmallString<64> constructSetterName(StringRef Name);
936
937 /// Return the default setter selector for the given identifier.
938 ///
939 /// This is "set" + \p Name where the initial character of \p Name
940 /// has been capitalized.
941 static Selector constructSetterSelector(IdentifierTable &Idents,
942 SelectorTable &SelTable,
943 const IdentifierInfo *Name);
944
945 /// Return the property name for the given setter selector.
946 static std::string getPropertyNameFromSetterSelector(Selector Sel);
947 };
948
949 namespace detail {
950
951 /// DeclarationNameExtra is used as a base of various uncommon special names.
952 /// This class is needed since DeclarationName has not enough space to store
953 /// the kind of every possible names. Therefore the kind of common names is
954 /// stored directly in DeclarationName, and the kind of uncommon names is
955 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because
956 /// DeclarationName needs the lower 3 bits to store the kind of common names.
957 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change
958 /// here is very likely to require changes in DeclarationName(Table).
alignas(IdentifierInfoAlignment)959 class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
960 friend class clang::DeclarationName;
961 friend class clang::DeclarationNameTable;
962
963 protected:
964 /// The kind of "extra" information stored in the DeclarationName. See
965 /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
966 /// are used. Note that DeclarationName depends on the numerical values
967 /// of the enumerators in this enum. See DeclarationName::StoredNameKind
968 /// for more info.
969 enum ExtraKind {
970 CXXDeductionGuideName,
971 CXXLiteralOperatorName,
972 CXXUsingDirective,
973 ObjCMultiArgSelector
974 };
975
976 /// ExtraKindOrNumArgs has one of the following meaning:
977 /// * The kind of an uncommon C++ special name. This DeclarationNameExtra
978 /// is in this case in fact either a CXXDeductionGuideNameExtra or
979 /// a CXXLiteralOperatorIdName.
980 ///
981 /// * It may be also name common to C++ using-directives (CXXUsingDirective),
982 ///
983 /// * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
984 /// the number of arguments in the Objective-C selector, in which
985 /// case the DeclarationNameExtra is also a MultiKeywordSelector.
986 unsigned ExtraKindOrNumArgs;
987
988 DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
989 DeclarationNameExtra(unsigned NumArgs)
990 : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
991
992 /// Return the corresponding ExtraKind.
993 ExtraKind getKind() const {
994 return static_cast<ExtraKind>(ExtraKindOrNumArgs >
995 (unsigned)ObjCMultiArgSelector
996 ? (unsigned)ObjCMultiArgSelector
997 : ExtraKindOrNumArgs);
998 }
999
1000 /// Return the number of arguments in an ObjC selector. Only valid when this
1001 /// is indeed an ObjCMultiArgSelector.
1002 unsigned getNumArgs() const {
1003 assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
1004 "getNumArgs called but this is not an ObjC selector!");
1005 return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
1006 }
1007 };
1008
1009 } // namespace detail
1010
1011 } // namespace clang
1012
1013 namespace llvm {
1014
1015 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
1016 /// DenseSets.
1017 template <>
1018 struct DenseMapInfo<clang::Selector> {
1019 static clang::Selector getEmptyKey() {
1020 return clang::Selector::getEmptyMarker();
1021 }
1022
1023 static clang::Selector getTombstoneKey() {
1024 return clang::Selector::getTombstoneMarker();
1025 }
1026
1027 static unsigned getHashValue(clang::Selector S);
1028
1029 static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
1030 return LHS == RHS;
1031 }
1032 };
1033
1034 template<>
1035 struct PointerLikeTypeTraits<clang::Selector> {
1036 static const void *getAsVoidPointer(clang::Selector P) {
1037 return P.getAsOpaquePtr();
1038 }
1039
1040 static clang::Selector getFromVoidPointer(const void *P) {
1041 return clang::Selector(reinterpret_cast<uintptr_t>(P));
1042 }
1043
1044 static constexpr int NumLowBitsAvailable = 0;
1045 };
1046
1047 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
1048 // are not guaranteed to be 8-byte aligned.
1049 template<>
1050 struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
1051 static void *getAsVoidPointer(clang::IdentifierInfo* P) {
1052 return P;
1053 }
1054
1055 static clang::IdentifierInfo *getFromVoidPointer(void *P) {
1056 return static_cast<clang::IdentifierInfo*>(P);
1057 }
1058
1059 static constexpr int NumLowBitsAvailable = 1;
1060 };
1061
1062 template<>
1063 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
1064 static const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
1065 return P;
1066 }
1067
1068 static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
1069 return static_cast<const clang::IdentifierInfo*>(P);
1070 }
1071
1072 static constexpr int NumLowBitsAvailable = 1;
1073 };
1074
1075 } // namespace llvm
1076
1077 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
1078