1 //===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
11 /// clang::Selector interfaces.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
16 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17
18 #include "clang/Basic/LLVM.h"
19 #include "clang/Basic/TokenKinds.h"
20 #include "llvm/ADT/DenseMapInfo.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/ADT/StringMap.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Allocator.h"
25 #include "llvm/Support/PointerLikeTypeTraits.h"
26 #include "llvm/Support/type_traits.h"
27 #include <cassert>
28 #include <cstddef>
29 #include <cstdint>
30 #include <cstring>
31 #include <string>
32 #include <utility>
33
34 namespace clang {
35
36 class DeclarationName;
37 class DeclarationNameTable;
38 class IdentifierInfo;
39 class LangOptions;
40 class MultiKeywordSelector;
41 class SourceLocation;
42
/// Classification produced by IdentifierInfo::isReserved(). The numeric
/// values are spelled out explicitly; NotReserved is the zero value.
enum class ReservedIdentifierStatus {
  NotReserved = 0,
  StartsWithUnderscoreAtGlobalScope = 1,
  StartsWithDoubleUnderscore = 2,
  StartsWithUnderscoreFollowedByCapitalLetter = 3,
  ContainsDoubleUnderscore = 4,
};
50
/// A simple pair of identifier info and location.
using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;

/// IdentifierInfo and other related classes are aligned to
/// 8 bytes so that DeclarationName can use the lower 3 bits
/// of a pointer to one of these classes.
enum { IdentifierInfoAlignment = 8 };

/// Width, in bits, of the IdentifierInfo::ObjCOrBuiltinID bit-field.
static constexpr int ObjCOrBuiltinIDBits = 15;
60
61 /// One of these records is kept for each identifier that
62 /// is lexed. This contains information about whether the token was \#define'd,
63 /// is a language keyword, or if it is a front-end token of some sort (e.g. a
64 /// variable or function name). The preprocessor keeps this information in a
65 /// set, and all tok::identifier tokens have a pointer to one of these.
66 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits.
alignas(IdentifierInfoAlignment)67 class alignas(IdentifierInfoAlignment) IdentifierInfo {
68 friend class IdentifierTable;
69
70 // Front-end token ID or tok::identifier.
71 unsigned TokenID : 9;
72
73 // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
74 // First NUM_OBJC_KEYWORDS values are for Objective-C,
75 // the remaining values are for builtins.
76 unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits;
77
78 // True if there is a #define for this.
79 unsigned HasMacro : 1;
80
81 // True if there was a #define for this.
82 unsigned HadMacro : 1;
83
84 // True if the identifier is a language extension.
85 unsigned IsExtension : 1;
86
87 // True if the identifier is a keyword in a newer or proposed Standard.
88 unsigned IsFutureCompatKeyword : 1;
89
90 // True if the identifier is poisoned.
91 unsigned IsPoisoned : 1;
92
93 // True if the identifier is a C++ operator keyword.
94 unsigned IsCPPOperatorKeyword : 1;
95
96 // Internal bit set by the member function RecomputeNeedsHandleIdentifier.
97 // See comment about RecomputeNeedsHandleIdentifier for more info.
98 unsigned NeedsHandleIdentifier : 1;
99
100 // True if the identifier was loaded (at least partially) from an AST file.
101 unsigned IsFromAST : 1;
102
103 // True if the identifier has changed from the definition
104 // loaded from an AST file.
105 unsigned ChangedAfterLoad : 1;
106
107 // True if the identifier's frontend information has changed from the
108 // definition loaded from an AST file.
109 unsigned FEChangedAfterLoad : 1;
110
111 // True if revertTokenIDToIdentifier was called.
112 unsigned RevertedTokenID : 1;
113
114 // True if there may be additional information about
115 // this identifier stored externally.
116 unsigned OutOfDate : 1;
117
118 // True if this is the 'import' contextual keyword.
119 unsigned IsModulesImport : 1;
120
121 // True if this is a mangled OpenMP variant name.
122 unsigned IsMangledOpenMPVariantName : 1;
123
124 // 28 bits left in a 64-bit word.
125
126 // Managed by the language front-end.
127 void *FETokenInfo = nullptr;
128
129 llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
130
131 IdentifierInfo()
132 : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false),
133 HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false),
134 IsPoisoned(false), IsCPPOperatorKeyword(false),
135 NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false),
136 FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false),
137 IsModulesImport(false), IsMangledOpenMPVariantName(false) {}
138
139 public:
140 IdentifierInfo(const IdentifierInfo &) = delete;
141 IdentifierInfo &operator=(const IdentifierInfo &) = delete;
142 IdentifierInfo(IdentifierInfo &&) = delete;
143 IdentifierInfo &operator=(IdentifierInfo &&) = delete;
144
145 /// Return true if this is the identifier for the specified string.
146 ///
147 /// This is intended to be used for string literals only: II->isStr("foo").
148 template <std::size_t StrLen>
149 bool isStr(const char (&Str)[StrLen]) const {
150 return getLength() == StrLen-1 &&
151 memcmp(getNameStart(), Str, StrLen-1) == 0;
152 }
153
154 /// Return true if this is the identifier for the specified StringRef.
155 bool isStr(llvm::StringRef Str) const {
156 llvm::StringRef ThisStr(getNameStart(), getLength());
157 return ThisStr == Str;
158 }
159
160 /// Return the beginning of the actual null-terminated string for this
161 /// identifier.
162 const char *getNameStart() const { return Entry->getKeyData(); }
163
164 /// Efficiently return the length of this identifier info.
165 unsigned getLength() const { return Entry->getKeyLength(); }
166
167 /// Return the actual identifier string.
168 StringRef getName() const {
169 return StringRef(getNameStart(), getLength());
170 }
171
172 /// Return true if this identifier is \#defined to some other value.
173 /// \note The current definition may be in a module and not currently visible.
174 bool hasMacroDefinition() const {
175 return HasMacro;
176 }
177 void setHasMacroDefinition(bool Val) {
178 if (HasMacro == Val) return;
179
180 HasMacro = Val;
181 if (Val) {
182 NeedsHandleIdentifier = true;
183 HadMacro = true;
184 } else {
185 RecomputeNeedsHandleIdentifier();
186 }
187 }
188 /// Returns true if this identifier was \#defined to some value at any
189 /// moment. In this case there should be an entry for the identifier in the
190 /// macro history table in Preprocessor.
191 bool hadMacroDefinition() const {
192 return HadMacro;
193 }
194
195 /// If this is a source-language token (e.g. 'for'), this API
196 /// can be used to cause the lexer to map identifiers to source-language
197 /// tokens.
198 tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
199
200 /// True if revertTokenIDToIdentifier() was called.
201 bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
202
203 /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
204 /// compatibility.
205 ///
206 /// TokenID is normally read-only but there are 2 instances where we revert it
207 /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
208 /// using this method so we can inform serialization about it.
209 void revertTokenIDToIdentifier() {
210 assert(TokenID != tok::identifier && "Already at tok::identifier");
211 TokenID = tok::identifier;
212 RevertedTokenID = true;
213 }
214 void revertIdentifierToTokenID(tok::TokenKind TK) {
215 assert(TokenID == tok::identifier && "Should be at tok::identifier");
216 TokenID = TK;
217 RevertedTokenID = false;
218 }
219
220 /// Return the preprocessor keyword ID for this identifier.
221 ///
222 /// For example, "define" will return tok::pp_define.
223 tok::PPKeywordKind getPPKeywordID() const;
224
225 /// Return the Objective-C keyword ID for the this identifier.
226 ///
227 /// For example, 'class' will return tok::objc_class if ObjC is enabled.
228 tok::ObjCKeywordKind getObjCKeywordID() const {
229 if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
230 return tok::ObjCKeywordKind(ObjCOrBuiltinID);
231 else
232 return tok::objc_not_keyword;
233 }
234 void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
235
236 /// Return a value indicating whether this is a builtin function.
237 ///
238 /// 0 is not-built-in. 1+ are specific builtin functions.
239 unsigned getBuiltinID() const {
240 if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
241 return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
242 else
243 return 0;
244 }
245 void setBuiltinID(unsigned ID) {
246 ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
247 assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
248 && "ID too large for field!");
249 }
250
251 unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
252 void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
253
254 /// get/setExtension - Initialize information about whether or not this
255 /// language token is an extension. This controls extension warnings, and is
256 /// only valid if a custom token ID is set.
257 bool isExtensionToken() const { return IsExtension; }
258 void setIsExtensionToken(bool Val) {
259 IsExtension = Val;
260 if (Val)
261 NeedsHandleIdentifier = true;
262 else
263 RecomputeNeedsHandleIdentifier();
264 }
265
266 /// is/setIsFutureCompatKeyword - Initialize information about whether or not
267 /// this language token is a keyword in a newer or proposed Standard. This
268 /// controls compatibility warnings, and is only true when not parsing the
269 /// corresponding Standard. Once a compatibility problem has been diagnosed
270 /// with this keyword, the flag will be cleared.
271 bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
272 void setIsFutureCompatKeyword(bool Val) {
273 IsFutureCompatKeyword = Val;
274 if (Val)
275 NeedsHandleIdentifier = true;
276 else
277 RecomputeNeedsHandleIdentifier();
278 }
279
280 /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the
281 /// Preprocessor will emit an error every time this token is used.
282 void setIsPoisoned(bool Value = true) {
283 IsPoisoned = Value;
284 if (Value)
285 NeedsHandleIdentifier = true;
286 else
287 RecomputeNeedsHandleIdentifier();
288 }
289
290 /// Return true if this token has been poisoned.
291 bool isPoisoned() const { return IsPoisoned; }
292
293 /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
294 /// this identifier is a C++ alternate representation of an operator.
295 void setIsCPlusPlusOperatorKeyword(bool Val = true) {
296 IsCPPOperatorKeyword = Val;
297 }
298 bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
299
300 /// Return true if this token is a keyword in the specified language.
301 bool isKeyword(const LangOptions &LangOpts) const;
302
303 /// Return true if this token is a C++ keyword in the specified
304 /// language.
305 bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
306
307 /// Get and set FETokenInfo. The language front-end is allowed to associate
308 /// arbitrary metadata with this token.
309 void *getFETokenInfo() const { return FETokenInfo; }
310 void setFETokenInfo(void *T) { FETokenInfo = T; }
311
312 /// Return true if the Preprocessor::HandleIdentifier must be called
313 /// on a token of this identifier.
314 ///
315 /// If this returns false, we know that HandleIdentifier will not affect
316 /// the token.
317 bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
318
319 /// Return true if the identifier in its current state was loaded
320 /// from an AST file.
321 bool isFromAST() const { return IsFromAST; }
322
323 void setIsFromAST() { IsFromAST = true; }
324
325 /// Determine whether this identifier has changed since it was loaded
326 /// from an AST file.
327 bool hasChangedSinceDeserialization() const {
328 return ChangedAfterLoad;
329 }
330
331 /// Note that this identifier has changed since it was loaded from
332 /// an AST file.
333 void setChangedSinceDeserialization() {
334 ChangedAfterLoad = true;
335 }
336
337 /// Determine whether the frontend token information for this
338 /// identifier has changed since it was loaded from an AST file.
339 bool hasFETokenInfoChangedSinceDeserialization() const {
340 return FEChangedAfterLoad;
341 }
342
343 /// Note that the frontend token information for this identifier has
344 /// changed since it was loaded from an AST file.
345 void setFETokenInfoChangedSinceDeserialization() {
346 FEChangedAfterLoad = true;
347 }
348
349 /// Determine whether the information for this identifier is out of
350 /// date with respect to the external source.
351 bool isOutOfDate() const { return OutOfDate; }
352
353 /// Set whether the information for this identifier is out of
354 /// date with respect to the external source.
355 void setOutOfDate(bool OOD) {
356 OutOfDate = OOD;
357 if (OOD)
358 NeedsHandleIdentifier = true;
359 else
360 RecomputeNeedsHandleIdentifier();
361 }
362
363 /// Determine whether this is the contextual keyword \c import.
364 bool isModulesImport() const { return IsModulesImport; }
365
366 /// Set whether this identifier is the contextual keyword \c import.
367 void setModulesImport(bool I) {
368 IsModulesImport = I;
369 if (I)
370 NeedsHandleIdentifier = true;
371 else
372 RecomputeNeedsHandleIdentifier();
373 }
374
375 /// Determine whether this is the mangled name of an OpenMP variant.
376 bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
377
378 /// Set whether this is the mangled name of an OpenMP variant.
379 void setMangledOpenMPVariantName(bool I) { IsMangledOpenMPVariantName = I; }
380
381 /// Return true if this identifier is an editor placeholder.
382 ///
383 /// Editor placeholders are produced by the code-completion engine and are
384 /// represented as characters between '<#' and '#>' in the source code. An
385 /// example of auto-completed call with a placeholder parameter is shown
386 /// below:
387 /// \code
388 /// function(<#int x#>);
389 /// \endcode
390 bool isEditorPlaceholder() const {
391 return getName().startswith("<#") && getName().endswith("#>");
392 }
393
394 /// Determine whether \p this is a name reserved for the implementation (C99
395 /// 7.1.3, C++ [lib.global.names]).
396 ReservedIdentifierStatus isReserved(const LangOptions &LangOpts) const;
397
398 /// Provide less than operator for lexicographical sorting.
399 bool operator<(const IdentifierInfo &RHS) const {
400 return getName() < RHS.getName();
401 }
402
403 private:
404 /// The Preprocessor::HandleIdentifier does several special (but rare)
405 /// things to identifiers of various sorts. For example, it changes the
406 /// \c for keyword token from tok::identifier to tok::for.
407 ///
408 /// This method is very tied to the definition of HandleIdentifier. Any
409 /// change to it should be reflected here.
410 void RecomputeNeedsHandleIdentifier() {
411 NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
412 isExtensionToken() || isFutureCompatKeyword() ||
413 isOutOfDate() || isModulesImport();
414 }
415 };
416
417 /// An RAII object for [un]poisoning an identifier within a scope.
418 ///
419 /// \p II is allowed to be null, in which case objects of this type have
420 /// no effect.
421 class PoisonIdentifierRAIIObject {
422 IdentifierInfo *const II;
423 const bool OldValue;
424
425 public:
PoisonIdentifierRAIIObject(IdentifierInfo * II,bool NewValue)426 PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
427 : II(II), OldValue(II ? II->isPoisoned() : false) {
428 if(II)
429 II->setIsPoisoned(NewValue);
430 }
431
~PoisonIdentifierRAIIObject()432 ~PoisonIdentifierRAIIObject() {
433 if(II)
434 II->setIsPoisoned(OldValue);
435 }
436 };
437
/// An iterator that walks over all of the known identifiers
/// in the lookup table.
///
/// Since this iterator uses an abstract interface via virtual
/// functions, it uses an object-oriented interface rather than the
/// more standard C++ STL iterator interface. In this OO-style
/// iteration, the single function \c Next() provides dereference,
/// advance, and end-of-sequence checking in a single
/// operation. Subclasses of this iterator type will provide the
/// actual functionality.
class IdentifierIterator {
protected:
  // Protected so that only subclasses can construct an iterator.
  IdentifierIterator() = default;

public:
  // Iterators are not copyable.
  IdentifierIterator(const IdentifierIterator &) = delete;
  IdentifierIterator &operator=(const IdentifierIterator &) = delete;

  // Virtual so that an iterator can be destroyed through a base pointer.
  virtual ~IdentifierIterator();

  /// Retrieve the next string in the identifier table and
  /// advances the iterator for the following string.
  ///
  /// \returns The next string in the identifier table. If there is
  /// no such string, returns an empty \c StringRef.
  virtual StringRef Next() = 0;
};
465
/// Provides lookups to, and iteration over, IdentifierInfo objects.
///
/// This is the interface IdentifierTable consults (through its
/// ExternalLookup pointer) when a name is not yet present in its own table.
class IdentifierInfoLookup {
public:
  virtual ~IdentifierInfoLookup();

  /// Return the IdentifierInfo for the specified named identifier.
  ///
  /// Unlike the version in IdentifierTable, this returns a pointer instead
  /// of a reference. If the pointer is null then the IdentifierInfo cannot
  /// be found.
  virtual IdentifierInfo* get(StringRef Name) = 0;

  /// Retrieve an iterator into the set of all identifiers
  /// known to this identifier lookup source.
  ///
  /// This routine provides access to all of the identifiers known to
  /// the identifier lookup, allowing access to the contents of the
  /// identifiers without introducing the overhead of constructing
  /// IdentifierInfo objects for each.
  ///
  /// Unlike get(), this member is not pure virtual, so subclasses are not
  /// required to override it; the default implementation is defined outside
  /// this header.
  ///
  /// \returns A new iterator into the set of known identifiers. The
  /// caller is responsible for deleting this iterator.
  virtual IdentifierIterator *getIdentifiers();
};
490
491 /// Implements an efficient mapping from strings to IdentifierInfo nodes.
492 ///
493 /// This has no other purpose, but this is an extremely performance-critical
494 /// piece of the code, as each occurrence of every identifier goes through
495 /// here when lexed.
496 class IdentifierTable {
497 // Shark shows that using MallocAllocator is *much* slower than using this
498 // BumpPtrAllocator!
499 using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>;
500 HashTableTy HashTable;
501
502 IdentifierInfoLookup* ExternalLookup;
503
504 public:
505 /// Create the identifier table.
506 explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr);
507
508 /// Create the identifier table, populating it with info about the
509 /// language keywords for the language specified by \p LangOpts.
510 explicit IdentifierTable(const LangOptions &LangOpts,
511 IdentifierInfoLookup *ExternalLookup = nullptr);
512
513 /// Set the external identifier lookup mechanism.
setExternalIdentifierLookup(IdentifierInfoLookup * IILookup)514 void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
515 ExternalLookup = IILookup;
516 }
517
518 /// Retrieve the external identifier lookup object, if any.
getExternalIdentifierLookup()519 IdentifierInfoLookup *getExternalIdentifierLookup() const {
520 return ExternalLookup;
521 }
522
getAllocator()523 llvm::BumpPtrAllocator& getAllocator() {
524 return HashTable.getAllocator();
525 }
526
527 /// Return the identifier token info for the specified named
528 /// identifier.
get(StringRef Name)529 IdentifierInfo &get(StringRef Name) {
530 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
531
532 IdentifierInfo *&II = Entry.second;
533 if (II) return *II;
534
535 // No entry; if we have an external lookup, look there first.
536 if (ExternalLookup) {
537 II = ExternalLookup->get(Name);
538 if (II)
539 return *II;
540 }
541
542 // Lookups failed, make a new IdentifierInfo.
543 void *Mem = getAllocator().Allocate<IdentifierInfo>();
544 II = new (Mem) IdentifierInfo();
545
546 // Make sure getName() knows how to find the IdentifierInfo
547 // contents.
548 II->Entry = &Entry;
549
550 return *II;
551 }
552
get(StringRef Name,tok::TokenKind TokenCode)553 IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
554 IdentifierInfo &II = get(Name);
555 II.TokenID = TokenCode;
556 assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
557 return II;
558 }
559
560 /// Gets an IdentifierInfo for the given name without consulting
561 /// external sources.
562 ///
563 /// This is a version of get() meant for external sources that want to
564 /// introduce or modify an identifier. If they called get(), they would
565 /// likely end up in a recursion.
getOwn(StringRef Name)566 IdentifierInfo &getOwn(StringRef Name) {
567 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
568
569 IdentifierInfo *&II = Entry.second;
570 if (II)
571 return *II;
572
573 // Lookups failed, make a new IdentifierInfo.
574 void *Mem = getAllocator().Allocate<IdentifierInfo>();
575 II = new (Mem) IdentifierInfo();
576
577 // Make sure getName() knows how to find the IdentifierInfo
578 // contents.
579 II->Entry = &Entry;
580
581 // If this is the 'import' contextual keyword, mark it as such.
582 if (Name.equals("import"))
583 II->setModulesImport(true);
584
585 return *II;
586 }
587
588 using iterator = HashTableTy::const_iterator;
589 using const_iterator = HashTableTy::const_iterator;
590
begin()591 iterator begin() const { return HashTable.begin(); }
end()592 iterator end() const { return HashTable.end(); }
size()593 unsigned size() const { return HashTable.size(); }
594
find(StringRef Name)595 iterator find(StringRef Name) const { return HashTable.find(Name); }
596
597 /// Print some statistics to stderr that indicate how well the
598 /// hashing is doing.
599 void PrintStats() const;
600
601 /// Populate the identifier table with info about the language keywords
602 /// for the language specified by \p LangOpts.
603 void AddKeywords(const LangOptions &LangOpts);
604 };
605
/// A family of Objective-C methods.
///
/// These families carry no inherent meaning in the language, but they are
/// central enough in the existing implementations to merit direct AST
/// support. In theory arbitrary methods could be considered to form
/// families; the focus here is on the methods involving allocation and
/// retain-count management, as these are the most "core" and the most
/// likely to be useful to diverse clients without extra information.
///
/// Both selectors and actual method declarations may be classified
/// into families. Method families may impose additional restrictions
/// beyond their selector name; for example, a method called '_init'
/// that returns void is not considered to be in the 'init' family
/// (but would be if it returned 'id'). It is also possible to
/// explicitly change or remove a method's family. Therefore the
/// method's family should be considered the single source of truth.
enum ObjCMethodFamily {
  /// No particular method family.
  OMF_None = 0,

  // Selectors in the next five families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have additional
  // CamelCase "words" in their first selector chunk following the family
  // name.
  OMF_alloc = 1,
  OMF_copy = 2,
  OMF_init = 3,
  OMF_mutableCopy = 4,
  OMF_new = 5,

  // Each of the following families is a singleton consisting only of the
  // nullary selector with the given name.
  OMF_autorelease = 6,
  OMF_dealloc = 7,
  OMF_finalize = 8,
  OMF_release = 9,
  OMF_retain = 10,
  OMF_retainCount = 11,
  OMF_self = 12,
  OMF_initialize = 13,

  // performSelector families
  OMF_performSelector = 14
};
651
/// Enough bits to store any enumerator in ObjCMethodFamily or
/// InvalidObjCMethodFamily.
///
/// ObjCMethodFamily has 15 enumerators (values 0 through 14), so 4 bits
/// hold all of them plus the invalid marker below.
enum { ObjCMethodFamilyBitWidth = 4 };

/// An invalid value of ObjCMethodFamily.
///
/// This is the all-ones pattern of an ObjCMethodFamilyBitWidth-bit field
/// (15), which no ObjCMethodFamily enumerator uses.
enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 };
658
/// A family of Objective-C methods.
///
/// These are families of methods whose result type is initially 'id', but
/// which are candidates for having the result type changed to
/// 'instancetype'. Selector::getInstTypeMethodFamily() returns one of these
/// values.
enum ObjCInstanceTypeFamily {
  OIT_None = 0,
  OIT_Array = 1,
  OIT_Dictionary = 2,
  OIT_Singleton = 3,
  OIT_Init = 4,
  OIT_ReturnsSelf = 5
};
671
/// String-format family of a selector, as returned by
/// Selector::getStringFormatFamily(). SFF_None is the zero value.
enum ObjCStringFormatFamily {
  SFF_None = 0,
  SFF_NSString = 1,
  SFF_CFString = 2
};
677
678 /// Smart pointer class that efficiently represents Objective-C method
679 /// names.
680 ///
681 /// This class will either point to an IdentifierInfo or a
682 /// MultiKeywordSelector (which is private). This enables us to optimize
683 /// selectors that take no arguments and selectors that take 1 argument, which
684 /// accounts for 78% of all selectors in Cocoa.h.
685 class Selector {
686 friend class Diagnostic;
687 friend class SelectorTable; // only the SelectorTable can create these
688 friend class DeclarationName; // and the AST's DeclarationName.
689
690 enum IdentifierInfoFlag {
691 // Empty selector = 0. Note that these enumeration values must
692 // correspond to the enumeration values of DeclarationName::StoredNameKind
693 ZeroArg = 0x01,
694 OneArg = 0x02,
695 MultiArg = 0x07,
696 ArgFlags = 0x07
697 };
698
699 /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low
700 /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any
701 /// case IdentifierInfo and MultiKeywordSelector are already aligned to
702 /// 8 bytes even on 32 bits archs because of DeclarationName.
703 uintptr_t InfoPtr = 0;
704
Selector(IdentifierInfo * II,unsigned nArgs)705 Selector(IdentifierInfo *II, unsigned nArgs) {
706 InfoPtr = reinterpret_cast<uintptr_t>(II);
707 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
708 assert(nArgs < 2 && "nArgs not equal to 0/1");
709 InfoPtr |= nArgs+1;
710 }
711
Selector(MultiKeywordSelector * SI)712 Selector(MultiKeywordSelector *SI) {
713 InfoPtr = reinterpret_cast<uintptr_t>(SI);
714 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
715 InfoPtr |= MultiArg;
716 }
717
getAsIdentifierInfo()718 IdentifierInfo *getAsIdentifierInfo() const {
719 if (getIdentifierInfoFlag() < MultiArg)
720 return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
721 return nullptr;
722 }
723
getMultiKeywordSelector()724 MultiKeywordSelector *getMultiKeywordSelector() const {
725 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
726 }
727
getIdentifierInfoFlag()728 unsigned getIdentifierInfoFlag() const {
729 return InfoPtr & ArgFlags;
730 }
731
732 static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
733
734 static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
735
736 public:
737 /// The default ctor should only be used when creating data structures that
738 /// will contain selectors.
739 Selector() = default;
Selector(uintptr_t V)740 explicit Selector(uintptr_t V) : InfoPtr(V) {}
741
742 /// operator==/!= - Indicate whether the specified selectors are identical.
743 bool operator==(Selector RHS) const {
744 return InfoPtr == RHS.InfoPtr;
745 }
746 bool operator!=(Selector RHS) const {
747 return InfoPtr != RHS.InfoPtr;
748 }
749
getAsOpaquePtr()750 void *getAsOpaquePtr() const {
751 return reinterpret_cast<void*>(InfoPtr);
752 }
753
754 /// Determine whether this is the empty selector.
isNull()755 bool isNull() const { return InfoPtr == 0; }
756
757 // Predicates to identify the selector type.
isKeywordSelector()758 bool isKeywordSelector() const {
759 return getIdentifierInfoFlag() != ZeroArg;
760 }
761
isUnarySelector()762 bool isUnarySelector() const {
763 return getIdentifierInfoFlag() == ZeroArg;
764 }
765
766 /// If this selector is the specific keyword selector described by Names.
767 bool isKeywordSelector(ArrayRef<StringRef> Names) const;
768
769 /// If this selector is the specific unary selector described by Name.
770 bool isUnarySelector(StringRef Name) const;
771
772 unsigned getNumArgs() const;
773
774 /// Retrieve the identifier at a given position in the selector.
775 ///
776 /// Note that the identifier pointer returned may be NULL. Clients that only
777 /// care about the text of the identifier string, and not the specific,
778 /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
779 /// an empty string when the identifier pointer would be NULL.
780 ///
781 /// \param argIndex The index for which we want to retrieve the identifier.
782 /// This index shall be less than \c getNumArgs() unless this is a keyword
783 /// selector, in which case 0 is the only permissible value.
784 ///
785 /// \returns the uniqued identifier for this slot, or NULL if this slot has
786 /// no corresponding identifier.
787 IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
788
789 /// Retrieve the name at a given position in the selector.
790 ///
791 /// \param argIndex The index for which we want to retrieve the name.
792 /// This index shall be less than \c getNumArgs() unless this is a keyword
793 /// selector, in which case 0 is the only permissible value.
794 ///
795 /// \returns the name for this slot, which may be the empty string if no
796 /// name was supplied.
797 StringRef getNameForSlot(unsigned argIndex) const;
798
799 /// Derive the full selector name (e.g. "foo:bar:") and return
800 /// it as an std::string.
801 std::string getAsString() const;
802
803 /// Prints the full selector name (e.g. "foo:bar:").
804 void print(llvm::raw_ostream &OS) const;
805
806 void dump() const;
807
808 /// Derive the conventional family of this method.
getMethodFamily()809 ObjCMethodFamily getMethodFamily() const {
810 return getMethodFamilyImpl(*this);
811 }
812
getStringFormatFamily()813 ObjCStringFormatFamily getStringFormatFamily() const {
814 return getStringFormatFamilyImpl(*this);
815 }
816
getEmptyMarker()817 static Selector getEmptyMarker() {
818 return Selector(uintptr_t(-1));
819 }
820
getTombstoneMarker()821 static Selector getTombstoneMarker() {
822 return Selector(uintptr_t(-2));
823 }
824
825 static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
826 };
827
/// This table allows us to fully hide how we implement
/// multi-keyword caching.
class SelectorTable {
  // Actually a SelectorTableImpl
  void *Impl;

public:
  SelectorTable();
  // Not copyable.
  SelectorTable(const SelectorTable &) = delete;
  SelectorTable &operator=(const SelectorTable &) = delete;
  ~SelectorTable();

  /// Can create any sort of selector.
  ///
  /// \p NumArgs indicates whether this is a no argument selector "foo", a
  /// single argument selector "foo:" or multi-argument "foo:bar:".
  Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);

  /// Create the one-argument selector for \p ID.
  ///
  /// Note the naming: the result carries the OneArg tag (the Selector
  /// constructor stores nArgs + 1), so Selector::isUnarySelector(), which
  /// tests for the ZeroArg tag, returns false for it.
  Selector getUnarySelector(IdentifierInfo *ID) {
    return Selector(ID, 1);
  }

  /// Create the zero-argument selector for \p ID. The result carries the
  /// ZeroArg tag, which is what Selector::isUnarySelector() tests for.
  Selector getNullarySelector(IdentifierInfo *ID) {
    return Selector(ID, 0);
  }

  /// Return the total amount of memory allocated for managing selectors.
  size_t getTotalMemory() const;

  /// Return the default setter name for the given identifier.
  ///
  /// This is "set" + \p Name where the initial character of \p Name
  /// has been capitalized.
  static SmallString<64> constructSetterName(StringRef Name);

  /// Return the default setter selector for the given identifier.
  ///
  /// This is "set" + \p Name where the initial character of \p Name
  /// has been capitalized.
  static Selector constructSetterSelector(IdentifierTable &Idents,
                                          SelectorTable &SelTable,
                                          const IdentifierInfo *Name);

  /// Return the property name for the given setter selector.
  static std::string getPropertyNameFromSetterSelector(Selector Sel);
};
874
875 namespace detail {
876
877 /// DeclarationNameExtra is used as a base of various uncommon special names.
878 /// This class is needed since DeclarationName has not enough space to store
879 /// the kind of every possible names. Therefore the kind of common names is
880 /// stored directly in DeclarationName, and the kind of uncommon names is
881 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because
882 /// DeclarationName needs the lower 3 bits to store the kind of common names.
883 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change
884 /// here is very likely to require changes in DeclarationName(Table).
alignas(IdentifierInfoAlignment)885 class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
886 friend class clang::DeclarationName;
887 friend class clang::DeclarationNameTable;
888
889 protected:
890 /// The kind of "extra" information stored in the DeclarationName. See
891 /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
892 /// are used. Note that DeclarationName depends on the numerical values
893 /// of the enumerators in this enum. See DeclarationName::StoredNameKind
894 /// for more info.
895 enum ExtraKind {
896 CXXDeductionGuideName,
897 CXXLiteralOperatorName,
898 CXXUsingDirective,
899 ObjCMultiArgSelector
900 };
901
902 /// ExtraKindOrNumArgs has one of the following meaning:
903 /// * The kind of an uncommon C++ special name. This DeclarationNameExtra
904 /// is in this case in fact either a CXXDeductionGuideNameExtra or
905 /// a CXXLiteralOperatorIdName.
906 ///
907 /// * It may be also name common to C++ using-directives (CXXUsingDirective),
908 ///
909 /// * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
910 /// the number of arguments in the Objective-C selector, in which
911 /// case the DeclarationNameExtra is also a MultiKeywordSelector.
912 unsigned ExtraKindOrNumArgs;
913
914 DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
915 DeclarationNameExtra(unsigned NumArgs)
916 : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
917
918 /// Return the corresponding ExtraKind.
919 ExtraKind getKind() const {
920 return static_cast<ExtraKind>(ExtraKindOrNumArgs >
921 (unsigned)ObjCMultiArgSelector
922 ? (unsigned)ObjCMultiArgSelector
923 : ExtraKindOrNumArgs);
924 }
925
926 /// Return the number of arguments in an ObjC selector. Only valid when this
927 /// is indeed an ObjCMultiArgSelector.
928 unsigned getNumArgs() const {
929 assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
930 "getNumArgs called but this is not an ObjC selector!");
931 return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
932 }
933 };
934
935 } // namespace detail
936
937 } // namespace clang
938
939 namespace llvm {
940
941 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
942 /// DenseSets.
943 template <>
944 struct DenseMapInfo<clang::Selector> {
945 static clang::Selector getEmptyKey() {
946 return clang::Selector::getEmptyMarker();
947 }
948
949 static clang::Selector getTombstoneKey() {
950 return clang::Selector::getTombstoneMarker();
951 }
952
953 static unsigned getHashValue(clang::Selector S);
954
955 static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
956 return LHS == RHS;
957 }
958 };
959
960 template<>
961 struct PointerLikeTypeTraits<clang::Selector> {
962 static const void *getAsVoidPointer(clang::Selector P) {
963 return P.getAsOpaquePtr();
964 }
965
966 static clang::Selector getFromVoidPointer(const void *P) {
967 return clang::Selector(reinterpret_cast<uintptr_t>(P));
968 }
969
970 static constexpr int NumLowBitsAvailable = 0;
971 };
972
973 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
974 // are not guaranteed to be 8-byte aligned.
975 template<>
976 struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
977 static void *getAsVoidPointer(clang::IdentifierInfo* P) {
978 return P;
979 }
980
981 static clang::IdentifierInfo *getFromVoidPointer(void *P) {
982 return static_cast<clang::IdentifierInfo*>(P);
983 }
984
985 static constexpr int NumLowBitsAvailable = 1;
986 };
987
988 template<>
989 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
990 static const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
991 return P;
992 }
993
994 static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
995 return static_cast<const clang::IdentifierInfo*>(P);
996 }
997
998 static constexpr int NumLowBitsAvailable = 1;
999 };
1000
1001 } // namespace llvm
1002
1003 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
1004