1 //===- IdentifierTable.cpp - Hash table for identifier lookup -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the IdentifierInfo, IdentifierVisitor, and
10 // IdentifierTable interfaces.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/IdentifierTable.h"
15 #include "clang/Basic/CharInfo.h"
16 #include "clang/Basic/DiagnosticLex.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/OperatorKinds.h"
19 #include "clang/Basic/Specifiers.h"
20 #include "clang/Basic/TargetBuiltins.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/DenseMapInfo.h"
23 #include "llvm/ADT/FoldingSet.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringMap.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <cassert>
31 #include <cstdio>
32 #include <cstring>
33 #include <string>
34 
35 using namespace clang;
36 
// Compile-time check that the ObjCOrBuiltinID field has sufficient room to
// store the largest possible target/aux-target combination, i.e. twice
// LargestBuiltinID. `2 << (ObjCOrBuiltinIDBits - 1)` equals
// 2^ObjCOrBuiltinIDBits, the number of distinct values representable in
// ObjCOrBuiltinIDBits bits. If this fires, we likely need to just change the
// ObjCOrBuiltinIDBits value in IdentifierTable.h.
static_assert(2 * LargestBuiltinID < (2 << (ObjCOrBuiltinIDBits - 1)),
              "Insufficient ObjCOrBuiltinID Bits");
42 
43 //===----------------------------------------------------------------------===//
44 // IdentifierTable Implementation
45 //===----------------------------------------------------------------------===//
46 
// Out-of-line, compiler-generated destructors for the two lookup interfaces.
IdentifierIterator::~IdentifierIterator() = default;

IdentifierInfoLookup::~IdentifierInfoLookup() = default;
50 
51 namespace {
52 
53 /// A simple identifier lookup iterator that represents an
54 /// empty sequence of identifiers.
55 class EmptyLookupIterator : public IdentifierIterator
56 {
57 public:
58   StringRef Next() override { return StringRef(); }
59 };
60 
61 } // namespace
62 
/// Default implementation: returns a freshly heap-allocated iterator that
/// represents an empty sequence of identifiers.
/// NOTE(review): the object is created with `new` and never freed here, so
/// presumably the caller owns it -- confirm against the header.
IdentifierIterator *IdentifierInfoLookup::getIdentifiers() {
  return new EmptyLookupIterator();
}
66 
/// Creates an identifier table without registering any keywords.
/// \param ExternalLookup stored as the table's external lookup source.
IdentifierTable::IdentifierTable(IdentifierInfoLookup *ExternalLookup)
    : HashTable(8192), // Start with space for 8K identifiers.
      ExternalLookup(ExternalLookup) {}
70 
/// Creates an identifier table by delegating to the single-argument
/// constructor and then registering the keywords selected by \p LangOpts.
IdentifierTable::IdentifierTable(const LangOptions &LangOpts,
                                 IdentifierInfoLookup *ExternalLookup)
    : IdentifierTable(ExternalLookup) {
  // Populate the identifier table with info about keywords for the current
  // language.
  AddKeywords(LangOpts);
}
78 
79 //===----------------------------------------------------------------------===//
80 // Language Keyword Implementation
81 //===----------------------------------------------------------------------===//
82 
// Constants for TokenKinds.def
namespace {

  /// Bit flags naming the language modes/features under which a keyword is
  /// available. Every enumerator up to and including KEYMAX occupies its own
  /// bit, so flag sets can be OR'ed together and later taken apart one bit at
  /// a time (see getKeywordStatus). KEYALLCXX and KEYALL are multi-bit
  /// combinations rather than individual flags.
  enum TokenKey : unsigned {
    KEYC99        = 0x1,
    KEYCXX        = 0x2,
    KEYCXX11      = 0x4,
    KEYGNU        = 0x8,
    KEYMS         = 0x10,
    BOOLSUPPORT   = 0x20,
    KEYALTIVEC    = 0x40,
    KEYNOCXX      = 0x80,
    KEYBORLAND    = 0x100,
    KEYOPENCLC    = 0x200,
    KEYC2X        = 0x400,
    KEYNOMS18     = 0x800,
    KEYNOOPENCL   = 0x1000,
    WCHARSUPPORT  = 0x2000,
    HALFSUPPORT   = 0x4000,
    CHAR8SUPPORT  = 0x8000,
    KEYOBJC       = 0x10000,
    KEYZVECTOR    = 0x20000,
    KEYCOROUTINES = 0x40000,
    KEYMODULES    = 0x80000,
    KEYCXX20      = 0x100000,
    KEYOPENCLCXX  = 0x200000,
    KEYMSCOMPAT   = 0x400000,
    KEYSYCL       = 0x800000,
    KEYCUDA       = 0x1000000,
    KEYHLSL       = 0x2000000,
    KEYMAX        = KEYHLSL, // The maximum key; must track the highest bit.
    KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20,
    // (KEYMAX | (KEYMAX-1)) sets every bit from the lowest up to KEYMAX; the
    // two exclusion-only flags are then masked back out.
    KEYALL = (KEYMAX | (KEYMAX-1)) & ~KEYNOMS18 &
             ~KEYNOOPENCL // KEYNOMS18 and KEYNOOPENCL are used to exclude.
  };

  /// How a keyword is treated in the selected standard. This enum is ordered
  /// intentionally so that the value that 'wins' is the most 'permissive':
  /// per-flag results are merged by taking the maximum enumerator.
  enum KeywordStatus {
    KS_Unknown,     // Not yet calculated. Used when figuring out the status.
    KS_Disabled,    // Disabled
    KS_Future,      // Is a keyword in future standard
    KS_Extension,   // Is an extension
    KS_Enabled,     // Enabled
  };

} // namespace
130 
131 // This works on a single TokenKey flag and checks the LangOpts to get the
132 // KeywordStatus based exclusively on this flag, so that it can be merged in
133 // getKeywordStatus. Most should be enabled/disabled, but some might imply
134 // 'future' versions, or extensions. Returns 'unknown' unless this is KNOWN to
135 // be disabled, and the calling function makes it 'disabled' if no other flag
136 // changes it. This is necessary for the KEYNOCXX and KEYNOOPENCL flags.
137 static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts,
138                                             TokenKey Flag) {
139   // Flag is a single bit version of TokenKey (that is, not
140   // KEYALL/KEYALLCXX/etc), so we can check with == throughout this function.
141   assert((Flag & ~(Flag - 1)) == Flag && "Multiple bits set?");
142 
143   switch (Flag) {
144   case KEYC99:
145     if (LangOpts.C99)
146       return KS_Enabled;
147     return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
148   case KEYC2X:
149     if (LangOpts.C2x)
150       return KS_Enabled;
151     return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
152   case KEYCXX:
153     return LangOpts.CPlusPlus ? KS_Enabled : KS_Unknown;
154   case KEYCXX11:
155     if (LangOpts.CPlusPlus11)
156       return KS_Enabled;
157     return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
158   case KEYCXX20:
159     if (LangOpts.CPlusPlus20)
160       return KS_Enabled;
161     return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
162   case KEYGNU:
163     return LangOpts.GNUKeywords ? KS_Extension : KS_Unknown;
164   case KEYMS:
165     return LangOpts.MicrosoftExt ? KS_Extension : KS_Unknown;
166   case BOOLSUPPORT:
167     if (LangOpts.Bool)      return KS_Enabled;
168     return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
169   case KEYALTIVEC:
170     return LangOpts.AltiVec ? KS_Enabled : KS_Unknown;
171   case KEYBORLAND:
172     return LangOpts.Borland ? KS_Extension : KS_Unknown;
173   case KEYOPENCLC:
174     return LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus ? KS_Enabled
175                                                         : KS_Unknown;
176   case WCHARSUPPORT:
177     return LangOpts.WChar ? KS_Enabled : KS_Unknown;
178   case HALFSUPPORT:
179     return LangOpts.Half ? KS_Enabled : KS_Unknown;
180   case CHAR8SUPPORT:
181     if (LangOpts.Char8) return KS_Enabled;
182     if (LangOpts.CPlusPlus20) return KS_Unknown;
183     if (LangOpts.CPlusPlus) return KS_Future;
184     return KS_Unknown;
185   case KEYOBJC:
186     // We treat bridge casts as objective-C keywords so we can warn on them
187     // in non-arc mode.
188     return LangOpts.ObjC ? KS_Enabled : KS_Unknown;
189   case KEYZVECTOR:
190     return LangOpts.ZVector ? KS_Enabled : KS_Unknown;
191   case KEYCOROUTINES:
192     return LangOpts.Coroutines ? KS_Enabled : KS_Unknown;
193   case KEYMODULES:
194     return KS_Unknown;
195   case KEYOPENCLCXX:
196     return LangOpts.OpenCLCPlusPlus ? KS_Enabled : KS_Unknown;
197   case KEYMSCOMPAT:
198     return LangOpts.MSVCCompat ? KS_Enabled : KS_Unknown;
199   case KEYSYCL:
200     return LangOpts.isSYCL() ? KS_Enabled : KS_Unknown;
201   case KEYCUDA:
202     return LangOpts.CUDA ? KS_Enabled : KS_Unknown;
203   case KEYHLSL:
204     return LangOpts.HLSL ? KS_Enabled : KS_Unknown;
205   case KEYNOCXX:
206     // This is enabled in all non-C++ modes, but might be enabled for other
207     // reasons as well.
208     return LangOpts.CPlusPlus ? KS_Unknown : KS_Enabled;
209   case KEYNOOPENCL:
210     // The disable behavior for this is handled in getKeywordStatus.
211     return KS_Unknown;
212   case KEYNOMS18:
213     // The disable behavior for this is handled in getKeywordStatus.
214     return KS_Unknown;
215   default:
216     llvm_unreachable("Unknown KeywordStatus flag");
217   }
218 }
219 
220 /// Translates flags as specified in TokenKinds.def into keyword status
221 /// in the given language standard.
222 static KeywordStatus getKeywordStatus(const LangOptions &LangOpts,
223                                       unsigned Flags) {
224   // KEYALL means always enabled, so special case this one.
225   if (Flags == KEYALL) return KS_Enabled;
226   // These are tests that need to 'always win', as they are special in that they
227   // disable based on certain conditions.
228   if (LangOpts.OpenCL && (Flags & KEYNOOPENCL)) return KS_Disabled;
229   if (LangOpts.MSVCCompat && (Flags & KEYNOMS18) &&
230       !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2015))
231     return KS_Disabled;
232 
233   KeywordStatus CurStatus = KS_Unknown;
234 
235   while (Flags != 0) {
236     unsigned CurFlag = Flags & ~(Flags - 1);
237     Flags = Flags & ~CurFlag;
238     CurStatus = std::max(
239         CurStatus,
240         getKeywordStatusHelper(LangOpts, static_cast<TokenKey>(CurFlag)));
241   }
242 
243   if (CurStatus == KS_Unknown)
244     return KS_Disabled;
245   return CurStatus;
246 }
247 
248 /// AddKeyword - This method is used to associate a token ID with specific
249 /// identifiers because they are language keywords.  This causes the lexer to
250 /// automatically map matching identifiers to specialized token codes.
251 static void AddKeyword(StringRef Keyword,
252                        tok::TokenKind TokenCode, unsigned Flags,
253                        const LangOptions &LangOpts, IdentifierTable &Table) {
254   KeywordStatus AddResult = getKeywordStatus(LangOpts, Flags);
255 
256   // Don't add this keyword if disabled in this language.
257   if (AddResult == KS_Disabled) return;
258 
259   IdentifierInfo &Info =
260       Table.get(Keyword, AddResult == KS_Future ? tok::identifier : TokenCode);
261   Info.setIsExtensionToken(AddResult == KS_Extension);
262   Info.setIsFutureCompatKeyword(AddResult == KS_Future);
263 }
264 
265 /// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
266 /// representations.
267 static void AddCXXOperatorKeyword(StringRef Keyword,
268                                   tok::TokenKind TokenCode,
269                                   IdentifierTable &Table) {
270   IdentifierInfo &Info = Table.get(Keyword, TokenCode);
271   Info.setIsCPlusPlusOperatorKeyword();
272 }
273 
274 /// AddObjCKeyword - Register an Objective-C \@keyword like "class" "selector"
275 /// or "property".
276 static void AddObjCKeyword(StringRef Name,
277                            tok::ObjCKeywordKind ObjCID,
278                            IdentifierTable &Table) {
279   Table.get(Name).setObjCKeywordID(ObjCID);
280 }
281 
282 static void AddInterestingIdentifier(StringRef Name,
283                                      tok::InterestingIdentifierKind BTID,
284                                      IdentifierTable &Table) {
285   // Don't add 'not_interesting' identifier.
286   if (BTID != tok::not_interesting) {
287     IdentifierInfo &Info = Table.get(Name, tok::identifier);
288     Info.setInterestingIdentifierID(BTID);
289   }
290 }
291 
/// AddKeywords - Add all keywords to the symbol table.
///
/// The bulk of the work is done by expanding TokenKinds.def with the macros
/// defined below; a few option-dependent keywords and the 'import' contextual
/// keyword are then added by hand.
void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
  // Add keywords and tokens for the current language.
  // KEYWORD: the spelling is the stringized NAME and the token is kw_NAME.
#define KEYWORD(NAME, FLAGS) \
  AddKeyword(StringRef(#NAME), tok::kw_ ## NAME,  \
             FLAGS, LangOpts, *this);
  // ALIAS: NAME is already a string; it maps onto the existing token kw_TOK.
#define ALIAS(NAME, TOK, FLAGS) \
  AddKeyword(StringRef(NAME), tok::kw_ ## TOK,  \
             FLAGS, LangOpts, *this);
  // Operator keywords are only registered when CXXOperatorNames is set.
#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
  if (LangOpts.CXXOperatorNames)          \
    AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this);
  // Objective-C @keywords are only registered when ObjC is set.
#define OBJC_AT_KEYWORD(NAME)  \
  if (LangOpts.ObjC)           \
    AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this);
#define INTERESTING_IDENTIFIER(NAME)                                           \
  AddInterestingIdentifier(StringRef(#NAME), tok::NAME, *this);

  // TESTING_KEYWORD entries expand to nothing, so they are not registered.
#define TESTING_KEYWORD(NAME, FLAGS)
#include "clang/Basic/TokenKinds.def"

  // The following keywords are gated on a dedicated language option and,
  // when that option is set, are added with KEYALL (always enabled).
  if (LangOpts.ParseUnknownAnytype)
    AddKeyword("__unknown_anytype", tok::kw___unknown_anytype, KEYALL,
               LangOpts, *this);

  if (LangOpts.DeclSpecKeyword)
    AddKeyword("__declspec", tok::kw___declspec, KEYALL, LangOpts, *this);

  // "__ieee128" is registered under the kw___float128 token.
  if (LangOpts.IEEE128)
    AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this);

  // Add the 'import' contextual keyword.
  get("import").setModulesImport(true);
}
327 
/// Checks if the specified token kind represents a keyword in the
/// specified language.
///
/// The switch is generated from the KEYWORD entries of TokenKinds.def: each
/// kw_NAME case evaluates that keyword's FLAGS through getKeywordStatus. Any
/// token kind without a KEYWORD entry reports KS_Disabled.
/// \returns Status of the keyword in the language.
static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts,
                                      tok::TokenKind K) {
  switch (K) {
#define KEYWORD(NAME, FLAGS) \
  case tok::kw_##NAME: return getKeywordStatus(LangOpts, FLAGS);
#include "clang/Basic/TokenKinds.def"
  default: return KS_Disabled;
  }
}
340 
341 /// Returns true if the identifier represents a keyword in the
342 /// specified language.
343 bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const {
344   switch (getTokenKwStatus(LangOpts, getTokenID())) {
345   case KS_Enabled:
346   case KS_Extension:
347     return true;
348   default:
349     return false;
350   }
351 }
352 
353 /// Returns true if the identifier represents a C++ keyword in the
354 /// specified language.
355 bool IdentifierInfo::isCPlusPlusKeyword(const LangOptions &LangOpts) const {
356   if (!LangOpts.CPlusPlus || !isKeyword(LangOpts))
357     return false;
358   // This is a C++ keyword if this identifier is not a keyword when checked
359   // using LangOptions without C++ support.
360   LangOptions LangOptsNoCPP = LangOpts;
361   LangOptsNoCPP.CPlusPlus = false;
362   LangOptsNoCPP.CPlusPlus11 = false;
363   LangOptsNoCPP.CPlusPlus20 = false;
364   return !isKeyword(LangOptsNoCPP);
365 }
366 
367 ReservedIdentifierStatus
368 IdentifierInfo::isReserved(const LangOptions &LangOpts) const {
369   StringRef Name = getName();
370 
371   // '_' is a reserved identifier, but its use is so common (e.g. to store
372   // ignored values) that we don't warn on it.
373   if (Name.size() <= 1)
374     return ReservedIdentifierStatus::NotReserved;
375 
376   // [lex.name] p3
377   if (Name[0] == '_') {
378 
379     // Each name that begins with an underscore followed by an uppercase letter
380     // or another underscore is reserved.
381     if (Name[1] == '_')
382       return ReservedIdentifierStatus::StartsWithDoubleUnderscore;
383 
384     if ('A' <= Name[1] && Name[1] <= 'Z')
385       return ReservedIdentifierStatus::
386           StartsWithUnderscoreFollowedByCapitalLetter;
387 
388     // This is a bit misleading: it actually means it's only reserved if we're
389     // at global scope because it starts with an underscore.
390     return ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope;
391   }
392 
393   // Each name that contains a double underscore (__) is reserved.
394   if (LangOpts.CPlusPlus && Name.contains("__"))
395     return ReservedIdentifierStatus::ContainsDoubleUnderscore;
396 
397   return ReservedIdentifierStatus::NotReserved;
398 }
399 
400 ReservedLiteralSuffixIdStatus
401 IdentifierInfo::isReservedLiteralSuffixId() const {
402   StringRef Name = getName();
403 
404   if (Name[0] != '_')
405     return ReservedLiteralSuffixIdStatus::NotStartsWithUnderscore;
406 
407   if (Name.contains("__"))
408     return ReservedLiteralSuffixIdStatus::ContainsDoubleUnderscore;
409 
410   return ReservedLiteralSuffixIdStatus::NotReserved;
411 }
412 
413 StringRef IdentifierInfo::deuglifiedName() const {
414   StringRef Name = getName();
415   if (Name.size() >= 2 && Name.front() == '_' &&
416       (Name[1] == '_' || (Name[1] >= 'A' && Name[1] <= 'Z')))
417     return Name.ltrim('_');
418   return Name;
419 }
420 
/// Maps this identifier to the preprocessor keyword it spells, or to
/// tok::pp_not_keyword when it spells none of the directives listed below.
tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
  // We use a perfect hash function here involving the length of the keyword,
  // the first and third character.  For preprocessor ID's there are no
  // collisions (if there were, the switch below would complain about duplicate
  // case values).  Note that this depends on 'if' being null terminated: for a
  // two-character name the "third character" read is the terminating '\0'.

  // HASH: the length goes in the bits above bit 4; the low five bits hold the
  // sum of the two characters' offsets from 'a', masked with 31.
#define HASH(LEN, FIRST, THIRD) \
  (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
  // CASE: a hash match only selects a candidate; memcmp over LEN bytes
  // confirms that the spelling really is NAME.
#define CASE(LEN, FIRST, THIRD, NAME) \
  case HASH(LEN, FIRST, THIRD): \
    return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME

  unsigned Len = getLength();
  // Nothing in the table is shorter than two characters.
  if (Len < 2) return tok::pp_not_keyword;
  const char *Name = getNameStart();
  switch (HASH(Len, Name[0], Name[2])) {
  default: return tok::pp_not_keyword;
  CASE( 2, 'i', '\0', if);
  CASE( 4, 'e', 'i', elif);
  CASE( 4, 'e', 's', else);
  CASE( 4, 'l', 'n', line);
  CASE( 4, 's', 'c', sccs);
  CASE( 5, 'e', 'd', endif);
  CASE( 5, 'e', 'r', error);
  CASE( 5, 'i', 'e', ident);
  CASE( 5, 'i', 'd', ifdef);
  CASE( 5, 'u', 'd', undef);

  CASE( 6, 'a', 's', assert);
  CASE( 6, 'd', 'f', define);
  CASE( 6, 'i', 'n', ifndef);
  CASE( 6, 'i', 'p', import);
  CASE( 6, 'p', 'a', pragma);

  CASE( 7, 'd', 'f', defined);
  CASE( 7, 'e', 'i', elifdef);
  CASE( 7, 'i', 'c', include);
  CASE( 7, 'w', 'r', warning);

  CASE( 8, 'e', 'i', elifndef);
  CASE( 8, 'u', 'a', unassert);
  CASE(12, 'i', 'c', include_next);

  CASE(14, '_', 'p', __public_macro);

  CASE(15, '_', 'p', __private_macro);

  CASE(16, '_', 'i', __include_macros);
#undef CASE
#undef HASH
  }
}
473 
474 //===----------------------------------------------------------------------===//
475 // Stats Implementation
476 //===----------------------------------------------------------------------===//
477 
478 /// PrintStats - Print statistics about how well the identifier table is doing
479 /// at hashing identifiers.
480 void IdentifierTable::PrintStats() const {
481   unsigned NumBuckets = HashTable.getNumBuckets();
482   unsigned NumIdentifiers = HashTable.getNumItems();
483   unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
484   unsigned AverageIdentifierSize = 0;
485   unsigned MaxIdentifierLength = 0;
486 
487   // TODO: Figure out maximum times an identifier had to probe for -stats.
488   for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator
489        I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
490     unsigned IdLen = I->getKeyLength();
491     AverageIdentifierSize += IdLen;
492     if (MaxIdentifierLength < IdLen)
493       MaxIdentifierLength = IdLen;
494   }
495 
496   fprintf(stderr, "\n*** Identifier Table Stats:\n");
497   fprintf(stderr, "# Identifiers:   %d\n", NumIdentifiers);
498   fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
499   fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
500           NumIdentifiers/(double)NumBuckets);
501   fprintf(stderr, "Ave identifier length: %f\n",
502           (AverageIdentifierSize/(double)NumIdentifiers));
503   fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
504 
505   // Compute statistics about the memory allocated for identifiers.
506   HashTable.getAllocator().PrintStats();
507 }
508 
509 //===----------------------------------------------------------------------===//
510 // SelectorTable Implementation
511 //===----------------------------------------------------------------------===//
512 
/// Hashes a selector by hashing its opaque pointer representation with the
/// DenseMapInfo specialization for void*.
unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
  return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
}
516 
namespace clang {

/// One of these variable length records is kept for each
/// selector containing more than one keyword. We use a folding set
/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
/// this class is provided strictly through Selector.
///
/// The keyword pointers are not a declared member: they are stored in memory
/// immediately following the object (addressed as `this + 1`), so whoever
/// allocates an instance must reserve room for nKeys IdentifierInfo pointers
/// after it.
class alignas(IdentifierInfoAlignment) MultiKeywordSelector
    : public detail::DeclarationNameExtra,
      public llvm::FoldingSetNode {
  // Private constructor that only records the key count and leaves the
  // trailing keyword array untouched.
  MultiKeywordSelector(unsigned nKeys) : DeclarationNameExtra(nKeys) {}

public:
  // Constructor for keyword selectors. Copies the nKeys pointers in IIV into
  // the trailing storage.
  MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV)
      : DeclarationNameExtra(nKeys) {
    assert((nKeys > 1) && "not a multi-keyword selector");

    // Fill in the trailing keyword array.
    IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this + 1);
    for (unsigned i = 0; i != nKeys; ++i)
      KeyInfo[i] = IIV[i];
  }

  // getName - Derive the full selector name and return it.
  std::string getName() const;

  using DeclarationNameExtra::getNumArgs;

  using keyword_iterator = IdentifierInfo *const *;

  // First element of the trailing keyword array.
  keyword_iterator keyword_begin() const {
    return reinterpret_cast<keyword_iterator>(this + 1);
  }

  // One past the last keyword; the array holds getNumArgs() entries.
  keyword_iterator keyword_end() const {
    return keyword_begin() + getNumArgs();
  }

  // Returns the keyword stored in slot i; i must be less than getNumArgs().
  IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
    assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index");
    return keyword_begin()[i];
  }

  // Folding-set profile of a keyword list: the count followed by each keyword
  // pointer, so two lists profile equal only if they hold the same pointers
  // in the same order.
  static void Profile(llvm::FoldingSetNodeID &ID, keyword_iterator ArgTys,
                      unsigned NumArgs) {
    ID.AddInteger(NumArgs);
    for (unsigned i = 0; i != NumArgs; ++i)
      ID.AddPointer(ArgTys[i]);
  }

  // Profiles this node from its own trailing keyword array.
  void Profile(llvm::FoldingSetNodeID &ID) {
    Profile(ID, keyword_begin(), getNumArgs());
  }
};

} // namespace clang.
573 
574 bool Selector::isKeywordSelector(ArrayRef<StringRef> Names) const {
575   assert(!Names.empty() && "must have >= 1 selector slots");
576   if (getNumArgs() != Names.size())
577     return false;
578   for (unsigned I = 0, E = Names.size(); I != E; ++I) {
579     if (getNameForSlot(I) != Names[I])
580       return false;
581   }
582   return true;
583 }
584 
585 bool Selector::isUnarySelector(StringRef Name) const {
586   return isUnarySelector() && getNameForSlot(0) == Name;
587 }
588 
589 unsigned Selector::getNumArgs() const {
590   unsigned IIF = getIdentifierInfoFlag();
591   if (IIF <= ZeroArg)
592     return 0;
593   if (IIF == OneArg)
594     return 1;
595   // We point to a MultiKeywordSelector.
596   MultiKeywordSelector *SI = getMultiKeywordSelector();
597   return SI->getNumArgs();
598 }
599 
600 IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
601   if (getIdentifierInfoFlag() < MultiArg) {
602     assert(argIndex == 0 && "illegal keyword index");
603     return getAsIdentifierInfo();
604   }
605 
606   // We point to a MultiKeywordSelector.
607   MultiKeywordSelector *SI = getMultiKeywordSelector();
608   return SI->getIdentifierInfoForSlot(argIndex);
609 }
610 
611 StringRef Selector::getNameForSlot(unsigned int argIndex) const {
612   IdentifierInfo *II = getIdentifierInfoForSlot(argIndex);
613   return II ? II->getName() : StringRef();
614 }
615 
616 std::string MultiKeywordSelector::getName() const {
617   SmallString<256> Str;
618   llvm::raw_svector_ostream OS(Str);
619   for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
620     if (*I)
621       OS << (*I)->getName();
622     OS << ':';
623   }
624 
625   return std::string(OS.str());
626 }
627 
628 std::string Selector::getAsString() const {
629   if (InfoPtr == 0)
630     return "<null selector>";
631 
632   if (getIdentifierInfoFlag() < MultiArg) {
633     IdentifierInfo *II = getAsIdentifierInfo();
634 
635     if (getNumArgs() == 0) {
636       assert(II && "If the number of arguments is 0 then II is guaranteed to "
637                    "not be null.");
638       return std::string(II->getName());
639     }
640 
641     if (!II)
642       return ":";
643 
644     return II->getName().str() + ":";
645   }
646 
647   // We have a multiple keyword selector.
648   return getMultiKeywordSelector()->getName();
649 }
650 
/// Writes the string form of this selector (see getAsString) to \p OS.
void Selector::print(llvm::raw_ostream &OS) const {
  OS << getAsString();
}
654 
/// Debugging aid: prints this selector to llvm::errs().
LLVM_DUMP_METHOD void Selector::dump() const { print(llvm::errs()); }
656 
657 /// Interpreting the given string using the normal CamelCase
658 /// conventions, determine whether the given string starts with the
659 /// given "word", which is assumed to end in a lowercase letter.
660 static bool startsWithWord(StringRef name, StringRef word) {
661   if (name.size() < word.size()) return false;
662   return ((name.size() == word.size() || !isLowercase(name[word.size()])) &&
663           name.startswith(word));
664 }
665 
666 ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) {
667   IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
668   if (!first) return OMF_None;
669 
670   StringRef name = first->getName();
671   if (sel.isUnarySelector()) {
672     if (name == "autorelease") return OMF_autorelease;
673     if (name == "dealloc") return OMF_dealloc;
674     if (name == "finalize") return OMF_finalize;
675     if (name == "release") return OMF_release;
676     if (name == "retain") return OMF_retain;
677     if (name == "retainCount") return OMF_retainCount;
678     if (name == "self") return OMF_self;
679     if (name == "initialize") return OMF_initialize;
680   }
681 
682   if (name == "performSelector" || name == "performSelectorInBackground" ||
683       name == "performSelectorOnMainThread")
684     return OMF_performSelector;
685 
686   // The other method families may begin with a prefix of underscores.
687   while (!name.empty() && name.front() == '_')
688     name = name.substr(1);
689 
690   if (name.empty()) return OMF_None;
691   switch (name.front()) {
692   case 'a':
693     if (startsWithWord(name, "alloc")) return OMF_alloc;
694     break;
695   case 'c':
696     if (startsWithWord(name, "copy")) return OMF_copy;
697     break;
698   case 'i':
699     if (startsWithWord(name, "init")) return OMF_init;
700     break;
701   case 'm':
702     if (startsWithWord(name, "mutableCopy")) return OMF_mutableCopy;
703     break;
704   case 'n':
705     if (startsWithWord(name, "new")) return OMF_new;
706     break;
707   default:
708     break;
709   }
710 
711   return OMF_None;
712 }
713 
714 ObjCInstanceTypeFamily Selector::getInstTypeMethodFamily(Selector sel) {
715   IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
716   if (!first) return OIT_None;
717 
718   StringRef name = first->getName();
719 
720   if (name.empty()) return OIT_None;
721   switch (name.front()) {
722     case 'a':
723       if (startsWithWord(name, "array")) return OIT_Array;
724       break;
725     case 'd':
726       if (startsWithWord(name, "default")) return OIT_ReturnsSelf;
727       if (startsWithWord(name, "dictionary")) return OIT_Dictionary;
728       break;
729     case 's':
730       if (startsWithWord(name, "shared")) return OIT_ReturnsSelf;
731       if (startsWithWord(name, "standard")) return OIT_Singleton;
732       break;
733     case 'i':
734       if (startsWithWord(name, "init")) return OIT_Init;
735       break;
736     default:
737       break;
738   }
739   return OIT_None;
740 }
741 
742 ObjCStringFormatFamily Selector::getStringFormatFamilyImpl(Selector sel) {
743   IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
744   if (!first) return SFF_None;
745 
746   StringRef name = first->getName();
747 
748   switch (name.front()) {
749     case 'a':
750       if (name == "appendFormat") return SFF_NSString;
751       break;
752 
753     case 'i':
754       if (name == "initWithFormat") return SFF_NSString;
755       break;
756 
757     case 'l':
758       if (name == "localizedStringWithFormat") return SFF_NSString;
759       break;
760 
761     case 's':
762       if (name == "stringByAppendingFormat" ||
763           name == "stringWithFormat") return SFF_NSString;
764       break;
765   }
766   return SFF_None;
767 }
768 
namespace {

/// The state behind SelectorTable's opaque Impl pointer: the folding set
/// that uniques multi-keyword selectors, and the allocator their storage is
/// taken from.
struct SelectorTableImpl {
  llvm::FoldingSet<MultiKeywordSelector> Table;
  llvm::BumpPtrAllocator Allocator;
};

} // namespace
777 
/// Reinterprets the opaque pointer \p P as the SelectorTableImpl it refers
/// to. \p P is dereferenced unconditionally, so it must not be null.
static SelectorTableImpl &getSelectorTableImpl(void *P) {
  return *static_cast<SelectorTableImpl*>(P);
}
781 
782 SmallString<64>
783 SelectorTable::constructSetterName(StringRef Name) {
784   SmallString<64> SetterName("set");
785   SetterName += Name;
786   SetterName[3] = toUppercase(SetterName[3]);
787   return SetterName;
788 }
789 
790 Selector
791 SelectorTable::constructSetterSelector(IdentifierTable &Idents,
792                                        SelectorTable &SelTable,
793                                        const IdentifierInfo *Name) {
794   IdentifierInfo *SetterName =
795     &Idents.get(constructSetterName(Name->getName()));
796   return SelTable.getUnarySelector(SetterName);
797 }
798 
799 std::string SelectorTable::getPropertyNameFromSetterSelector(Selector Sel) {
800   StringRef Name = Sel.getNameForSlot(0);
801   assert(Name.startswith("set") && "invalid setter name");
802   return (Twine(toLowercase(Name[3])) + Name.drop_front(4)).str();
803 }
804 
805 size_t SelectorTable::getTotalMemory() const {
806   SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
807   return SelTabImpl.Allocator.getTotalMemory();
808 }
809 
810 Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) {
811   if (nKeys < 2)
812     return Selector(IIV[0], nKeys);
813 
814   SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
815 
816   // Unique selector, to guarantee there is one per name.
817   llvm::FoldingSetNodeID ID;
818   MultiKeywordSelector::Profile(ID, IIV, nKeys);
819 
820   void *InsertPos = nullptr;
821   if (MultiKeywordSelector *SI =
822         SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos))
823     return Selector(SI);
824 
825   // MultiKeywordSelector objects are not allocated with new because they have a
826   // variable size array (for parameter types) at the end of them.
827   unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *);
828   MultiKeywordSelector *SI =
829       (MultiKeywordSelector *)SelTabImpl.Allocator.Allocate(
830           Size, alignof(MultiKeywordSelector));
831   new (SI) MultiKeywordSelector(nKeys, IIV);
832   SelTabImpl.Table.InsertNode(SI, InsertPos);
833   return Selector(SI);
834 }
835 
/// Allocates the implementation object and stores it in Impl; it is read
/// back through getSelectorTableImpl and released by the destructor.
SelectorTable::SelectorTable() {
  Impl = new SelectorTableImpl();
}
839 
/// Destroys the implementation object that the constructor allocated.
SelectorTable::~SelectorTable() {
  delete &getSelectorTableImpl(Impl);
}
843 
/// Returns the spelling of the given overloaded operator, taken from the
/// OVERLOADED_OPERATOR entries of OperatorKinds.def, or nullptr for OO_None
/// and NUM_OVERLOADED_OPERATORS.
const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) {
  switch (Operator) {
  case OO_None:
  case NUM_OVERLOADED_OPERATORS:
    return nullptr;

  // One case per operator is generated from the .def file.
#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
  case OO_##Name: return Spelling;
#include "clang/Basic/OperatorKinds.def"
  }

  llvm_unreachable("Invalid OverloadedOperatorKind!");
}
857 
858 StringRef clang::getNullabilitySpelling(NullabilityKind kind,
859                                         bool isContextSensitive) {
860   switch (kind) {
861   case NullabilityKind::NonNull:
862     return isContextSensitive ? "nonnull" : "_Nonnull";
863 
864   case NullabilityKind::Nullable:
865     return isContextSensitive ? "nullable" : "_Nullable";
866 
867   case NullabilityKind::NullableResult:
868     assert(!isContextSensitive &&
869            "_Nullable_result isn't supported as context-sensitive keyword");
870     return "_Nullable_result";
871 
872   case NullabilityKind::Unspecified:
873     return isContextSensitive ? "null_unspecified" : "_Null_unspecified";
874   }
875   llvm_unreachable("Unknown nullability kind.");
876 }
877 
878 llvm::raw_ostream &clang::operator<<(llvm::raw_ostream &OS,
879                                      NullabilityKind NK) {
880   switch (NK) {
881   case NullabilityKind::NonNull:
882     return OS << "NonNull";
883   case NullabilityKind::Nullable:
884     return OS << "Nullable";
885   case NullabilityKind::NullableResult:
886     return OS << "NullableResult";
887   case NullabilityKind::Unspecified:
888     return OS << "Unspecified";
889   }
890   llvm_unreachable("Unknown nullability kind.");
891 }
892 
/// Picks the "this identifier is a keyword in a newer standard" warning for
/// \p II, which must be marked as a future-compat keyword.
///
/// The keyword's flags are looked up by name among the KEYWORD entries of
/// TokenKinds.def; the warning is then chosen from those flags and from
/// whether \p LangOpts selects C++.
diag::kind
IdentifierTable::getFutureCompatDiagKind(const IdentifierInfo &II,
                                         const LangOptions &LangOpts) {
  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");

  // Map the spelling back to its TokenKinds.def flags. There is no Default
  // case: the name is expected to match one of the KEYWORD entries.
  unsigned Flags = llvm::StringSwitch<unsigned>(II.getName())
#define KEYWORD(NAME, FLAGS) .Case(#NAME, FLAGS)
#include "clang/Basic/TokenKinds.def"
#undef KEYWORD
      ;

  if (LangOpts.CPlusPlus) {
    // KEYCXX11 is tested first, so it takes precedence over the C++20 flags.
    if ((Flags & KEYCXX11) == KEYCXX11)
      return diag::warn_cxx11_keyword;

    // char8_t is not modeled as a CXX20_KEYWORD because it's not
    // unconditionally enabled in C++20 mode. (It can be disabled
    // by -fno-char8_t.)
    if (((Flags & KEYCXX20) == KEYCXX20) ||
        ((Flags & CHAR8SUPPORT) == CHAR8SUPPORT))
      return diag::warn_cxx20_keyword;
  } else {
    if ((Flags & KEYC99) == KEYC99)
      return diag::warn_c99_keyword;
    if ((Flags & KEYC2X) == KEYC2X)
      return diag::warn_c2x_keyword;
  }

  llvm_unreachable(
      "Keyword not known to come from a newer Standard or proposed Standard");
}
924