1 //===- IdentifierTable.cpp - Hash table for identifier lookup -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the IdentifierInfo, IdentifierVisitor, and
10 // IdentifierTable interfaces.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "clang/Basic/IdentifierTable.h"
15 #include "clang/Basic/CharInfo.h"
16 #include "clang/Basic/DiagnosticLex.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/OperatorKinds.h"
19 #include "clang/Basic/Specifiers.h"
20 #include "clang/Basic/TargetBuiltins.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/DenseMapInfo.h"
23 #include "llvm/ADT/FoldingSet.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringMap.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <cassert>
31 #include <cstdio>
32 #include <cstring>
33 #include <string>
34
35 using namespace clang;
36
// A check to make sure the ObjCOrBuiltinID has sufficient room to store the
// largest possible target/aux-target combination. If we exceed this, we likely
// need to just change the ObjCOrBuiltinIDBits value in IdentifierTable.h.
// The right-hand side, 2 << (Bits - 1), is 2^Bits; the left-hand side is
// twice LargestBuiltinID.
static_assert(2 * LargestBuiltinID < (2 << (ObjCOrBuiltinIDBits - 1)),
              "Insufficient ObjCOrBuiltinID Bits");
42
43 //===----------------------------------------------------------------------===//
44 // IdentifierTable Implementation
45 //===----------------------------------------------------------------------===//
46
// Out-of-line defaulted destructor for IdentifierIterator.
IdentifierIterator::~IdentifierIterator() = default;
48
// Out-of-line defaulted destructor for IdentifierInfoLookup.
IdentifierInfoLookup::~IdentifierInfoLookup() = default;
50
51 namespace {
52
53 /// A simple identifier lookup iterator that represents an
54 /// empty sequence of identifiers.
55 class EmptyLookupIterator : public IdentifierIterator
56 {
57 public:
Next()58 StringRef Next() override { return StringRef(); }
59 };
60
61 } // namespace
62
getIdentifiers()63 IdentifierIterator *IdentifierInfoLookup::getIdentifiers() {
64 return new EmptyLookupIterator();
65 }
66
/// Creates a table without registering any keywords: only the hash table and
/// the external lookup pointer are initialized.
IdentifierTable::IdentifierTable(IdentifierInfoLookup *ExternalLookup)
    : HashTable(8192), // Start with space for 8K identifiers.
      ExternalLookup(ExternalLookup) {}
70
/// Creates a table via the single-argument constructor and then registers
/// the keywords selected by \p LangOpts.
IdentifierTable::IdentifierTable(const LangOptions &LangOpts,
                                 IdentifierInfoLookup *ExternalLookup)
    : IdentifierTable(ExternalLookup) {
  // Populate the identifier table with info about keywords for the current
  // language.
  AddKeywords(LangOpts);
}
78
79 //===----------------------------------------------------------------------===//
80 // Language Keyword Implementation
81 //===----------------------------------------------------------------------===//
82
// Constants for TokenKinds.def
namespace {

  /// Bit flags attached to each keyword entry. Every enumerator up to KEYMAX
  /// is a distinct single bit; KEYALLCXX and KEYALL are composite masks.
  enum TokenKey : unsigned {
    KEYC99 = 0x1,
    KEYCXX = 0x2,
    KEYCXX11 = 0x4,
    KEYGNU = 0x8,
    KEYMS = 0x10,
    BOOLSUPPORT = 0x20,
    KEYALTIVEC = 0x40,
    KEYNOCXX = 0x80,
    KEYBORLAND = 0x100,
    KEYOPENCLC = 0x200,
    KEYC2X = 0x400,
    KEYNOMS18 = 0x800,
    KEYNOOPENCL = 0x1000,
    WCHARSUPPORT = 0x2000,
    HALFSUPPORT = 0x4000,
    CHAR8SUPPORT = 0x8000,
    KEYOBJC = 0x10000,
    KEYZVECTOR = 0x20000,
    KEYCOROUTINES = 0x40000,
    KEYMODULES = 0x80000,
    KEYCXX20 = 0x100000,
    KEYOPENCLCXX = 0x200000,
    KEYMSCOMPAT = 0x400000,
    KEYSYCL = 0x800000,
    KEYCUDA = 0x1000000,
    KEYHLSL = 0x2000000,
    KEYMAX = KEYHLSL, // The maximum key
    KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20,
    // KEYMAX | (KEYMAX-1) sets every bit from the lowest up to KEYMAX; the
    // two exclusion flags are then masked back out.
    KEYALL = (KEYMAX | (KEYMAX-1)) & ~KEYNOMS18 &
             ~KEYNOOPENCL // KEYNOMS18 and KEYNOOPENCL are used to exclude.
  };

  /// How a keyword is treated in the selected standard.  This enum is ordered
  /// intentionally so that the value that 'wins' is the most 'permissive'.
  /// (getKeywordStatus merges per-flag results by taking the maximum.)
  enum KeywordStatus {
    KS_Unknown,     // Not yet calculated. Used when figuring out the status.
    KS_Disabled,    // Disabled
    KS_Future,      // Is a keyword in future standard
    KS_Extension,   // Is an extension
    KS_Enabled,     // Enabled
  };

} // namespace
130
131 // This works on a single TokenKey flag and checks the LangOpts to get the
132 // KeywordStatus based exclusively on this flag, so that it can be merged in
133 // getKeywordStatus. Most should be enabled/disabled, but some might imply
134 // 'future' versions, or extensions. Returns 'unknown' unless this is KNOWN to
135 // be disabled, and the calling function makes it 'disabled' if no other flag
136 // changes it. This is necessary for the KEYNOCXX and KEYNOOPENCL flags.
getKeywordStatusHelper(const LangOptions & LangOpts,TokenKey Flag)137 static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts,
138 TokenKey Flag) {
139 // Flag is a single bit version of TokenKey (that is, not
140 // KEYALL/KEYALLCXX/etc), so we can check with == throughout this function.
141 assert((Flag & ~(Flag - 1)) == Flag && "Multiple bits set?");
142
143 switch (Flag) {
144 case KEYC99:
145 if (LangOpts.C99)
146 return KS_Enabled;
147 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
148 case KEYC2X:
149 if (LangOpts.C2x)
150 return KS_Enabled;
151 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
152 case KEYCXX:
153 return LangOpts.CPlusPlus ? KS_Enabled : KS_Unknown;
154 case KEYCXX11:
155 if (LangOpts.CPlusPlus11)
156 return KS_Enabled;
157 return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
158 case KEYCXX20:
159 if (LangOpts.CPlusPlus20)
160 return KS_Enabled;
161 return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
162 case KEYGNU:
163 return LangOpts.GNUKeywords ? KS_Extension : KS_Unknown;
164 case KEYMS:
165 return LangOpts.MicrosoftExt ? KS_Extension : KS_Unknown;
166 case BOOLSUPPORT:
167 if (LangOpts.Bool) return KS_Enabled;
168 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
169 case KEYALTIVEC:
170 return LangOpts.AltiVec ? KS_Enabled : KS_Unknown;
171 case KEYBORLAND:
172 return LangOpts.Borland ? KS_Extension : KS_Unknown;
173 case KEYOPENCLC:
174 return LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus ? KS_Enabled
175 : KS_Unknown;
176 case WCHARSUPPORT:
177 return LangOpts.WChar ? KS_Enabled : KS_Unknown;
178 case HALFSUPPORT:
179 return LangOpts.Half ? KS_Enabled : KS_Unknown;
180 case CHAR8SUPPORT:
181 if (LangOpts.Char8) return KS_Enabled;
182 if (LangOpts.CPlusPlus20) return KS_Unknown;
183 if (LangOpts.CPlusPlus) return KS_Future;
184 return KS_Unknown;
185 case KEYOBJC:
186 // We treat bridge casts as objective-C keywords so we can warn on them
187 // in non-arc mode.
188 return LangOpts.ObjC ? KS_Enabled : KS_Unknown;
189 case KEYZVECTOR:
190 return LangOpts.ZVector ? KS_Enabled : KS_Unknown;
191 case KEYCOROUTINES:
192 return LangOpts.Coroutines ? KS_Enabled : KS_Unknown;
193 case KEYMODULES:
194 return LangOpts.ModulesTS ? KS_Enabled : KS_Unknown;
195 case KEYOPENCLCXX:
196 return LangOpts.OpenCLCPlusPlus ? KS_Enabled : KS_Unknown;
197 case KEYMSCOMPAT:
198 return LangOpts.MSVCCompat ? KS_Enabled : KS_Unknown;
199 case KEYSYCL:
200 return LangOpts.isSYCL() ? KS_Enabled : KS_Unknown;
201 case KEYCUDA:
202 return LangOpts.CUDA ? KS_Enabled : KS_Unknown;
203 case KEYHLSL:
204 return LangOpts.HLSL ? KS_Enabled : KS_Unknown;
205 case KEYNOCXX:
206 // This is enabled in all non-C++ modes, but might be enabled for other
207 // reasons as well.
208 return LangOpts.CPlusPlus ? KS_Unknown : KS_Enabled;
209 case KEYNOOPENCL:
210 // The disable behavior for this is handled in getKeywordStatus.
211 return KS_Unknown;
212 case KEYNOMS18:
213 // The disable behavior for this is handled in getKeywordStatus.
214 return KS_Unknown;
215 default:
216 llvm_unreachable("Unknown KeywordStatus flag");
217 }
218 }
219
220 /// Translates flags as specified in TokenKinds.def into keyword status
221 /// in the given language standard.
getKeywordStatus(const LangOptions & LangOpts,unsigned Flags)222 static KeywordStatus getKeywordStatus(const LangOptions &LangOpts,
223 unsigned Flags) {
224 // KEYALL means always enabled, so special case this one.
225 if (Flags == KEYALL) return KS_Enabled;
226 // These are tests that need to 'always win', as they are special in that they
227 // disable based on certain conditions.
228 if (LangOpts.OpenCL && (Flags & KEYNOOPENCL)) return KS_Disabled;
229 if (LangOpts.MSVCCompat && (Flags & KEYNOMS18) &&
230 !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2015))
231 return KS_Disabled;
232
233 KeywordStatus CurStatus = KS_Unknown;
234
235 while (Flags != 0) {
236 unsigned CurFlag = Flags & ~(Flags - 1);
237 Flags = Flags & ~CurFlag;
238 CurStatus = std::max(
239 CurStatus,
240 getKeywordStatusHelper(LangOpts, static_cast<TokenKey>(CurFlag)));
241 }
242
243 if (CurStatus == KS_Unknown)
244 return KS_Disabled;
245 return CurStatus;
246 }
247
248 /// AddKeyword - This method is used to associate a token ID with specific
249 /// identifiers because they are language keywords. This causes the lexer to
250 /// automatically map matching identifiers to specialized token codes.
AddKeyword(StringRef Keyword,tok::TokenKind TokenCode,unsigned Flags,const LangOptions & LangOpts,IdentifierTable & Table)251 static void AddKeyword(StringRef Keyword,
252 tok::TokenKind TokenCode, unsigned Flags,
253 const LangOptions &LangOpts, IdentifierTable &Table) {
254 KeywordStatus AddResult = getKeywordStatus(LangOpts, Flags);
255
256 // Don't add this keyword if disabled in this language.
257 if (AddResult == KS_Disabled) return;
258
259 IdentifierInfo &Info =
260 Table.get(Keyword, AddResult == KS_Future ? tok::identifier : TokenCode);
261 Info.setIsExtensionToken(AddResult == KS_Extension);
262 Info.setIsFutureCompatKeyword(AddResult == KS_Future);
263 }
264
265 /// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
266 /// representations.
AddCXXOperatorKeyword(StringRef Keyword,tok::TokenKind TokenCode,IdentifierTable & Table)267 static void AddCXXOperatorKeyword(StringRef Keyword,
268 tok::TokenKind TokenCode,
269 IdentifierTable &Table) {
270 IdentifierInfo &Info = Table.get(Keyword, TokenCode);
271 Info.setIsCPlusPlusOperatorKeyword();
272 }
273
274 /// AddObjCKeyword - Register an Objective-C \@keyword like "class" "selector"
275 /// or "property".
AddObjCKeyword(StringRef Name,tok::ObjCKeywordKind ObjCID,IdentifierTable & Table)276 static void AddObjCKeyword(StringRef Name,
277 tok::ObjCKeywordKind ObjCID,
278 IdentifierTable &Table) {
279 Table.get(Name).setObjCKeywordID(ObjCID);
280 }
281
/// AddKeywords - Add all keywords to the symbol table.
///
/// The bulk of the work is done by including TokenKinds.def with the macros
/// below defined, so each entry expands into a registration call. A few
/// option-gated keywords and the 'import' contextual keyword are added
/// explicitly afterwards.
void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
  // Add keywords and tokens for the current language.
  // KEYWORD/ALIAS entries go through AddKeyword, which filters on FLAGS.
#define KEYWORD(NAME, FLAGS) \
  AddKeyword(StringRef(#NAME), tok::kw_ ## NAME,  \
             FLAGS, LangOpts, *this);
#define ALIAS(NAME, TOK, FLAGS) \
  AddKeyword(StringRef(NAME), tok::kw_ ## TOK,  \
             FLAGS, LangOpts, *this);
  // Operator keywords are only registered when CXXOperatorNames is set.
#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
  if (LangOpts.CXXOperatorNames)          \
    AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this);
  // Objective-C @keywords are only registered when ObjC is set.
#define OBJC_AT_KEYWORD(NAME)  \
  if (LangOpts.ObjC)           \
    AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this);
  // TESTING_KEYWORD entries expand to nothing here.
#define TESTING_KEYWORD(NAME, FLAGS)
#include "clang/Basic/TokenKinds.def"

  // The following keywords are gated on individual language options and are
  // added with KEYALL, i.e. unconditionally once the option is on.
  if (LangOpts.ParseUnknownAnytype)
    AddKeyword("__unknown_anytype", tok::kw___unknown_anytype, KEYALL,
               LangOpts, *this);

  if (LangOpts.DeclSpecKeyword)
    AddKeyword("__declspec", tok::kw___declspec, KEYALL, LangOpts, *this);

  // "__ieee128" is registered with the __float128 token kind.
  if (LangOpts.IEEE128)
    AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this);

  // Add the 'import' contextual keyword.
  get("import").setModulesImport(true);
}
314
/// Checks if the specified token kind represents a keyword in the
/// specified language.
/// \returns Status of the keyword in the language.
///
/// The switch cases are generated from the KEYWORD entries of TokenKinds.def;
/// any token kind without such an entry reports KS_Disabled.
static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts,
                                      tok::TokenKind K) {
  switch (K) {
#define KEYWORD(NAME, FLAGS) \
  case tok::kw_##NAME: return getKeywordStatus(LangOpts, FLAGS);
#include "clang/Basic/TokenKinds.def"
  default: return KS_Disabled;
  }
}
327
328 /// Returns true if the identifier represents a keyword in the
329 /// specified language.
isKeyword(const LangOptions & LangOpts) const330 bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const {
331 switch (getTokenKwStatus(LangOpts, getTokenID())) {
332 case KS_Enabled:
333 case KS_Extension:
334 return true;
335 default:
336 return false;
337 }
338 }
339
340 /// Returns true if the identifier represents a C++ keyword in the
341 /// specified language.
isCPlusPlusKeyword(const LangOptions & LangOpts) const342 bool IdentifierInfo::isCPlusPlusKeyword(const LangOptions &LangOpts) const {
343 if (!LangOpts.CPlusPlus || !isKeyword(LangOpts))
344 return false;
345 // This is a C++ keyword if this identifier is not a keyword when checked
346 // using LangOptions without C++ support.
347 LangOptions LangOptsNoCPP = LangOpts;
348 LangOptsNoCPP.CPlusPlus = false;
349 LangOptsNoCPP.CPlusPlus11 = false;
350 LangOptsNoCPP.CPlusPlus20 = false;
351 return !isKeyword(LangOptsNoCPP);
352 }
353
354 ReservedIdentifierStatus
isReserved(const LangOptions & LangOpts) const355 IdentifierInfo::isReserved(const LangOptions &LangOpts) const {
356 StringRef Name = getName();
357
358 // '_' is a reserved identifier, but its use is so common (e.g. to store
359 // ignored values) that we don't warn on it.
360 if (Name.size() <= 1)
361 return ReservedIdentifierStatus::NotReserved;
362
363 // [lex.name] p3
364 if (Name[0] == '_') {
365
366 // Each name that begins with an underscore followed by an uppercase letter
367 // or another underscore is reserved.
368 if (Name[1] == '_')
369 return ReservedIdentifierStatus::StartsWithDoubleUnderscore;
370
371 if ('A' <= Name[1] && Name[1] <= 'Z')
372 return ReservedIdentifierStatus::
373 StartsWithUnderscoreFollowedByCapitalLetter;
374
375 // This is a bit misleading: it actually means it's only reserved if we're
376 // at global scope because it starts with an underscore.
377 return ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope;
378 }
379
380 // Each name that contains a double underscore (__) is reserved.
381 if (LangOpts.CPlusPlus && Name.contains("__"))
382 return ReservedIdentifierStatus::ContainsDoubleUnderscore;
383
384 return ReservedIdentifierStatus::NotReserved;
385 }
386
deuglifiedName() const387 StringRef IdentifierInfo::deuglifiedName() const {
388 StringRef Name = getName();
389 if (Name.size() >= 2 && Name.front() == '_' &&
390 (Name[1] == '_' || (Name[1] >= 'A' && Name[1] <= 'Z')))
391 return Name.ltrim('_');
392 return Name;
393 }
394
/// Maps this identifier to its preprocessor keyword kind, or
/// tok::pp_not_keyword when the name is not one of the spellings listed
/// below.
tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
  // We use a perfect hash function here involving the length of the keyword,
  // the first and third character.  For preprocessor ID's there are no
  // collisions (if there were, the switch below would complain about duplicate
  // case values).  Note that this depends on 'if' being null terminated.

  // HASH packs the length into the bits above bit 4 and the low five bits of
  // the sum of the two character offsets from 'a' into bits 0-4.
#define HASH(LEN, FIRST, THIRD) \
  (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
  // CASE emits one case label; on a hash hit, the first LEN bytes are still
  // compared against the spelling to confirm the match.
#define CASE(LEN, FIRST, THIRD, NAME) \
  case HASH(LEN, FIRST, THIRD): \
    return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME

  unsigned Len = getLength();
  // Names shorter than two characters are rejected up front.
  if (Len < 2) return tok::pp_not_keyword;
  const char *Name = getNameStart();
  // For a two-character name, Name[2] is the byte right after the name; this
  // is where the null-termination requirement mentioned above comes from.
  switch (HASH(Len, Name[0], Name[2])) {
  default: return tok::pp_not_keyword;
  CASE( 2, 'i', '\0', if);
  CASE( 4, 'e', 'i', elif);
  CASE( 4, 'e', 's', else);
  CASE( 4, 'l', 'n', line);
  CASE( 4, 's', 'c', sccs);
  CASE( 5, 'e', 'd', endif);
  CASE( 5, 'e', 'r', error);
  CASE( 5, 'i', 'e', ident);
  CASE( 5, 'i', 'd', ifdef);
  CASE( 5, 'u', 'd', undef);

  CASE( 6, 'a', 's', assert);
  CASE( 6, 'd', 'f', define);
  CASE( 6, 'i', 'n', ifndef);
  CASE( 6, 'i', 'p', import);
  CASE( 6, 'p', 'a', pragma);

  CASE( 7, 'd', 'f', defined);
  CASE( 7, 'e', 'i', elifdef);
  CASE( 7, 'i', 'c', include);
  CASE( 7, 'w', 'r', warning);

  CASE( 8, 'e', 'i', elifndef);
  CASE( 8, 'u', 'a', unassert);
  CASE(12, 'i', 'c', include_next);

  CASE(14, '_', 'p', __public_macro);

  CASE(15, '_', 'p', __private_macro);

  CASE(16, '_', 'i', __include_macros);
#undef CASE
#undef HASH
  }
}
447
448 //===----------------------------------------------------------------------===//
449 // Stats Implementation
450 //===----------------------------------------------------------------------===//
451
452 /// PrintStats - Print statistics about how well the identifier table is doing
453 /// at hashing identifiers.
PrintStats() const454 void IdentifierTable::PrintStats() const {
455 unsigned NumBuckets = HashTable.getNumBuckets();
456 unsigned NumIdentifiers = HashTable.getNumItems();
457 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
458 unsigned AverageIdentifierSize = 0;
459 unsigned MaxIdentifierLength = 0;
460
461 // TODO: Figure out maximum times an identifier had to probe for -stats.
462 for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator
463 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
464 unsigned IdLen = I->getKeyLength();
465 AverageIdentifierSize += IdLen;
466 if (MaxIdentifierLength < IdLen)
467 MaxIdentifierLength = IdLen;
468 }
469
470 fprintf(stderr, "\n*** Identifier Table Stats:\n");
471 fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers);
472 fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
473 fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
474 NumIdentifiers/(double)NumBuckets);
475 fprintf(stderr, "Ave identifier length: %f\n",
476 (AverageIdentifierSize/(double)NumIdentifiers));
477 fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
478
479 // Compute statistics about the memory allocated for identifiers.
480 HashTable.getAllocator().PrintStats();
481 }
482
483 //===----------------------------------------------------------------------===//
484 // SelectorTable Implementation
485 //===----------------------------------------------------------------------===//
486
getHashValue(clang::Selector S)487 unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
488 return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
489 }
490
namespace clang {

/// One of these variable length records is kept for each
/// selector containing more than one keyword. We use a folding set
/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
/// this class is provided strictly through Selector.
///
/// The keyword pointers are not a declared member: they live in the storage
/// immediately following the object (addressed as `this + 1`), so whoever
/// allocates an instance must reserve room for nKeys pointers after it.
class alignas(IdentifierInfoAlignment) MultiKeywordSelector
    : public detail::DeclarationNameExtra,
      public llvm::FoldingSetNode {
  // Private constructor that records the key count only and leaves the
  // trailing keyword array untouched.
  MultiKeywordSelector(unsigned nKeys) : DeclarationNameExtra(nKeys) {}

public:
  // Constructor for keyword selectors.
  // Copies the nKeys pointers from IIV into the trailing array.
  MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV)
      : DeclarationNameExtra(nKeys) {
    assert((nKeys > 1) && "not a multi-keyword selector");

    // Fill in the trailing keyword array.
    IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this + 1);
    for (unsigned i = 0; i != nKeys; ++i)
      KeyInfo[i] = IIV[i];
  }

  // getName - Derive the full selector name and return it.
  std::string getName() const;

  using DeclarationNameExtra::getNumArgs;

  using keyword_iterator = IdentifierInfo *const *;

  // First element of the trailing keyword array.
  keyword_iterator keyword_begin() const {
    return reinterpret_cast<keyword_iterator>(this + 1);
  }

  // One past the last keyword; the array holds getNumArgs() entries.
  keyword_iterator keyword_end() const {
    return keyword_begin() + getNumArgs();
  }

  // Returns the keyword stored in slot i; i must be below getNumArgs().
  IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
    assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index");
    return keyword_begin()[i];
  }

  // Folding-set profile: the key count followed by each keyword pointer.
  static void Profile(llvm::FoldingSetNodeID &ID, keyword_iterator ArgTys,
                      unsigned NumArgs) {
    ID.AddInteger(NumArgs);
    for (unsigned i = 0; i != NumArgs; ++i)
      ID.AddPointer(ArgTys[i]);
  }

  // Profiles this node using its own keyword array.
  void Profile(llvm::FoldingSetNodeID &ID) {
    Profile(ID, keyword_begin(), getNumArgs());
  }
};

} // namespace clang.
547
isKeywordSelector(ArrayRef<StringRef> Names) const548 bool Selector::isKeywordSelector(ArrayRef<StringRef> Names) const {
549 assert(!Names.empty() && "must have >= 1 selector slots");
550 if (getNumArgs() != Names.size())
551 return false;
552 for (unsigned I = 0, E = Names.size(); I != E; ++I) {
553 if (getNameForSlot(I) != Names[I])
554 return false;
555 }
556 return true;
557 }
558
isUnarySelector(StringRef Name) const559 bool Selector::isUnarySelector(StringRef Name) const {
560 return isUnarySelector() && getNameForSlot(0) == Name;
561 }
562
getNumArgs() const563 unsigned Selector::getNumArgs() const {
564 unsigned IIF = getIdentifierInfoFlag();
565 if (IIF <= ZeroArg)
566 return 0;
567 if (IIF == OneArg)
568 return 1;
569 // We point to a MultiKeywordSelector.
570 MultiKeywordSelector *SI = getMultiKeywordSelector();
571 return SI->getNumArgs();
572 }
573
getIdentifierInfoForSlot(unsigned argIndex) const574 IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
575 if (getIdentifierInfoFlag() < MultiArg) {
576 assert(argIndex == 0 && "illegal keyword index");
577 return getAsIdentifierInfo();
578 }
579
580 // We point to a MultiKeywordSelector.
581 MultiKeywordSelector *SI = getMultiKeywordSelector();
582 return SI->getIdentifierInfoForSlot(argIndex);
583 }
584
getNameForSlot(unsigned int argIndex) const585 StringRef Selector::getNameForSlot(unsigned int argIndex) const {
586 IdentifierInfo *II = getIdentifierInfoForSlot(argIndex);
587 return II ? II->getName() : StringRef();
588 }
589
getName() const590 std::string MultiKeywordSelector::getName() const {
591 SmallString<256> Str;
592 llvm::raw_svector_ostream OS(Str);
593 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
594 if (*I)
595 OS << (*I)->getName();
596 OS << ':';
597 }
598
599 return std::string(OS.str());
600 }
601
getAsString() const602 std::string Selector::getAsString() const {
603 if (InfoPtr == 0)
604 return "<null selector>";
605
606 if (getIdentifierInfoFlag() < MultiArg) {
607 IdentifierInfo *II = getAsIdentifierInfo();
608
609 if (getNumArgs() == 0) {
610 assert(II && "If the number of arguments is 0 then II is guaranteed to "
611 "not be null.");
612 return std::string(II->getName());
613 }
614
615 if (!II)
616 return ":";
617
618 return II->getName().str() + ":";
619 }
620
621 // We have a multiple keyword selector.
622 return getMultiKeywordSelector()->getName();
623 }
624
print(llvm::raw_ostream & OS) const625 void Selector::print(llvm::raw_ostream &OS) const {
626 OS << getAsString();
627 }
628
// Prints this selector to llvm::errs() via print().
LLVM_DUMP_METHOD void Selector::dump() const { print(llvm::errs()); }
630
631 /// Interpreting the given string using the normal CamelCase
632 /// conventions, determine whether the given string starts with the
633 /// given "word", which is assumed to end in a lowercase letter.
startsWithWord(StringRef name,StringRef word)634 static bool startsWithWord(StringRef name, StringRef word) {
635 if (name.size() < word.size()) return false;
636 return ((name.size() == word.size() || !isLowercase(name[word.size()])) &&
637 name.startswith(word));
638 }
639
getMethodFamilyImpl(Selector sel)640 ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) {
641 IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
642 if (!first) return OMF_None;
643
644 StringRef name = first->getName();
645 if (sel.isUnarySelector()) {
646 if (name == "autorelease") return OMF_autorelease;
647 if (name == "dealloc") return OMF_dealloc;
648 if (name == "finalize") return OMF_finalize;
649 if (name == "release") return OMF_release;
650 if (name == "retain") return OMF_retain;
651 if (name == "retainCount") return OMF_retainCount;
652 if (name == "self") return OMF_self;
653 if (name == "initialize") return OMF_initialize;
654 }
655
656 if (name == "performSelector" || name == "performSelectorInBackground" ||
657 name == "performSelectorOnMainThread")
658 return OMF_performSelector;
659
660 // The other method families may begin with a prefix of underscores.
661 while (!name.empty() && name.front() == '_')
662 name = name.substr(1);
663
664 if (name.empty()) return OMF_None;
665 switch (name.front()) {
666 case 'a':
667 if (startsWithWord(name, "alloc")) return OMF_alloc;
668 break;
669 case 'c':
670 if (startsWithWord(name, "copy")) return OMF_copy;
671 break;
672 case 'i':
673 if (startsWithWord(name, "init")) return OMF_init;
674 break;
675 case 'm':
676 if (startsWithWord(name, "mutableCopy")) return OMF_mutableCopy;
677 break;
678 case 'n':
679 if (startsWithWord(name, "new")) return OMF_new;
680 break;
681 default:
682 break;
683 }
684
685 return OMF_None;
686 }
687
getInstTypeMethodFamily(Selector sel)688 ObjCInstanceTypeFamily Selector::getInstTypeMethodFamily(Selector sel) {
689 IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
690 if (!first) return OIT_None;
691
692 StringRef name = first->getName();
693
694 if (name.empty()) return OIT_None;
695 switch (name.front()) {
696 case 'a':
697 if (startsWithWord(name, "array")) return OIT_Array;
698 break;
699 case 'd':
700 if (startsWithWord(name, "default")) return OIT_ReturnsSelf;
701 if (startsWithWord(name, "dictionary")) return OIT_Dictionary;
702 break;
703 case 's':
704 if (startsWithWord(name, "shared")) return OIT_ReturnsSelf;
705 if (startsWithWord(name, "standard")) return OIT_Singleton;
706 break;
707 case 'i':
708 if (startsWithWord(name, "init")) return OIT_Init;
709 break;
710 default:
711 break;
712 }
713 return OIT_None;
714 }
715
getStringFormatFamilyImpl(Selector sel)716 ObjCStringFormatFamily Selector::getStringFormatFamilyImpl(Selector sel) {
717 IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
718 if (!first) return SFF_None;
719
720 StringRef name = first->getName();
721
722 switch (name.front()) {
723 case 'a':
724 if (name == "appendFormat") return SFF_NSString;
725 break;
726
727 case 'i':
728 if (name == "initWithFormat") return SFF_NSString;
729 break;
730
731 case 'l':
732 if (name == "localizedStringWithFormat") return SFF_NSString;
733 break;
734
735 case 's':
736 if (name == "stringByAppendingFormat" ||
737 name == "stringWithFormat") return SFF_NSString;
738 break;
739 }
740 return SFF_None;
741 }
742
namespace {

/// Private state behind SelectorTable's opaque Impl pointer: the folding set
/// that uniques MultiKeywordSelector nodes and the bump allocator their
/// storage is taken from.
struct SelectorTableImpl {
  llvm::FoldingSet<MultiKeywordSelector> Table;
  llvm::BumpPtrAllocator Allocator;
};

} // namespace
751
getSelectorTableImpl(void * P)752 static SelectorTableImpl &getSelectorTableImpl(void *P) {
753 return *static_cast<SelectorTableImpl*>(P);
754 }
755
756 SmallString<64>
constructSetterName(StringRef Name)757 SelectorTable::constructSetterName(StringRef Name) {
758 SmallString<64> SetterName("set");
759 SetterName += Name;
760 SetterName[3] = toUppercase(SetterName[3]);
761 return SetterName;
762 }
763
764 Selector
constructSetterSelector(IdentifierTable & Idents,SelectorTable & SelTable,const IdentifierInfo * Name)765 SelectorTable::constructSetterSelector(IdentifierTable &Idents,
766 SelectorTable &SelTable,
767 const IdentifierInfo *Name) {
768 IdentifierInfo *SetterName =
769 &Idents.get(constructSetterName(Name->getName()));
770 return SelTable.getUnarySelector(SetterName);
771 }
772
getPropertyNameFromSetterSelector(Selector Sel)773 std::string SelectorTable::getPropertyNameFromSetterSelector(Selector Sel) {
774 StringRef Name = Sel.getNameForSlot(0);
775 assert(Name.startswith("set") && "invalid setter name");
776 return (Twine(toLowercase(Name[3])) + Name.drop_front(4)).str();
777 }
778
getTotalMemory() const779 size_t SelectorTable::getTotalMemory() const {
780 SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
781 return SelTabImpl.Allocator.getTotalMemory();
782 }
783
getSelector(unsigned nKeys,IdentifierInfo ** IIV)784 Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) {
785 if (nKeys < 2)
786 return Selector(IIV[0], nKeys);
787
788 SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
789
790 // Unique selector, to guarantee there is one per name.
791 llvm::FoldingSetNodeID ID;
792 MultiKeywordSelector::Profile(ID, IIV, nKeys);
793
794 void *InsertPos = nullptr;
795 if (MultiKeywordSelector *SI =
796 SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos))
797 return Selector(SI);
798
799 // MultiKeywordSelector objects are not allocated with new because they have a
800 // variable size array (for parameter types) at the end of them.
801 unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *);
802 MultiKeywordSelector *SI =
803 (MultiKeywordSelector *)SelTabImpl.Allocator.Allocate(
804 Size, alignof(MultiKeywordSelector));
805 new (SI) MultiKeywordSelector(nKeys, IIV);
806 SelTabImpl.Table.InsertNode(SI, InsertPos);
807 return Selector(SI);
808 }
809
SelectorTable()810 SelectorTable::SelectorTable() {
811 Impl = new SelectorTableImpl();
812 }
813
~SelectorTable()814 SelectorTable::~SelectorTable() {
815 delete &getSelectorTableImpl(Impl);
816 }
817
/// Returns the spelling of the given overloaded operator, or nullptr for
/// OO_None and NUM_OVERLOADED_OPERATORS. The remaining cases are generated
/// from the OVERLOADED_OPERATOR entries of OperatorKinds.def.
const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) {
  switch (Operator) {
  case OO_None:
  case NUM_OVERLOADED_OPERATORS:
    return nullptr;

#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
  case OO_##Name: return Spelling;
#include "clang/Basic/OperatorKinds.def"
  }

  // Reached only if Operator holds a value with no case above.
  llvm_unreachable("Invalid OverloadedOperatorKind!");
}
831
getNullabilitySpelling(NullabilityKind kind,bool isContextSensitive)832 StringRef clang::getNullabilitySpelling(NullabilityKind kind,
833 bool isContextSensitive) {
834 switch (kind) {
835 case NullabilityKind::NonNull:
836 return isContextSensitive ? "nonnull" : "_Nonnull";
837
838 case NullabilityKind::Nullable:
839 return isContextSensitive ? "nullable" : "_Nullable";
840
841 case NullabilityKind::NullableResult:
842 assert(!isContextSensitive &&
843 "_Nullable_result isn't supported as context-sensitive keyword");
844 return "_Nullable_result";
845
846 case NullabilityKind::Unspecified:
847 return isContextSensitive ? "null_unspecified" : "_Null_unspecified";
848 }
849 llvm_unreachable("Unknown nullability kind.");
850 }
851
/// Selects the "keyword in a newer standard" warning for \p II, which must be
/// marked as a future-compat keyword. The keyword's TokenKinds.def flags are
/// looked up by name and checked against the C or C++ family selected by
/// \p LangOpts.
diag::kind
IdentifierTable::getFutureCompatDiagKind(const IdentifierInfo &II,
                                         const LangOptions &LangOpts) {
  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");

  // Map the spelling back to its FLAGS; one .Case is generated per KEYWORD
  // entry.
  unsigned Flags = llvm::StringSwitch<unsigned>(II.getName())
#define KEYWORD(NAME, FLAGS) .Case(#NAME, FLAGS)
#include "clang/Basic/TokenKinds.def"
#undef KEYWORD
      ;

  if (LangOpts.CPlusPlus) {
    // C++11 is tested first, so a keyword flagged for both C++11 and C++20
    // gets the C++11 warning.
    if ((Flags & KEYCXX11) == KEYCXX11)
      return diag::warn_cxx11_keyword;

    // char8_t is not modeled as a CXX20_KEYWORD because it's not
    // unconditionally enabled in C++20 mode. (It can be disabled
    // by -fno-char8_t.)
    if (((Flags & KEYCXX20) == KEYCXX20) ||
        ((Flags & CHAR8SUPPORT) == CHAR8SUPPORT))
      return diag::warn_cxx20_keyword;
  } else {
    if ((Flags & KEYC99) == KEYC99)
      return diag::warn_c99_keyword;
    if ((Flags & KEYC2X) == KEYC2X)
      return diag::warn_c2x_keyword;
  }

  llvm_unreachable(
      "Keyword not known to come from a newer Standard or proposed Standard");
}
883