1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6 #include "clang/AST/AST.h"
7 #include "clang/AST/ASTConsumer.h"
8 #include "clang/AST/ASTContext.h"
9 #include "clang/AST/Expr.h"
10 #include "clang/AST/ExprCXX.h"
11 #include "clang/AST/Mangle.h"
12 #include "clang/AST/RecursiveASTVisitor.h"
13 #include "clang/Basic/SourceManager.h"
14 #include "clang/Basic/Version.h"
15 #include "clang/Frontend/CompilerInstance.h"
16 #include "clang/Frontend/FrontendPluginRegistry.h"
17 #include "clang/Lex/Lexer.h"
18 #include "clang/Lex/PPCallbacks.h"
19 #include "clang/Lex/Preprocessor.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/Support/raw_ostream.h"
22
#include <algorithm>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <tuple>
#include <unordered_set>
#include <vector>
29
30 #include <stdio.h>
31 #include <stdlib.h>
32
33 #include "FileOperations.h"
34 #include "JSONFormatter.h"
35 #include "StringOperations.h"
36
#if CLANG_VERSION_MAJOR < 8
// Starting with Clang 8.0 some basic functions have been renamed. When
// building against an older Clang, map the new spellings used in this file
// onto the old names.
#define getBeginLoc getLocStart
#define getEndLoc getLocEnd
#endif

// We want std::make_unique, but that's only available in c++14. In versions
// prior to that, we need to fall back to llvm's make_unique. It's also the
// case that we expect clang 10 to build with c++14 and clang 9 and earlier to
// build with c++11, at least as suggested by the llvm-config --cxxflags on
// non-windows platforms. mozilla-central seems to build with -std=c++17 on
// windows so we need to make this decision based on __cplusplus instead of
// the CLANG_VERSION_MAJOR.
#if __cplusplus < 201402L
using llvm::make_unique;
#else
using std::make_unique;
#endif
54
using namespace clang;

// Marker that FileInfo substitutes for the Objdir prefix of a path; it ends
// with the platform path separator.
const std::string GENERATED("__GENERATED__" PATHSEP_STRING);

// Absolute path to directory containing source code.
// NOTE(review): not assigned in this part of the file -- presumably filled in
// from the plugin arguments; confirm.
std::string Srcdir;

// Absolute path to objdir (including generated code).
std::string Objdir;

// Absolute path where analysis JSON output will be stored.
std::string Outdir;
67
68 #if !defined(_WIN32) && !defined(_WIN64)
69 #include <sys/time.h>
70
// Returns the current time reported by gettimeofday() as fractional seconds:
// whole seconds plus the microsecond part scaled down by one million.
static double time() {
  struct timeval Now;
  gettimeofday(&Now, nullptr);
  const double WholeSeconds = double(Now.tv_sec);
  const double Microseconds = double(Now.tv_usec);
  return WholeSeconds + Microseconds / 1000000.;
}
76 #endif
77
// Return true if |Input| consists only of characters that may appear in a C++
// identifier (ASCII letters, digits and '_'). We don't want to generate
// analysis information for operators, string literals, etc. by accident since
// it trips up consumers of the data.
//
// Note that this is a character-set check only: the empty string and strings
// that start with a digit are accepted.
static bool isValidIdentifier(std::string Input) {
  for (char C : Input) {
    // The <ctype.h> classifiers require a value representable as unsigned
    // char (or EOF); a plain char may be negative for non-ASCII bytes, so
    // convert before classifying.
    const unsigned char Byte = static_cast<unsigned char>(C);
    if (!(isalpha(Byte) || isdigit(Byte) || C == '_')) {
      return false;
    }
  }
  return true;
}
89
// Debugging aid: prints "<label>" to stdout when constructed and "</label>"
// when destroyed, so nested scopes show up as nested tags. The label pointer
// is stored, not copied, so it must outlive the tracer.
struct RAIITracer {
  RAIITracer(const char *log) : mLog(log) { fprintf(stdout, "<%s>\n", mLog); }

  ~RAIITracer() { fprintf(stdout, "</%s>\n", mLog); }

  // Label printed in the opening and closing tags.
  const char* mLog;
};
101
// Drop into a function body to trace its entry and exit: declares a local
// RAIITracer named |tracer| labelled with the enclosing function's name.
#define TRACEFUNC RAIITracer tracer(__FUNCTION__);
103
104 class IndexConsumer;
105
106 // For each C++ file seen by the analysis (.cpp or .h), we track a
107 // FileInfo. This object tracks whether the file is "interesting" (i.e., whether
108 // it's in the source dir or the objdir). We also store the analysis output
109 // here.
110 struct FileInfo {
FileInfoFileInfo111 FileInfo(std::string &Rname) : Realname(Rname) {
112 if (Rname.compare(0, Objdir.length(), Objdir) == 0) {
113 // We're in the objdir, so we are probably a generated header
114 // We use the escape character to indicate the objdir nature.
115 // Note that output also has the `/' already placed
116 Interesting = true;
117 Generated = true;
118 Realname.replace(0, Objdir.length(), GENERATED);
119 return;
120 }
121
122 // Empty filenames can get turned into Srcdir when they are resolved as
123 // absolute paths, so we should exclude files that are exactly equal to
124 // Srcdir or anything outside Srcdir.
125 Interesting = (Rname.length() > Srcdir.length()) &&
126 (Rname.compare(0, Srcdir.length(), Srcdir) == 0);
127 Generated = false;
128 if (Interesting) {
129 // Remove the trailing `/' as well.
130 Realname.erase(0, Srcdir.length() + 1);
131 }
132 }
133 std::string Realname;
134 std::vector<std::string> Output;
135 bool Interesting;
136 bool Generated;
137 };
138
139 class IndexConsumer;
140
141 class PreprocessorHook : public PPCallbacks {
142 IndexConsumer *Indexer;
143
144 public:
PreprocessorHook(IndexConsumer * C)145 PreprocessorHook(IndexConsumer *C) : Indexer(C) {}
146
147 virtual void MacroDefined(const Token &Tok,
148 const MacroDirective *Md) override;
149
150 virtual void MacroExpands(const Token &Tok, const MacroDefinition &Md,
151 SourceRange Range, const MacroArgs *Ma) override;
152 virtual void MacroUndefined(const Token &Tok, const MacroDefinition &Md,
153 const MacroDirective *Undef) override;
154 virtual void Defined(const Token &Tok, const MacroDefinition &Md,
155 SourceRange Range) override;
156 virtual void Ifdef(SourceLocation Loc, const Token &Tok,
157 const MacroDefinition &Md) override;
158 virtual void Ifndef(SourceLocation Loc, const Token &Tok,
159 const MacroDefinition &Md) override;
160 };
161
162 class IndexConsumer : public ASTConsumer,
163 public RecursiveASTVisitor<IndexConsumer>,
164 public DiagnosticConsumer {
165 private:
166 CompilerInstance &CI;
167 SourceManager &SM;
168 LangOptions &LO;
169 std::map<FileID, std::unique_ptr<FileInfo>> FileMap;
170 MangleContext *CurMangleContext;
171 ASTContext *AstContext;
172
173 typedef RecursiveASTVisitor<IndexConsumer> Super;
174
175 // Tracks the set of declarations that the current expression/statement is
176 // nested inside of.
177 struct AutoSetContext {
AutoSetContextIndexConsumer::AutoSetContext178 AutoSetContext(IndexConsumer *Self, NamedDecl *Context, bool VisitImplicit = false)
179 : Self(Self), Prev(Self->CurDeclContext), Decl(Context) {
180 this->VisitImplicit = VisitImplicit || (Prev ? Prev->VisitImplicit : false);
181 Self->CurDeclContext = this;
182 }
183
~AutoSetContextIndexConsumer::AutoSetContext184 ~AutoSetContext() { Self->CurDeclContext = Prev; }
185
186 IndexConsumer *Self;
187 AutoSetContext *Prev;
188 NamedDecl *Decl;
189 bool VisitImplicit;
190 };
191 AutoSetContext *CurDeclContext;
192
getFileInfo(SourceLocation Loc)193 FileInfo *getFileInfo(SourceLocation Loc) {
194 FileID Id = SM.getFileID(Loc);
195
196 std::map<FileID, std::unique_ptr<FileInfo>>::iterator It;
197 It = FileMap.find(Id);
198 if (It == FileMap.end()) {
199 // We haven't seen this file before. We need to make the FileInfo
200 // structure information ourselves
201 std::string Filename = SM.getFilename(Loc);
202 std::string Absolute;
203 // If Loc is a macro id rather than a file id, it Filename might be
204 // empty. Also for some types of file locations that are clang-internal
205 // like "<scratch>" it can return an empty Filename. In these cases we
206 // want to leave Absolute as empty.
207 if (!Filename.empty()) {
208 Absolute = getAbsolutePath(Filename);
209 if (Absolute.empty()) {
210 Absolute = Filename;
211 }
212 }
213 std::unique_ptr<FileInfo> Info = make_unique<FileInfo>(Absolute);
214 It = FileMap.insert(std::make_pair(Id, std::move(Info))).first;
215 }
216 return It->second.get();
217 }
218
219 // Helpers for processing declarations
220 // Should we ignore this location?
isInterestingLocation(SourceLocation Loc)221 bool isInterestingLocation(SourceLocation Loc) {
222 if (Loc.isInvalid()) {
223 return false;
224 }
225
226 return getFileInfo(Loc)->Interesting;
227 }
228
229 // Convert location to "line:column" or "line:column-column" given length.
230 // In resulting string rep, line is 1-based and zero-padded to 5 digits, while
231 // column is 0-based and unpadded.
locationToString(SourceLocation Loc,size_t Length=0)232 std::string locationToString(SourceLocation Loc, size_t Length = 0) {
233 std::pair<FileID, unsigned> Pair = SM.getDecomposedLoc(Loc);
234
235 bool IsInvalid;
236 unsigned Line = SM.getLineNumber(Pair.first, Pair.second, &IsInvalid);
237 if (IsInvalid) {
238 return "";
239 }
240 unsigned Column = SM.getColumnNumber(Pair.first, Pair.second, &IsInvalid);
241 if (IsInvalid) {
242 return "";
243 }
244
245 if (Length) {
246 return stringFormat("%05d:%d-%d", Line, Column - 1, Column - 1 + Length);
247 } else {
248 return stringFormat("%05d:%d", Line, Column - 1);
249 }
250 }
251
252 // Convert SourceRange to "line-line".
253 // In the resulting string rep, line is 1-based.
lineRangeToString(SourceRange Range)254 std::string lineRangeToString(SourceRange Range) {
255 std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin());
256 std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd());
257
258 bool IsInvalid;
259 unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid);
260 if (IsInvalid) {
261 return "";
262 }
263 unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid);
264 if (IsInvalid) {
265 return "";
266 }
267
268 return stringFormat("%d-%d", Line1, Line2);
269 }
270
271 // Convert SourceRange to "line:column-line:column".
272 // In the resulting string rep, line is 1-based, column is 0-based.
fullRangeToString(SourceRange Range)273 std::string fullRangeToString(SourceRange Range) {
274 std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin());
275 std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd());
276
277 bool IsInvalid;
278 unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid);
279 if (IsInvalid) {
280 return "";
281 }
282 unsigned Column1 = SM.getColumnNumber(Begin.first, Begin.second, &IsInvalid);
283 if (IsInvalid) {
284 return "";
285 }
286 unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid);
287 if (IsInvalid) {
288 return "";
289 }
290 unsigned Column2 = SM.getColumnNumber(End.first, End.second, &IsInvalid);
291 if (IsInvalid) {
292 return "";
293 }
294
295 return stringFormat("%d:%d-%d:%d", Line1, Column1 - 1, Line2, Column2 - 1);
296 }
297
298 // Returns the qualified name of `d` without considering template parameters.
getQualifiedName(const NamedDecl * D)299 std::string getQualifiedName(const NamedDecl *D) {
300 const DeclContext *Ctx = D->getDeclContext();
301 if (Ctx->isFunctionOrMethod()) {
302 return D->getQualifiedNameAsString();
303 }
304
305 std::vector<const DeclContext *> Contexts;
306
307 // Collect contexts.
308 while (Ctx && isa<NamedDecl>(Ctx)) {
309 Contexts.push_back(Ctx);
310 Ctx = Ctx->getParent();
311 }
312
313 std::string Result;
314
315 std::reverse(Contexts.begin(), Contexts.end());
316
317 for (const DeclContext *DC : Contexts) {
318 if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC)) {
319 Result += Spec->getNameAsString();
320
321 if (Spec->getSpecializationKind() == TSK_ExplicitSpecialization) {
322 std::string Backing;
323 llvm::raw_string_ostream Stream(Backing);
324 const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
325 printTemplateArgumentList(
326 Stream, TemplateArgs.asArray(), PrintingPolicy(CI.getLangOpts()));
327 Result += Stream.str();
328 }
329 } else if (const auto *Nd = dyn_cast<NamespaceDecl>(DC)) {
330 if (Nd->isAnonymousNamespace() || Nd->isInline()) {
331 continue;
332 }
333 Result += Nd->getNameAsString();
334 } else if (const auto *Rd = dyn_cast<RecordDecl>(DC)) {
335 if (!Rd->getIdentifier()) {
336 Result += "(anonymous)";
337 } else {
338 Result += Rd->getNameAsString();
339 }
340 } else if (const auto *Fd = dyn_cast<FunctionDecl>(DC)) {
341 Result += Fd->getNameAsString();
342 } else if (const auto *Ed = dyn_cast<EnumDecl>(DC)) {
343 // C++ [dcl.enum]p10: Each enum-name and each unscoped
344 // enumerator is declared in the scope that immediately contains
345 // the enum-specifier. Each scoped enumerator is declared in the
346 // scope of the enumeration.
347 if (Ed->isScoped() || Ed->getIdentifier())
348 Result += Ed->getNameAsString();
349 else
350 continue;
351 } else {
352 Result += cast<NamedDecl>(DC)->getNameAsString();
353 }
354 Result += "::";
355 }
356
357 if (D->getDeclName())
358 Result += D->getNameAsString();
359 else
360 Result += "(anonymous)";
361
362 return Result;
363 }
364
mangleLocation(SourceLocation Loc,std::string Backup=std::string ())365 std::string mangleLocation(SourceLocation Loc,
366 std::string Backup = std::string()) {
367 FileInfo *F = getFileInfo(Loc);
368 std::string Filename = F->Realname;
369 if (Filename.length() == 0 && Backup.length() != 0) {
370 return Backup;
371 }
372 if (F->Generated) {
373 // Since generated files may be different on different platforms,
374 // we need to include a platform-specific thing in the hash. Otherwise
375 // we can end up with hash collisions where different symbols from
376 // different platforms map to the same thing.
377 char* Platform = getenv("MOZSEARCH_PLATFORM");
378 Filename = std::string(Platform ? Platform : "") + std::string("@") + Filename;
379 }
380 return hash(Filename + std::string("@") + locationToString(Loc));
381 }
382
// Returns |Name| with every space character replaced by an underscore.
std::string mangleQualifiedName(std::string Name) {
  for (char &Ch : Name) {
    if (Ch == ' ') {
      Ch = '_';
    }
  }
  return Name;
}
387
getMangledName(clang::MangleContext * Ctx,const clang::NamedDecl * Decl)388 std::string getMangledName(clang::MangleContext *Ctx,
389 const clang::NamedDecl *Decl) {
390 if (isa<FunctionDecl>(Decl) && cast<FunctionDecl>(Decl)->isExternC()) {
391 return cast<FunctionDecl>(Decl)->getNameAsString();
392 }
393
394 if (isa<FunctionDecl>(Decl) || isa<VarDecl>(Decl)) {
395 const DeclContext *DC = Decl->getDeclContext();
396 if (isa<TranslationUnitDecl>(DC) || isa<NamespaceDecl>(DC) ||
397 isa<LinkageSpecDecl>(DC) ||
398 // isa<ExternCContextDecl>(DC) ||
399 isa<TagDecl>(DC)) {
400 llvm::SmallVector<char, 512> Output;
401 llvm::raw_svector_ostream Out(Output);
402 if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) {
403 Ctx->mangleCXXCtor(D, CXXCtorType::Ctor_Complete, Out);
404 } else if (const CXXDestructorDecl *D =
405 dyn_cast<CXXDestructorDecl>(Decl)) {
406 Ctx->mangleCXXDtor(D, CXXDtorType::Dtor_Complete, Out);
407 } else {
408 Ctx->mangleName(Decl, Out);
409 }
410 return Out.str().str();
411 } else {
412 return std::string("V_") + mangleLocation(Decl->getLocation()) +
413 std::string("_") + hash(Decl->getName());
414 }
415 } else if (isa<TagDecl>(Decl) || isa<TypedefNameDecl>(Decl) ||
416 isa<ObjCInterfaceDecl>(Decl)) {
417 if (!Decl->getIdentifier()) {
418 // Anonymous.
419 return std::string("T_") + mangleLocation(Decl->getLocation());
420 }
421
422 return std::string("T_") + mangleQualifiedName(getQualifiedName(Decl));
423 } else if (isa<NamespaceDecl>(Decl) || isa<NamespaceAliasDecl>(Decl)) {
424 if (!Decl->getIdentifier()) {
425 // Anonymous.
426 return std::string("NS_") + mangleLocation(Decl->getLocation());
427 }
428
429 return std::string("NS_") + mangleQualifiedName(getQualifiedName(Decl));
430 } else if (const ObjCIvarDecl *D2 = dyn_cast<ObjCIvarDecl>(Decl)) {
431 const ObjCInterfaceDecl *Iface = D2->getContainingInterface();
432 return std::string("F_<") + getMangledName(Ctx, Iface) + ">_" +
433 D2->getNameAsString();
434 } else if (const FieldDecl *D2 = dyn_cast<FieldDecl>(Decl)) {
435 const RecordDecl *Record = D2->getParent();
436 return std::string("F_<") + getMangledName(Ctx, Record) + ">_" +
437 D2->getNameAsString();
438 } else if (const EnumConstantDecl *D2 = dyn_cast<EnumConstantDecl>(Decl)) {
439 const DeclContext *DC = Decl->getDeclContext();
440 if (const NamedDecl *Named = dyn_cast<NamedDecl>(DC)) {
441 return std::string("E_<") + getMangledName(Ctx, Named) + ">_" +
442 D2->getNameAsString();
443 }
444 }
445
446 assert(false);
447 return std::string("");
448 }
449
debugLocation(SourceLocation Loc)450 void debugLocation(SourceLocation Loc) {
451 std::string S = locationToString(Loc);
452 StringRef Filename = SM.getFilename(Loc);
453 printf("--> %s %s\n", std::string(Filename).c_str(), S.c_str());
454 }
455
debugRange(SourceRange Range)456 void debugRange(SourceRange Range) {
457 printf("Range\n");
458 debugLocation(Range.getBegin());
459 debugLocation(Range.getEnd());
460 }
461
462 public:
// Binds the consumer to |CI| (keeping references to its source manager and
// language options), starts with no mangle context, AST context, declaration
// context or template stack, and registers a PreprocessorHook constructed
// with this consumer on CI's preprocessor.
IndexConsumer(CompilerInstance &CI)
    : CI(CI), SM(CI.getSourceManager()), LO(CI.getLangOpts()), CurMangleContext(nullptr),
      AstContext(nullptr), CurDeclContext(nullptr), TemplateStack(nullptr) {
  // Ownership of the hook is handed to the preprocessor via the unique_ptr.
  CI.getPreprocessor().addPPCallbacks(
      make_unique<PreprocessorHook>(this));
}
469
// Returns a new heap-allocated IndexConsumer built from the same
// CompilerInstance; the caller receives the raw pointer. |Diags| is unused.
// NOTE(review): not marked override -- confirm whether the targeted Clang's
// DiagnosticConsumer still declares clone().
virtual DiagnosticConsumer *clone(DiagnosticsEngine &Diags) const {
  return new IndexConsumer(CI);
}
473
474 #if !defined(_WIN32) && !defined(_WIN64)
475 struct AutoTime {
AutoTimeIndexConsumer::AutoTime476 AutoTime(double *Counter) : Counter(Counter), Start(time()) {}
~AutoTimeIndexConsumer::AutoTime477 ~AutoTime() {
478 if (Start) {
479 *Counter += time() - Start;
480 }
481 }
stopIndexConsumer::AutoTime482 void stop() {
483 *Counter += time() - Start;
484 Start = 0;
485 }
486 double *Counter;
487 double Start;
488 };
489 #endif
490
491 // All we need is to follow the final declaration.
HandleTranslationUnit(ASTContext & Ctx)492 virtual void HandleTranslationUnit(ASTContext &Ctx) {
493 CurMangleContext =
494 clang::ItaniumMangleContext::create(Ctx, CI.getDiagnostics());
495
496 AstContext = &Ctx;
497 TraverseDecl(Ctx.getTranslationUnitDecl());
498
499 // Emit the JSON data for all files now.
500 std::map<FileID, std::unique_ptr<FileInfo>>::iterator It;
501 for (It = FileMap.begin(); It != FileMap.end(); It++) {
502 if (!It->second->Interesting) {
503 continue;
504 }
505
506 FileInfo &Info = *It->second;
507
508 std::string Filename = Outdir + Info.Realname;
509 std::string SrcFilename = Info.Generated
510 ? Objdir + Info.Realname.substr(GENERATED.length())
511 : Srcdir + PATHSEP_STRING + Info.Realname;
512
513 ensurePath(Filename);
514
515 // We lock the output file in case some other clang process is trying to
516 // write to it at the same time.
517 AutoLockFile Lock(SrcFilename, Filename);
518
519 if (!Lock.success()) {
520 fprintf(stderr, "Unable to lock file %s\n", Filename.c_str());
521 exit(1);
522 }
523
524 // Merge our results with the existing lines from the output file.
525 // This ensures that header files that are included multiple times
526 // in different ways are analyzed completely.
527
528 FILE *Fp = Lock.openFile();
529 if (!Fp) {
530 fprintf(stderr, "Unable to open input file %s\n", Filename.c_str());
531 exit(1);
532 }
533 FILE *OutFp = Lock.openTmp();
534 if (!OutFp) {
535 fprintf(stderr, "Unable to open tmp out file for %s\n", Filename.c_str());
536 exit(1);
537 }
538
539 // Sort our new results and get an iterator to them
540 std::sort(Info.Output.begin(), Info.Output.end());
541 std::vector<std::string>::const_iterator NewLinesIter = Info.Output.begin();
542 std::string LastNewWritten;
543
544 // Loop over the existing (sorted) lines in the analysis output file.
545 char Buffer[65536];
546 while (fgets(Buffer, sizeof(Buffer), Fp)) {
547 std::string OldLine(Buffer);
548
549 // Write any results from Info.Output that are lexicographically
550 // smaller than OldLine (read from the existing file), but make sure
551 // to skip duplicates. Keep advacing NewLinesIter until we reach an
552 // entry that is lexicographically greater than OldLine.
553 for (; NewLinesIter != Info.Output.end(); NewLinesIter++) {
554 if (*NewLinesIter > OldLine) {
555 break;
556 }
557 if (*NewLinesIter == OldLine) {
558 continue;
559 }
560 if (*NewLinesIter == LastNewWritten) {
561 // dedupe the new entries being written
562 continue;
563 }
564 if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) {
565 fprintf(stderr, "Unable to write to tmp output file for %s\n", Filename.c_str());
566 exit(1);
567 }
568 LastNewWritten = *NewLinesIter;
569 }
570
571 // Write the entry read from the existing file.
572 if (fwrite(OldLine.c_str(), OldLine.length(), 1, OutFp) != 1) {
573 fprintf(stderr, "Unable to write to tmp output file for %s\n", Filename.c_str());
574 exit(1);
575 }
576 }
577
578 // We finished reading from Fp
579 fclose(Fp);
580
581 // Finish iterating our new results, discarding duplicates
582 for (; NewLinesIter != Info.Output.end(); NewLinesIter++) {
583 if (*NewLinesIter == LastNewWritten) {
584 continue;
585 }
586 if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) {
587 fprintf(stderr, "Unable to write to tmp output file for %s\n", Filename.c_str());
588 exit(1);
589 }
590 LastNewWritten = *NewLinesIter;
591 }
592
593 // Done writing all the things, close it and replace the old output file
594 // with the new one.
595 fclose(OutFp);
596 if (!Lock.moveTmp()) {
597 fprintf(stderr, "Unable to move tmp output file into place for %s (err %d)\n", Filename.c_str(), errno);
598 exit(1);
599 }
600 }
601 }
602
603 // Return a list of mangled names of all the methods that the given method
604 // overrides.
findOverriddenMethods(const CXXMethodDecl * Method,std::vector<std::string> & Symbols)605 void findOverriddenMethods(const CXXMethodDecl *Method,
606 std::vector<std::string> &Symbols) {
607 std::string Mangled = getMangledName(CurMangleContext, Method);
608 Symbols.push_back(Mangled);
609
610 CXXMethodDecl::method_iterator Iter = Method->begin_overridden_methods();
611 CXXMethodDecl::method_iterator End = Method->end_overridden_methods();
612 for (; Iter != End; Iter++) {
613 const CXXMethodDecl *Decl = *Iter;
614 if (Decl->isTemplateInstantiation()) {
615 Decl = dyn_cast<CXXMethodDecl>(Decl->getTemplateInstantiationPattern());
616 }
617 return findOverriddenMethods(Decl, Symbols);
618 }
619 }
620
621 // Unfortunately, we have to override all these methods in order to track the
622 // context we're inside.
623
TraverseEnumDecl(EnumDecl * D)624 bool TraverseEnumDecl(EnumDecl *D) {
625 AutoSetContext Asc(this, D);
626 return Super::TraverseEnumDecl(D);
627 }
TraverseRecordDecl(RecordDecl * D)628 bool TraverseRecordDecl(RecordDecl *D) {
629 AutoSetContext Asc(this, D);
630 return Super::TraverseRecordDecl(D);
631 }
TraverseCXXRecordDecl(CXXRecordDecl * D)632 bool TraverseCXXRecordDecl(CXXRecordDecl *D) {
633 AutoSetContext Asc(this, D);
634 return Super::TraverseCXXRecordDecl(D);
635 }
TraverseFunctionDecl(FunctionDecl * D)636 bool TraverseFunctionDecl(FunctionDecl *D) {
637 AutoSetContext Asc(this, D);
638 const FunctionDecl *Def;
639 // (See the larger AutoTemplateContext comment for more information.) If a
640 // method on a templated class is declared out-of-line, we need to analyze
641 // the definition inside the scope of the template or else we won't properly
642 // handle member access on the templated type.
643 if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
644 TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
645 }
646 return Super::TraverseFunctionDecl(D);
647 }
TraverseCXXMethodDecl(CXXMethodDecl * D)648 bool TraverseCXXMethodDecl(CXXMethodDecl *D) {
649 AutoSetContext Asc(this, D);
650 const FunctionDecl *Def;
651 // See TraverseFunctionDecl.
652 if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
653 TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
654 }
655 return Super::TraverseCXXMethodDecl(D);
656 }
TraverseCXXConstructorDecl(CXXConstructorDecl * D)657 bool TraverseCXXConstructorDecl(CXXConstructorDecl *D) {
658 AutoSetContext Asc(this, D, /*VisitImplicit=*/true);
659 const FunctionDecl *Def;
660 // See TraverseFunctionDecl.
661 if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
662 TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
663 }
664 return Super::TraverseCXXConstructorDecl(D);
665 }
TraverseCXXConversionDecl(CXXConversionDecl * D)666 bool TraverseCXXConversionDecl(CXXConversionDecl *D) {
667 AutoSetContext Asc(this, D);
668 const FunctionDecl *Def;
669 // See TraverseFunctionDecl.
670 if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
671 TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
672 }
673 return Super::TraverseCXXConversionDecl(D);
674 }
TraverseCXXDestructorDecl(CXXDestructorDecl * D)675 bool TraverseCXXDestructorDecl(CXXDestructorDecl *D) {
676 AutoSetContext Asc(this, D);
677 const FunctionDecl *Def;
678 // See TraverseFunctionDecl.
679 if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
680 TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
681 }
682 return Super::TraverseCXXDestructorDecl(D);
683 }
684
// Used to keep track of the context in which a token appears.
struct Context {
  // Ultimately this becomes the "context" JSON property.
  std::string Name;

  // Ultimately this becomes the "contextsym" JSON property.
  std::vector<std::string> Symbols;

  // An empty context: no name and no symbols.
  Context() {}

  // The arguments are taken by value and moved into place, so passing a
  // temporary (or std::move-ing an lvalue) costs no extra copy.
  Context(std::string Name, std::vector<std::string> Symbols)
      : Name(std::move(Name)), Symbols(std::move(Symbols)) {}
};
697
translateContext(NamedDecl * D)698 Context translateContext(NamedDecl *D) {
699 const FunctionDecl *F = dyn_cast<FunctionDecl>(D);
700 if (F && F->isTemplateInstantiation()) {
701 D = F->getTemplateInstantiationPattern();
702 }
703
704 std::vector<std::string> Symbols = {getMangledName(CurMangleContext, D)};
705 if (CXXMethodDecl::classof(D)) {
706 Symbols.clear();
707 findOverriddenMethods(dyn_cast<CXXMethodDecl>(D), Symbols);
708 }
709 return Context(D->getQualifiedNameAsString(), Symbols);
710 }
711
getContext(SourceLocation Loc)712 Context getContext(SourceLocation Loc) {
713 if (SM.isMacroBodyExpansion(Loc)) {
714 // If we're inside a macro definition, we don't return any context. It
715 // will probably not be what the user expects if we do.
716 return Context();
717 }
718
719 if (CurDeclContext) {
720 return translateContext(CurDeclContext->Decl);
721 }
722 return Context();
723 }
724
725 // Similar to GetContext(SourceLocation), but it skips the declaration passed
726 // in. This is useful if we want the context of a declaration that's already
727 // on the stack.
getContext(Decl * D)728 Context getContext(Decl *D) {
729 if (SM.isMacroBodyExpansion(D->getLocation())) {
730 // If we're inside a macro definition, we don't return any context. It
731 // will probably not be what the user expects if we do.
732 return Context();
733 }
734
735 AutoSetContext *Ctxt = CurDeclContext;
736 while (Ctxt) {
737 if (Ctxt->Decl != D) {
738 return translateContext(Ctxt->Decl);
739 }
740 Ctxt = Ctxt->Prev;
741 }
742 return Context();
743 }
744
// Joins |Symbols| into a single comma-separated string (no trailing comma).
// An empty list yields an empty string. The list is taken by const reference
// and elements are appended in place, so nothing is copied beyond the result.
static std::string concatSymbols(const std::vector<std::string> &Symbols) {
  if (Symbols.empty()) {
    return "";
  }

  // Exact final size: all symbol lengths plus one comma between neighbours.
  size_t Total = Symbols.size() - 1;
  for (const std::string &Symbol : Symbols) {
    Total += Symbol.length();
  }

  std::string SymbolList;
  SymbolList.reserve(Total);

  for (const std::string &Symbol : Symbols) {
    if (&Symbol != &Symbols.front()) {
      SymbolList.push_back(',');
    }
    SymbolList.append(Symbol);
  }

  return SymbolList;
}
770
771 // Analyzing template code is tricky. Suppose we have this code:
772 //
773 // template<class T>
774 // bool Foo(T* ptr) { return T::StaticMethod(ptr); }
775 //
776 // If we analyze the body of Foo without knowing the type T, then we will not
777 // be able to generate any information for StaticMethod. However, analyzing
778 // Foo for every possible instantiation is inefficient and it also generates
779 // too much data in some cases. For example, the following code would generate
780 // one definition of Baz for every instantiation, which is undesirable:
781 //
782 // template<class T>
783 // class Bar { struct Baz { ... }; };
784 //
785 // To solve this problem, we analyze templates only once. We do so in a
786 // GatherDependent mode where we look for "dependent scoped member
787 // expressions" (i.e., things like StaticMethod). We keep track of the
788 // locations of these expressions. If we find one or more of them, we analyze
789 // the template for each instantiation, in an AnalyzeDependent mode. This mode
790 // ignores all source locations except for the ones where we found dependent
791 // scoped member expressions before. For these locations, we generate a
792 // separate JSON result for each instantiation.
793 //
// We inherit our parent's mode if it exists. This is because if our
795 // parent is in analyze mode, it means we've already lived a full life in
796 // gather mode and we must not restart in gather mode or we'll cause the
797 // indexer to visit EVERY identifier, which is way too much data.
798 struct AutoTemplateContext {
AutoTemplateContextIndexConsumer::AutoTemplateContext799 AutoTemplateContext(IndexConsumer *Self)
800 : Self(Self)
801 , CurMode(Self->TemplateStack ? Self->TemplateStack->CurMode : Mode::GatherDependent)
802 , Parent(Self->TemplateStack) {
803 Self->TemplateStack = this;
804 }
805
~AutoTemplateContextIndexConsumer::AutoTemplateContext806 ~AutoTemplateContext() { Self->TemplateStack = Parent; }
807
808 // We traverse templates in two modes:
809 enum class Mode {
810 // Gather mode does not traverse into specializations. It looks for
811 // locations where it would help to have more info from template
812 // specializations.
813 GatherDependent,
814
815 // Analyze mode traverses into template specializations and records
816 // information about token locations saved in gather mode.
817 AnalyzeDependent,
818 };
819
820 // We found a dependent scoped member expression! Keep track of it for
821 // later.
visitDependentIndexConsumer::AutoTemplateContext822 void visitDependent(SourceLocation Loc) {
823 if (CurMode == Mode::AnalyzeDependent) {
824 return;
825 }
826
827 DependentLocations.insert(Loc.getRawEncoding());
828 if (Parent) {
829 Parent->visitDependent(Loc);
830 }
831 }
832
inGatherModeIndexConsumer::AutoTemplateContext833 bool inGatherMode() {
834 return CurMode == Mode::GatherDependent;
835 }
836
837 // Do we need to perform the extra AnalyzeDependent passes (one per
838 // instantiation)?
needsAnalysisIndexConsumer::AutoTemplateContext839 bool needsAnalysis() const {
840 if (!DependentLocations.empty()) {
841 return true;
842 }
843 if (Parent) {
844 return Parent->needsAnalysis();
845 }
846 return false;
847 }
848
switchModeIndexConsumer::AutoTemplateContext849 void switchMode() { CurMode = Mode::AnalyzeDependent; }
850
851 // Do we want to analyze each template instantiation separately?
shouldVisitTemplateInstantiationsIndexConsumer::AutoTemplateContext852 bool shouldVisitTemplateInstantiations() const {
853 if (CurMode == Mode::AnalyzeDependent) {
854 return true;
855 }
856 if (Parent) {
857 return Parent->shouldVisitTemplateInstantiations();
858 }
859 return false;
860 }
861
862 // For a given expression/statement, should we emit JSON data for it?
shouldVisitIndexConsumer::AutoTemplateContext863 bool shouldVisit(SourceLocation Loc) {
864 if (CurMode == Mode::GatherDependent) {
865 return true;
866 }
867 if (DependentLocations.find(Loc.getRawEncoding()) !=
868 DependentLocations.end()) {
869 return true;
870 }
871 if (Parent) {
872 return Parent->shouldVisit(Loc);
873 }
874 return false;
875 }
876
877 private:
878 IndexConsumer *Self;
879 Mode CurMode;
880 std::unordered_set<unsigned> DependentLocations;
881 AutoTemplateContext *Parent;
882 };
883
884 AutoTemplateContext *TemplateStack;
885
shouldVisitTemplateInstantiations() const886 bool shouldVisitTemplateInstantiations() const {
887 if (TemplateStack) {
888 return TemplateStack->shouldVisitTemplateInstantiations();
889 }
890 return false;
891 }
892
shouldVisitImplicitCode() const893 bool shouldVisitImplicitCode() const {
894 return CurDeclContext && CurDeclContext->VisitImplicit;
895 }
896
TraverseClassTemplateDecl(ClassTemplateDecl * D)897 bool TraverseClassTemplateDecl(ClassTemplateDecl *D) {
898 AutoTemplateContext Atc(this);
899 Super::TraverseClassTemplateDecl(D);
900
901 if (!Atc.needsAnalysis()) {
902 return true;
903 }
904
905 Atc.switchMode();
906
907 if (D != D->getCanonicalDecl()) {
908 return true;
909 }
910
911 for (auto *Spec : D->specializations()) {
912 for (auto *Rd : Spec->redecls()) {
913 // We don't want to visit injected-class-names in this traversal.
914 if (cast<CXXRecordDecl>(Rd)->isInjectedClassName())
915 continue;
916
917 TraverseDecl(Rd);
918 }
919 }
920
921 return true;
922 }
923
TraverseFunctionTemplateDecl(FunctionTemplateDecl * D)924 bool TraverseFunctionTemplateDecl(FunctionTemplateDecl *D) {
925 AutoTemplateContext Atc(this);
926 if (Atc.inGatherMode()) {
927 Super::TraverseFunctionTemplateDecl(D);
928 }
929
930 if (!Atc.needsAnalysis()) {
931 return true;
932 }
933
934 Atc.switchMode();
935
936 if (D != D->getCanonicalDecl()) {
937 return true;
938 }
939
940 for (auto *Spec : D->specializations()) {
941 for (auto *Rd : Spec->redecls()) {
942 TraverseDecl(Rd);
943 }
944 }
945
946 return true;
947 }
948
shouldVisit(SourceLocation Loc)949 bool shouldVisit(SourceLocation Loc) {
950 if (TemplateStack) {
951 return TemplateStack->shouldVisit(Loc);
952 }
953 return true;
954 }
955
// Bit flags accepted by visitIdentifier() through its Flags parameter.
enum {
  // Do not emit the per-symbol "target" records, and mark the "source"
  // record with "no_crossref".
  NoCrossref = 0x1,
  // The token is an operator: skip the check that its text is a valid
  // identifier.
  OperatorToken = 0x2,
};
960
961 // This is the only function that emits analysis JSON data. It should be
962 // called for each identifier that corresponds to a symbol.
visitIdentifier(const char * Kind,const char * SyntaxKind,std::string QualName,SourceLocation Loc,const std::vector<std::string> & Symbols,Context TokenContext=Context (),int Flags=0,SourceRange PeekRange=SourceRange (),SourceRange NestingRange=SourceRange ())963 void visitIdentifier(const char *Kind, const char *SyntaxKind,
964 std::string QualName, SourceLocation Loc,
965 const std::vector<std::string> &Symbols,
966 Context TokenContext = Context(), int Flags = 0,
967 SourceRange PeekRange = SourceRange(),
968 SourceRange NestingRange = SourceRange()) {
969 if (!shouldVisit(Loc)) {
970 return;
971 }
972
973 // Find the file positions corresponding to the token.
974 unsigned StartOffset = SM.getFileOffset(Loc);
975 unsigned EndOffset =
976 StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
977
978 std::string LocStr = locationToString(Loc, EndOffset - StartOffset);
979 std::string RangeStr = locationToString(Loc, EndOffset - StartOffset);
980 std::string PeekRangeStr;
981
982 if (!(Flags & OperatorToken)) {
983 // Get the token's characters so we can make sure it's a valid token.
984 const char *StartChars = SM.getCharacterData(Loc);
985 std::string Text(StartChars, EndOffset - StartOffset);
986 if (!isValidIdentifier(Text)) {
987 return;
988 }
989 }
990
991 FileInfo *F = getFileInfo(Loc);
992
993 std::string SymbolList;
994
995 // Reserve space in symbolList for everything in `symbols`. `symbols` can
996 // contain some very long strings.
997 size_t Total = 0;
998 for (auto It = Symbols.begin(); It != Symbols.end(); It++) {
999 Total += It->length();
1000 }
1001
1002 // Space for commas.
1003 Total += Symbols.size() - 1;
1004 SymbolList.reserve(Total);
1005
1006 // For each symbol, generate one "target":1 item. We want to find this line
1007 // if someone searches for any one of these symbols.
1008 for (auto It = Symbols.begin(); It != Symbols.end(); It++) {
1009 std::string Symbol = *It;
1010
1011 if (!(Flags & NoCrossref)) {
1012 JSONFormatter Fmt;
1013
1014 Fmt.add("loc", LocStr);
1015 Fmt.add("target", 1);
1016 Fmt.add("kind", Kind);
1017 Fmt.add("pretty", QualName);
1018 Fmt.add("sym", Symbol);
1019 if (!TokenContext.Name.empty()) {
1020 Fmt.add("context", TokenContext.Name);
1021 }
1022 std::string ContextSymbol = concatSymbols(TokenContext.Symbols);
1023 if (!ContextSymbol.empty()) {
1024 Fmt.add("contextsym", ContextSymbol);
1025 }
1026 if (PeekRange.isValid()) {
1027 PeekRangeStr = lineRangeToString(PeekRange);
1028 if (!PeekRangeStr.empty()) {
1029 Fmt.add("peekRange", PeekRangeStr);
1030 }
1031 }
1032
1033 std::string S;
1034 Fmt.format(S);
1035 F->Output.push_back(std::move(S));
1036 }
1037
1038 if (It != Symbols.begin()) {
1039 SymbolList.push_back(',');
1040 }
1041 SymbolList.append(Symbol);
1042 }
1043
1044 // Generate a single "source":1 for all the symbols. If we search from here,
1045 // we want to union the results for every symbol in `symbols`.
1046 JSONFormatter Fmt;
1047
1048 Fmt.add("loc", RangeStr);
1049 Fmt.add("source", 1);
1050
1051 if (NestingRange.isValid()) {
1052 std::string NestingRangeStr = fullRangeToString(NestingRange);
1053 if (!NestingRangeStr.empty()) {
1054 Fmt.add("nestingRange", NestingRangeStr);
1055 }
1056 }
1057
1058 std::string Syntax;
1059 if (Flags & NoCrossref) {
1060 Fmt.add("syntax", "");
1061 } else {
1062 Syntax = Kind;
1063 Syntax.push_back(',');
1064 Syntax.append(SyntaxKind);
1065 Fmt.add("syntax", Syntax);
1066 }
1067
1068 std::string Pretty(SyntaxKind);
1069 Pretty.push_back(' ');
1070 Pretty.append(QualName);
1071 Fmt.add("pretty", Pretty);
1072
1073 Fmt.add("sym", SymbolList);
1074
1075 if (Flags & NoCrossref) {
1076 Fmt.add("no_crossref", 1);
1077 }
1078
1079 std::string Buf;
1080 Fmt.format(Buf);
1081 F->Output.push_back(std::move(Buf));
1082 }
1083
visitIdentifier(const char * Kind,const char * SyntaxKind,std::string QualName,SourceLocation Loc,std::string Symbol,Context TokenContext=Context (),int Flags=0,SourceRange PeekRange=SourceRange (),SourceRange NestingRange=SourceRange ())1084 void visitIdentifier(const char *Kind, const char *SyntaxKind,
1085 std::string QualName, SourceLocation Loc, std::string Symbol,
1086 Context TokenContext = Context(), int Flags = 0,
1087 SourceRange PeekRange = SourceRange(),
1088 SourceRange NestingRange = SourceRange()) {
1089 std::vector<std::string> V = {Symbol};
1090 visitIdentifier(Kind, SyntaxKind, QualName, Loc, V, TokenContext, Flags,
1091 PeekRange, NestingRange);
1092 }
1093
normalizeLocation(SourceLocation * Loc)1094 void normalizeLocation(SourceLocation *Loc) {
1095 *Loc = SM.getSpellingLoc(*Loc);
1096 }
1097
1098 // For cases where the left-brace is not directly accessible from the AST,
1099 // helper to use the lexer to find the brace. Make sure you're picking the
1100 // start location appropriately!
findLeftBraceFromLoc(SourceLocation Loc)1101 SourceLocation findLeftBraceFromLoc(SourceLocation Loc) {
1102 return Lexer::findLocationAfterToken(Loc, tok::l_brace, SM, LO, false);
1103 }
1104
1105 // If the provided statement is compound, return its range.
getCompoundStmtRange(Stmt * D)1106 SourceRange getCompoundStmtRange(Stmt* D) {
1107 if (!D) {
1108 return SourceRange();
1109 }
1110
1111 CompoundStmt *D2 = dyn_cast<CompoundStmt>(D);
1112 if (D2) {
1113 return D2->getSourceRange();
1114 }
1115
1116 return SourceRange();
1117 }
1118
getFunctionPeekRange(FunctionDecl * D)1119 SourceRange getFunctionPeekRange(FunctionDecl* D) {
1120 // We always start at the start of the function decl, which may include the
1121 // return type on a separate line.
1122 SourceLocation Start = D->getBeginLoc();
1123
1124 // By default, we end at the line containing the function's name.
1125 SourceLocation End = D->getLocation();
1126
1127 std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedLoc(End);
1128
1129 // But if there are parameters, we want to include those as well.
1130 for (ParmVarDecl* Param : D->parameters()) {
1131 std::pair<FileID, unsigned> ParamLoc = SM.getDecomposedLoc(Param->getLocation());
1132
1133 // It's possible there are macros involved or something. We don't include
1134 // the parameters in that case.
1135 if (ParamLoc.first == FuncLoc.first) {
1136 // Assume parameters are in order, so we always take the last one.
1137 End = Param->getEndLoc();
1138 }
1139 }
1140
1141 return SourceRange(Start, End);
1142 }
1143
getTagPeekRange(TagDecl * D)1144 SourceRange getTagPeekRange(TagDecl* D) {
1145 SourceLocation Start = D->getBeginLoc();
1146
1147 // By default, we end at the line containing the name.
1148 SourceLocation End = D->getLocation();
1149
1150 std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedLoc(End);
1151
1152 if (CXXRecordDecl* D2 = dyn_cast<CXXRecordDecl>(D)) {
1153 // But if there are parameters, we want to include those as well.
1154 for (CXXBaseSpecifier& Base : D2->bases()) {
1155 std::pair<FileID, unsigned> Loc = SM.getDecomposedLoc(Base.getEndLoc());
1156
1157 // It's possible there are macros involved or something. We don't include
1158 // the parameters in that case.
1159 if (Loc.first == FuncLoc.first) {
1160 // Assume parameters are in order, so we always take the last one.
1161 End = Base.getEndLoc();
1162 }
1163 }
1164 }
1165
1166 return SourceRange(Start, End);
1167 }
1168
getCommentRange(NamedDecl * D)1169 SourceRange getCommentRange(NamedDecl* D) {
1170 const RawComment* RC =
1171 AstContext->getRawCommentForDeclNoCache(D);
1172 if (!RC) {
1173 return SourceRange();
1174 }
1175
1176 return RC->getSourceRange();
1177 }
1178
1179 // Sanity checks that all ranges are in the same file, returning the first if
1180 // they're in different files. Unions the ranges based on which is first.
combineRanges(SourceRange Range1,SourceRange Range2)1181 SourceRange combineRanges(SourceRange Range1, SourceRange Range2) {
1182 if (Range1.isInvalid()) {
1183 return Range2;
1184 }
1185 if (Range2.isInvalid()) {
1186 return Range1;
1187 }
1188
1189 std::pair<FileID, unsigned> Begin1 = SM.getDecomposedLoc(Range1.getBegin());
1190 std::pair<FileID, unsigned> End1 = SM.getDecomposedLoc(Range1.getEnd());
1191 std::pair<FileID, unsigned> Begin2 = SM.getDecomposedLoc(Range2.getBegin());
1192 std::pair<FileID, unsigned> End2 = SM.getDecomposedLoc(Range2.getEnd());
1193
1194 if (End1.first != Begin2.first) {
1195 // Something weird is probably happening with the preprocessor. Just
1196 // return the first range.
1197 return Range1;
1198 }
1199
1200 // See which range comes first.
1201 if (Begin1.second <= End2.second) {
1202 return SourceRange(Range1.getBegin(), Range2.getEnd());
1203 } else {
1204 return SourceRange(Range2.getBegin(), Range1.getEnd());
1205 }
1206 }
1207
1208 // Given a location and a range, returns the range if:
1209 // - The location and the range live in the same file.
1210 // - The range is well ordered (end is not before begin).
1211 // Returns an empty range otherwise.
validateRange(SourceLocation Loc,SourceRange Range)1212 SourceRange validateRange(SourceLocation Loc, SourceRange Range) {
1213 std::pair<FileID, unsigned> Decomposed = SM.getDecomposedLoc(Loc);
1214 std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin());
1215 std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd());
1216
1217 if (Begin.first != Decomposed.first || End.first != Decomposed.first) {
1218 return SourceRange();
1219 }
1220
1221 if (Begin.second >= End.second) {
1222 return SourceRange();
1223 }
1224
1225 return Range;
1226 }
1227
VisitNamedDecl(NamedDecl * D)1228 bool VisitNamedDecl(NamedDecl *D) {
1229 SourceLocation Loc = D->getLocation();
1230
1231 // If the token is from a macro expansion and the expansion location
1232 // is interesting, use that instead as it tends to be more useful.
1233 SourceLocation expandedLoc = Loc;
1234 if (SM.isMacroBodyExpansion(Loc)) {
1235 Loc = SM.getFileLoc(Loc);
1236 }
1237
1238 normalizeLocation(&Loc);
1239 if (!isInterestingLocation(Loc)) {
1240 return true;
1241 }
1242
1243 if (isa<ParmVarDecl>(D) && !D->getDeclName().getAsIdentifierInfo()) {
1244 // Unnamed parameter in function proto.
1245 return true;
1246 }
1247
1248 int Flags = 0;
1249 const char *Kind = "def";
1250 const char *PrettyKind = "?";
1251 SourceRange PeekRange(D->getBeginLoc(), D->getEndLoc());
1252 // The nesting range identifies the left brace and right brace, which
1253 // heavily depends on the AST node type.
1254 SourceRange NestingRange;
1255 if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) {
1256 if (D2->isTemplateInstantiation()) {
1257 D = D2->getTemplateInstantiationPattern();
1258 }
1259 Kind = D2->isThisDeclarationADefinition() ? "def" : "decl";
1260 PrettyKind = "function";
1261 PeekRange = getFunctionPeekRange(D2);
1262
1263 // Only emit the nesting range if:
1264 // - This is a definition AND
1265 // - This isn't a template instantiation. Function templates'
1266 // instantiations can end up as a definition with a Loc at their point
1267 // of declaration but with the CompoundStmt of the template's
1268 // point of definition. This really messes up the nesting range logic.
1269 // At the time of writing this, the test repo's `big_header.h`'s
1270 // `WhatsYourVector_impl::forwardDeclaredTemplateThingInlinedBelow` as
1271 // instantiated by `big_cpp.cpp` triggers this phenomenon.
1272 //
1273 // Note: As covered elsewhere, template processing is tricky and it's
1274 // conceivable that we may change traversal patterns in the future,
1275 // mooting this guard.
1276 if (D2->isThisDeclarationADefinition() &&
1277 !D2->isTemplateInstantiation()) {
1278 // The CompoundStmt range is the brace range.
1279 NestingRange = getCompoundStmtRange(D2->getBody());
1280 }
1281 } else if (TagDecl *D2 = dyn_cast<TagDecl>(D)) {
1282 Kind = D2->isThisDeclarationADefinition() ? "def" : "decl";
1283 PrettyKind = "type";
1284
1285 if (D2->isThisDeclarationADefinition() && D2->getDefinition() == D2) {
1286 PeekRange = getTagPeekRange(D2);
1287 NestingRange = D2->getBraceRange();
1288 } else {
1289 PeekRange = SourceRange();
1290 }
1291 } else if (isa<TypedefNameDecl>(D)) {
1292 Kind = "def";
1293 PrettyKind = "type";
1294 PeekRange = SourceRange(Loc, Loc);
1295 } else if (VarDecl *D2 = dyn_cast<VarDecl>(D)) {
1296 if (D2->isLocalVarDeclOrParm()) {
1297 Flags = NoCrossref;
1298 }
1299
1300 Kind = D2->isThisDeclarationADefinition() == VarDecl::DeclarationOnly
1301 ? "decl"
1302 : "def";
1303 PrettyKind = "variable";
1304 } else if (isa<NamespaceDecl>(D) || isa<NamespaceAliasDecl>(D)) {
1305 Kind = "def";
1306 PrettyKind = "namespace";
1307 PeekRange = SourceRange(Loc, Loc);
1308 NamespaceDecl *D2 = dyn_cast<NamespaceDecl>(D);
1309 if (D2) {
1310 // There's no exposure of the left brace so we have to find it.
1311 NestingRange = SourceRange(
1312 findLeftBraceFromLoc(D2->isAnonymousNamespace() ? D2->getBeginLoc() : Loc),
1313 D2->getRBraceLoc());
1314 }
1315 } else if (isa<FieldDecl>(D)) {
1316 Kind = "def";
1317 PrettyKind = "field";
1318 } else if (isa<EnumConstantDecl>(D)) {
1319 Kind = "def";
1320 PrettyKind = "enum constant";
1321 } else {
1322 return true;
1323 }
1324
1325 SourceRange CommentRange = getCommentRange(D);
1326 PeekRange = combineRanges(PeekRange, CommentRange);
1327 PeekRange = validateRange(Loc, PeekRange);
1328 NestingRange = validateRange(Loc, NestingRange);
1329
1330 std::vector<std::string> Symbols = {getMangledName(CurMangleContext, D)};
1331 if (CXXMethodDecl::classof(D)) {
1332 Symbols.clear();
1333 findOverriddenMethods(dyn_cast<CXXMethodDecl>(D), Symbols);
1334 }
1335
1336 // In the case of destructors, Loc might point to the ~ character. In that
1337 // case we want to skip to the name of the class. However, Loc might also
1338 // point to other places that generate destructors, such as the use site of
1339 // a macro that expands to generate a destructor, or a lambda (apparently
1340 // clang 8 creates a destructor declaration for at least some lambdas). In
1341 // the former case we'll use the macro use site as the location, and in the
1342 // latter we'll just drop the declaration.
1343 if (isa<CXXDestructorDecl>(D)) {
1344 PrettyKind = "destructor";
1345 const char *P = SM.getCharacterData(Loc);
1346 if (*P == '~') {
1347 // Advance Loc to the class name
1348 P++;
1349
1350 unsigned Skipped = 1;
1351 while (*P == ' ' || *P == '\t' || *P == '\r' || *P == '\n') {
1352 P++;
1353 Skipped++;
1354 }
1355
1356 Loc = Loc.getLocWithOffset(Skipped);
1357 } else {
1358 // See if the destructor is coming from a macro expansion
1359 P = SM.getCharacterData(expandedLoc);
1360 if (*P != '~') {
1361 // It's not
1362 return true;
1363 }
1364 // It is, so just use Loc as-is
1365 }
1366 }
1367
1368 visitIdentifier(Kind, PrettyKind, getQualifiedName(D), Loc, Symbols,
1369 getContext(D), Flags, PeekRange, NestingRange);
1370
1371 return true;
1372 }
1373
VisitCXXConstructExpr(CXXConstructExpr * E)1374 bool VisitCXXConstructExpr(CXXConstructExpr *E) {
1375 SourceLocation Loc = E->getBeginLoc();
1376 normalizeLocation(&Loc);
1377 if (!isInterestingLocation(Loc)) {
1378 return true;
1379 }
1380
1381 FunctionDecl *Ctor = E->getConstructor();
1382 if (Ctor->isTemplateInstantiation()) {
1383 Ctor = Ctor->getTemplateInstantiationPattern();
1384 }
1385 std::string Mangled = getMangledName(CurMangleContext, Ctor);
1386
1387 // FIXME: Need to do something different for list initialization.
1388
1389 visitIdentifier("use", "constructor", getQualifiedName(Ctor), Loc, Mangled,
1390 getContext(Loc));
1391
1392 return true;
1393 }
1394
VisitCallExpr(CallExpr * E)1395 bool VisitCallExpr(CallExpr *E) {
1396 Decl *Callee = E->getCalleeDecl();
1397 if (!Callee || !FunctionDecl::classof(Callee)) {
1398 return true;
1399 }
1400
1401 const NamedDecl *NamedCallee = dyn_cast<NamedDecl>(Callee);
1402
1403 SourceLocation Loc;
1404
1405 const FunctionDecl *F = dyn_cast<FunctionDecl>(NamedCallee);
1406 if (F->isTemplateInstantiation()) {
1407 NamedCallee = F->getTemplateInstantiationPattern();
1408 }
1409
1410 std::string Mangled = getMangledName(CurMangleContext, NamedCallee);
1411 int Flags = 0;
1412
1413 Expr *CalleeExpr = E->getCallee()->IgnoreParenImpCasts();
1414
1415 if (CXXOperatorCallExpr::classof(E)) {
1416 // Just take the first token.
1417 CXXOperatorCallExpr *Op = dyn_cast<CXXOperatorCallExpr>(E);
1418 Loc = Op->getOperatorLoc();
1419 Flags |= OperatorToken;
1420 } else if (MemberExpr::classof(CalleeExpr)) {
1421 MemberExpr *Member = dyn_cast<MemberExpr>(CalleeExpr);
1422 Loc = Member->getMemberLoc();
1423 } else if (DeclRefExpr::classof(CalleeExpr)) {
1424 // We handle this in VisitDeclRefExpr.
1425 return true;
1426 } else {
1427 return true;
1428 }
1429
1430 normalizeLocation(&Loc);
1431
1432 if (!isInterestingLocation(Loc)) {
1433 return true;
1434 }
1435
1436 visitIdentifier("use", "function", getQualifiedName(NamedCallee), Loc, Mangled,
1437 getContext(Loc), Flags);
1438
1439 return true;
1440 }
1441
VisitTagTypeLoc(TagTypeLoc L)1442 bool VisitTagTypeLoc(TagTypeLoc L) {
1443 SourceLocation Loc = L.getBeginLoc();
1444 normalizeLocation(&Loc);
1445 if (!isInterestingLocation(Loc)) {
1446 return true;
1447 }
1448
1449 TagDecl *Decl = L.getDecl();
1450 std::string Mangled = getMangledName(CurMangleContext, Decl);
1451 visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1452 getContext(Loc));
1453 return true;
1454 }
1455
VisitTypedefTypeLoc(TypedefTypeLoc L)1456 bool VisitTypedefTypeLoc(TypedefTypeLoc L) {
1457 SourceLocation Loc = L.getBeginLoc();
1458 normalizeLocation(&Loc);
1459 if (!isInterestingLocation(Loc)) {
1460 return true;
1461 }
1462
1463 NamedDecl *Decl = L.getTypedefNameDecl();
1464 std::string Mangled = getMangledName(CurMangleContext, Decl);
1465 visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1466 getContext(Loc));
1467 return true;
1468 }
1469
VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc L)1470 bool VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc L) {
1471 SourceLocation Loc = L.getBeginLoc();
1472 normalizeLocation(&Loc);
1473 if (!isInterestingLocation(Loc)) {
1474 return true;
1475 }
1476
1477 NamedDecl *Decl = L.getDecl();
1478 std::string Mangled = getMangledName(CurMangleContext, Decl);
1479 visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1480 getContext(Loc));
1481 return true;
1482 }
1483
VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L)1484 bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L) {
1485 SourceLocation Loc = L.getBeginLoc();
1486 normalizeLocation(&Loc);
1487 if (!isInterestingLocation(Loc)) {
1488 return true;
1489 }
1490
1491 TemplateDecl *Td = L.getTypePtr()->getTemplateName().getAsTemplateDecl();
1492 if (ClassTemplateDecl *D = dyn_cast<ClassTemplateDecl>(Td)) {
1493 NamedDecl *Decl = D->getTemplatedDecl();
1494 std::string Mangled = getMangledName(CurMangleContext, Decl);
1495 visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1496 getContext(Loc));
1497 } else if (TypeAliasTemplateDecl *D = dyn_cast<TypeAliasTemplateDecl>(Td)) {
1498 NamedDecl *Decl = D->getTemplatedDecl();
1499 std::string Mangled = getMangledName(CurMangleContext, Decl);
1500 visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1501 getContext(Loc));
1502 }
1503
1504 return true;
1505 }
1506
VisitDeclRefExpr(DeclRefExpr * E)1507 bool VisitDeclRefExpr(DeclRefExpr *E) {
1508 SourceLocation Loc = E->getExprLoc();
1509 normalizeLocation(&Loc);
1510 if (!isInterestingLocation(Loc)) {
1511 return true;
1512 }
1513
1514 if (E->hasQualifier()) {
1515 Loc = E->getNameInfo().getLoc();
1516 normalizeLocation(&Loc);
1517 }
1518
1519 NamedDecl *Decl = E->getDecl();
1520 if (const VarDecl *D2 = dyn_cast<VarDecl>(Decl)) {
1521 int Flags = 0;
1522 if (D2->isLocalVarDeclOrParm()) {
1523 Flags = NoCrossref;
1524 }
1525 std::string Mangled = getMangledName(CurMangleContext, Decl);
1526 visitIdentifier("use", "variable", getQualifiedName(Decl), Loc, Mangled,
1527 getContext(Loc), Flags);
1528 } else if (isa<FunctionDecl>(Decl)) {
1529 const FunctionDecl *F = dyn_cast<FunctionDecl>(Decl);
1530 if (F->isTemplateInstantiation()) {
1531 Decl = F->getTemplateInstantiationPattern();
1532 }
1533
1534 std::string Mangled = getMangledName(CurMangleContext, Decl);
1535 visitIdentifier("use", "function", getQualifiedName(Decl), Loc, Mangled,
1536 getContext(Loc));
1537 } else if (isa<EnumConstantDecl>(Decl)) {
1538 std::string Mangled = getMangledName(CurMangleContext, Decl);
1539 visitIdentifier("use", "enum", getQualifiedName(Decl), Loc, Mangled,
1540 getContext(Loc));
1541 }
1542
1543 return true;
1544 }
1545
VisitCXXConstructorDecl(CXXConstructorDecl * D)1546 bool VisitCXXConstructorDecl(CXXConstructorDecl *D) {
1547 if (!isInterestingLocation(D->getLocation())) {
1548 return true;
1549 }
1550
1551 for (CXXConstructorDecl::init_const_iterator It = D->init_begin();
1552 It != D->init_end(); ++It) {
1553 const CXXCtorInitializer *Ci = *It;
1554 if (!Ci->getMember() || !Ci->isWritten()) {
1555 continue;
1556 }
1557
1558 SourceLocation Loc = Ci->getMemberLocation();
1559 normalizeLocation(&Loc);
1560 if (!isInterestingLocation(Loc)) {
1561 continue;
1562 }
1563
1564 FieldDecl *Member = Ci->getMember();
1565 std::string Mangled = getMangledName(CurMangleContext, Member);
1566 visitIdentifier("use", "field", getQualifiedName(Member), Loc, Mangled,
1567 getContext(D));
1568 }
1569
1570 return true;
1571 }
1572
VisitMemberExpr(MemberExpr * E)1573 bool VisitMemberExpr(MemberExpr *E) {
1574 SourceLocation Loc = E->getExprLoc();
1575 normalizeLocation(&Loc);
1576 if (!isInterestingLocation(Loc)) {
1577 return true;
1578 }
1579
1580 ValueDecl *Decl = E->getMemberDecl();
1581 if (FieldDecl *Field = dyn_cast<FieldDecl>(Decl)) {
1582 std::string Mangled = getMangledName(CurMangleContext, Field);
1583 visitIdentifier("use", "field", getQualifiedName(Field), Loc, Mangled,
1584 getContext(Loc));
1585 }
1586 return true;
1587 }
1588
VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr * E)1589 bool VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E) {
1590 SourceLocation Loc = E->getMemberLoc();
1591 normalizeLocation(&Loc);
1592 if (!isInterestingLocation(Loc)) {
1593 return true;
1594 }
1595
1596 if (TemplateStack) {
1597 TemplateStack->visitDependent(Loc);
1598 }
1599 return true;
1600 }
1601
macroDefined(const Token & Tok,const MacroDirective * Macro)1602 void macroDefined(const Token &Tok, const MacroDirective *Macro) {
1603 if (Macro->getMacroInfo()->isBuiltinMacro()) {
1604 return;
1605 }
1606 SourceLocation Loc = Tok.getLocation();
1607 normalizeLocation(&Loc);
1608 if (!isInterestingLocation(Loc)) {
1609 return;
1610 }
1611
1612 IdentifierInfo *Ident = Tok.getIdentifierInfo();
1613 if (Ident) {
1614 std::string Mangled =
1615 std::string("M_") + mangleLocation(Loc, Ident->getName());
1616 visitIdentifier("def", "macro", Ident->getName(), Loc, Mangled);
1617 }
1618 }
1619
macroUsed(const Token & Tok,const MacroInfo * Macro)1620 void macroUsed(const Token &Tok, const MacroInfo *Macro) {
1621 if (!Macro) {
1622 return;
1623 }
1624 if (Macro->isBuiltinMacro()) {
1625 return;
1626 }
1627 SourceLocation Loc = Tok.getLocation();
1628 normalizeLocation(&Loc);
1629 if (!isInterestingLocation(Loc)) {
1630 return;
1631 }
1632
1633 IdentifierInfo *Ident = Tok.getIdentifierInfo();
1634 if (Ident) {
1635 std::string Mangled =
1636 std::string("M_") +
1637 mangleLocation(Macro->getDefinitionLoc(), Ident->getName());
1638 visitIdentifier("use", "macro", Ident->getName(), Loc, Mangled);
1639 }
1640 }
1641 };
1642
MacroDefined(const Token & Tok,const MacroDirective * Md)1643 void PreprocessorHook::MacroDefined(const Token &Tok,
1644 const MacroDirective *Md) {
1645 Indexer->macroDefined(Tok, Md);
1646 }
1647
MacroExpands(const Token & Tok,const MacroDefinition & Md,SourceRange Range,const MacroArgs * Ma)1648 void PreprocessorHook::MacroExpands(const Token &Tok, const MacroDefinition &Md,
1649 SourceRange Range, const MacroArgs *Ma) {
1650 Indexer->macroUsed(Tok, Md.getMacroInfo());
1651 }
1652
MacroUndefined(const Token & Tok,const MacroDefinition & Md,const MacroDirective * Undef)1653 void PreprocessorHook::MacroUndefined(const Token &Tok,
1654 const MacroDefinition &Md,
1655 const MacroDirective *Undef)
1656 {
1657 Indexer->macroUsed(Tok, Md.getMacroInfo());
1658 }
1659
Defined(const Token & Tok,const MacroDefinition & Md,SourceRange Range)1660 void PreprocessorHook::Defined(const Token &Tok, const MacroDefinition &Md,
1661 SourceRange Range) {
1662 Indexer->macroUsed(Tok, Md.getMacroInfo());
1663 }
1664
Ifdef(SourceLocation Loc,const Token & Tok,const MacroDefinition & Md)1665 void PreprocessorHook::Ifdef(SourceLocation Loc, const Token &Tok,
1666 const MacroDefinition &Md) {
1667 Indexer->macroUsed(Tok, Md.getMacroInfo());
1668 }
1669
Ifndef(SourceLocation Loc,const Token & Tok,const MacroDefinition & Md)1670 void PreprocessorHook::Ifndef(SourceLocation Loc, const Token &Tok,
1671 const MacroDefinition &Md) {
1672 Indexer->macroUsed(Tok, Md.getMacroInfo());
1673 }
1674
1675 class IndexAction : public PluginASTAction {
1676 protected:
CreateASTConsumer(CompilerInstance & CI,llvm::StringRef F)1677 std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
1678 llvm::StringRef F) {
1679 return make_unique<IndexConsumer>(CI);
1680 }
1681
ParseArgs(const CompilerInstance & CI,const std::vector<std::string> & Args)1682 bool ParseArgs(const CompilerInstance &CI,
1683 const std::vector<std::string> &Args) {
1684 if (Args.size() != 3) {
1685 DiagnosticsEngine &D = CI.getDiagnostics();
1686 unsigned DiagID = D.getCustomDiagID(
1687 DiagnosticsEngine::Error,
1688 "Need arguments for the source, output, and object directories");
1689 D.Report(DiagID);
1690 return false;
1691 }
1692
1693 // Load our directories
1694 Srcdir = getAbsolutePath(Args[0]);
1695 if (Srcdir.empty()) {
1696 DiagnosticsEngine &D = CI.getDiagnostics();
1697 unsigned DiagID = D.getCustomDiagID(
1698 DiagnosticsEngine::Error, "Source directory '%0' does not exist");
1699 D.Report(DiagID) << Args[0];
1700 return false;
1701 }
1702
1703 ensurePath(Args[1] + PATHSEP_STRING);
1704 Outdir = getAbsolutePath(Args[1]);
1705 Outdir += PATHSEP_STRING;
1706
1707 Objdir = getAbsolutePath(Args[2]);
1708 if (Objdir.empty()) {
1709 DiagnosticsEngine &D = CI.getDiagnostics();
1710 unsigned DiagID = D.getCustomDiagID(DiagnosticsEngine::Error,
1711 "Objdir '%0' does not exist");
1712 D.Report(DiagID) << Args[2];
1713 return false;
1714 }
1715 Objdir += PATHSEP_STRING;
1716
1717 printf("MOZSEARCH: %s %s %s\n", Srcdir.c_str(), Outdir.c_str(),
1718 Objdir.c_str());
1719
1720 return true;
1721 }
1722
printHelp(llvm::raw_ostream & Ros)1723 void printHelp(llvm::raw_ostream &Ros) {
1724 Ros << "Help for mozsearch plugin goes here\n";
1725 }
1726 };
1727
1728 static FrontendPluginRegistry::Add<IndexAction>
1729 Y("mozsearch-index", "create the mozsearch index database");
1730