1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this
4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 #include "clang/AST/AST.h"
7 #include "clang/AST/ASTConsumer.h"
8 #include "clang/AST/ASTContext.h"
9 #include "clang/AST/Expr.h"
10 #include "clang/AST/ExprCXX.h"
11 #include "clang/AST/Mangle.h"
12 #include "clang/AST/RecursiveASTVisitor.h"
13 #include "clang/Basic/SourceManager.h"
14 #include "clang/Basic/Version.h"
15 #include "clang/Frontend/CompilerInstance.h"
16 #include "clang/Frontend/FrontendPluginRegistry.h"
17 #include "clang/Lex/Lexer.h"
18 #include "clang/Lex/PPCallbacks.h"
19 #include "clang/Lex/Preprocessor.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/Support/raw_ostream.h"
22 
23 #include <iostream>
24 #include <map>
25 #include <memory>
26 #include <sstream>
27 #include <tuple>
28 #include <unordered_set>
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 
33 #include "FileOperations.h"
34 #include "JSONFormatter.h"
35 #include "StringOperations.h"
36 
37 #if CLANG_VERSION_MAJOR < 8
38 // Starting with Clang 8.0 some basic functions have been renamed
39 #define getBeginLoc getLocStart
40 #define getEndLoc getLocEnd
41 #endif
42 // We want std::make_unique, but that's only available in c++14.  In versions
43 // prior to that, we need to fall back to llvm's make_unique.  It's also the
44 // case that we expect clang 10 to build with c++14 and clang 9 and earlier to
45 // build with c++11, at least as suggested by the llvm-config --cxxflags on
46 // non-windows platforms.  mozilla-central seems to build with -std=c++17 on
47 // windows so we need to make this decision based on __cplusplus instead of
48 // the CLANG_VERSION_MAJOR.
49 #if __cplusplus < 201402L
50 using llvm::make_unique;
51 #else
52 using std::make_unique;
53 #endif
54 
55 using namespace clang;
56 
57 const std::string GENERATED("__GENERATED__" PATHSEP_STRING);
58 
59 // Absolute path to directory containing source code.
60 std::string Srcdir;
61 
62 // Absolute path to objdir (including generated code).
63 std::string Objdir;
64 
65 // Absolute path where analysis JSON output will be stored.
66 std::string Outdir;
67 
68 #if !defined(_WIN32) && !defined(_WIN64)
69 #include <sys/time.h>
70 
// Returns the current time reported by gettimeofday() as a floating point
// number of seconds (the microsecond part becomes the fraction).
static double time() {
  struct timeval Now;
  gettimeofday(&Now, nullptr);
  const double WholeSeconds = double(Now.tv_sec);
  const double Microseconds = double(Now.tv_usec);
  return WholeSeconds + Microseconds / 1000000.;
}
76 #endif
77 
// Return true if |Input| consists only of characters that may appear in a C++
// identifier (ASCII letters, digits and '_'). We don't want to generate
// analysis information for operators, string literals, etc. by accident since
// it trips up consumers of the data.
//
// Note that, as before, neither an empty string nor a leading digit is
// rejected; only the character set is checked.
static bool isValidIdentifier(const std::string &Input) {
  for (const char C : Input) {
    // The <ctype.h> classification functions have undefined behavior for
    // negative arguments other than EOF, so classify the byte as an
    // unsigned char rather than as a (possibly signed) char.
    const unsigned char Byte = static_cast<unsigned char>(C);
    if (!(isalpha(Byte) || isdigit(Byte) || Byte == '_')) {
      return false;
    }
  }
  return true;
}
89 
// Debugging aid that prints "<log>" to stdout when constructed and "</log>"
// when destroyed, bracketing the lifetime of the enclosing scope.
//
// Only the pointer is stored, so |log| must stay valid until the tracer is
// destroyed.
struct RAIITracer {
  explicit RAIITracer(const char *log) : mLog(log) {
    printf("<%s>\n", mLog);
  }

  ~RAIITracer() {
    printf("</%s>\n", mLog);
  }

  // A copy would print a second closing tag for a single opening tag.
  RAIITracer(const RAIITracer &) = delete;
  RAIITracer &operator=(const RAIITracer &) = delete;

  const char* mLog;
};
101 
102 #define TRACEFUNC RAIITracer tracer(__FUNCTION__);
103 
104 class IndexConsumer;
105 
106 // For each C++ file seen by the analysis (.cpp or .h), we track a
107 // FileInfo. This object tracks whether the file is "interesting" (i.e., whether
108 // it's in the source dir or the objdir). We also store the analysis output
109 // here.
110 struct FileInfo {
FileInfoFileInfo111   FileInfo(std::string &Rname) : Realname(Rname) {
112     if (Rname.compare(0, Objdir.length(), Objdir) == 0) {
113       // We're in the objdir, so we are probably a generated header
114       // We use the escape character to indicate the objdir nature.
115       // Note that output also has the `/' already placed
116       Interesting = true;
117       Generated = true;
118       Realname.replace(0, Objdir.length(), GENERATED);
119       return;
120     }
121 
122     // Empty filenames can get turned into Srcdir when they are resolved as
123     // absolute paths, so we should exclude files that are exactly equal to
124     // Srcdir or anything outside Srcdir.
125     Interesting = (Rname.length() > Srcdir.length()) &&
126                   (Rname.compare(0, Srcdir.length(), Srcdir) == 0);
127     Generated = false;
128     if (Interesting) {
129       // Remove the trailing `/' as well.
130       Realname.erase(0, Srcdir.length() + 1);
131     }
132   }
133   std::string Realname;
134   std::vector<std::string> Output;
135   bool Interesting;
136   bool Generated;
137 };
138 
139 class IndexConsumer;
140 
141 class PreprocessorHook : public PPCallbacks {
142   IndexConsumer *Indexer;
143 
144 public:
PreprocessorHook(IndexConsumer * C)145   PreprocessorHook(IndexConsumer *C) : Indexer(C) {}
146 
147   virtual void MacroDefined(const Token &Tok,
148                             const MacroDirective *Md) override;
149 
150   virtual void MacroExpands(const Token &Tok, const MacroDefinition &Md,
151                             SourceRange Range, const MacroArgs *Ma) override;
152   virtual void MacroUndefined(const Token &Tok, const MacroDefinition &Md,
153                               const MacroDirective *Undef) override;
154   virtual void Defined(const Token &Tok, const MacroDefinition &Md,
155                        SourceRange Range) override;
156   virtual void Ifdef(SourceLocation Loc, const Token &Tok,
157                      const MacroDefinition &Md) override;
158   virtual void Ifndef(SourceLocation Loc, const Token &Tok,
159                       const MacroDefinition &Md) override;
160 };
161 
162 class IndexConsumer : public ASTConsumer,
163                       public RecursiveASTVisitor<IndexConsumer>,
164                       public DiagnosticConsumer {
165 private:
166   CompilerInstance &CI;
167   SourceManager &SM;
168   LangOptions &LO;
169   std::map<FileID, std::unique_ptr<FileInfo>> FileMap;
170   MangleContext *CurMangleContext;
171   ASTContext *AstContext;
172 
173   typedef RecursiveASTVisitor<IndexConsumer> Super;
174 
175   // Tracks the set of declarations that the current expression/statement is
176   // nested inside of.
177   struct AutoSetContext {
AutoSetContextIndexConsumer::AutoSetContext178     AutoSetContext(IndexConsumer *Self, NamedDecl *Context, bool VisitImplicit = false)
179         : Self(Self), Prev(Self->CurDeclContext), Decl(Context) {
180       this->VisitImplicit = VisitImplicit || (Prev ? Prev->VisitImplicit : false);
181       Self->CurDeclContext = this;
182     }
183 
~AutoSetContextIndexConsumer::AutoSetContext184     ~AutoSetContext() { Self->CurDeclContext = Prev; }
185 
186     IndexConsumer *Self;
187     AutoSetContext *Prev;
188     NamedDecl *Decl;
189     bool VisitImplicit;
190   };
191   AutoSetContext *CurDeclContext;
192 
getFileInfo(SourceLocation Loc)193   FileInfo *getFileInfo(SourceLocation Loc) {
194     FileID Id = SM.getFileID(Loc);
195 
196     std::map<FileID, std::unique_ptr<FileInfo>>::iterator It;
197     It = FileMap.find(Id);
198     if (It == FileMap.end()) {
199       // We haven't seen this file before. We need to make the FileInfo
200       // structure information ourselves
201       std::string Filename = SM.getFilename(Loc);
202       std::string Absolute;
203       // If Loc is a macro id rather than a file id, it Filename might be
204       // empty. Also for some types of file locations that are clang-internal
205       // like "<scratch>" it can return an empty Filename. In these cases we
206       // want to leave Absolute as empty.
207       if (!Filename.empty()) {
208         Absolute = getAbsolutePath(Filename);
209         if (Absolute.empty()) {
210           Absolute = Filename;
211         }
212       }
213       std::unique_ptr<FileInfo> Info = make_unique<FileInfo>(Absolute);
214       It = FileMap.insert(std::make_pair(Id, std::move(Info))).first;
215     }
216     return It->second.get();
217   }
218 
219   // Helpers for processing declarations
220   // Should we ignore this location?
isInterestingLocation(SourceLocation Loc)221   bool isInterestingLocation(SourceLocation Loc) {
222     if (Loc.isInvalid()) {
223       return false;
224     }
225 
226     return getFileInfo(Loc)->Interesting;
227   }
228 
229   // Convert location to "line:column" or "line:column-column" given length.
230   // In resulting string rep, line is 1-based and zero-padded to 5 digits, while
231   // column is 0-based and unpadded.
locationToString(SourceLocation Loc,size_t Length=0)232   std::string locationToString(SourceLocation Loc, size_t Length = 0) {
233     std::pair<FileID, unsigned> Pair = SM.getDecomposedLoc(Loc);
234 
235     bool IsInvalid;
236     unsigned Line = SM.getLineNumber(Pair.first, Pair.second, &IsInvalid);
237     if (IsInvalid) {
238       return "";
239     }
240     unsigned Column = SM.getColumnNumber(Pair.first, Pair.second, &IsInvalid);
241     if (IsInvalid) {
242       return "";
243     }
244 
245     if (Length) {
246       return stringFormat("%05d:%d-%d", Line, Column - 1, Column - 1 + Length);
247     } else {
248       return stringFormat("%05d:%d", Line, Column - 1);
249     }
250   }
251 
252   // Convert SourceRange to "line-line".
253   // In the resulting string rep, line is 1-based.
lineRangeToString(SourceRange Range)254   std::string lineRangeToString(SourceRange Range) {
255     std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin());
256     std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd());
257 
258     bool IsInvalid;
259     unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid);
260     if (IsInvalid) {
261       return "";
262     }
263     unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid);
264     if (IsInvalid) {
265       return "";
266     }
267 
268     return stringFormat("%d-%d", Line1, Line2);
269   }
270 
271   // Convert SourceRange to "line:column-line:column".
272   // In the resulting string rep, line is 1-based, column is 0-based.
fullRangeToString(SourceRange Range)273   std::string fullRangeToString(SourceRange Range) {
274     std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin());
275     std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd());
276 
277     bool IsInvalid;
278     unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid);
279     if (IsInvalid) {
280       return "";
281     }
282     unsigned Column1 = SM.getColumnNumber(Begin.first, Begin.second, &IsInvalid);
283     if (IsInvalid) {
284       return "";
285     }
286     unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid);
287     if (IsInvalid) {
288       return "";
289     }
290     unsigned Column2 = SM.getColumnNumber(End.first, End.second, &IsInvalid);
291     if (IsInvalid) {
292       return "";
293     }
294 
295     return stringFormat("%d:%d-%d:%d", Line1, Column1 - 1, Line2, Column2 - 1);
296   }
297 
298   // Returns the qualified name of `d` without considering template parameters.
getQualifiedName(const NamedDecl * D)299   std::string getQualifiedName(const NamedDecl *D) {
300     const DeclContext *Ctx = D->getDeclContext();
301     if (Ctx->isFunctionOrMethod()) {
302       return D->getQualifiedNameAsString();
303     }
304 
305     std::vector<const DeclContext *> Contexts;
306 
307     // Collect contexts.
308     while (Ctx && isa<NamedDecl>(Ctx)) {
309       Contexts.push_back(Ctx);
310       Ctx = Ctx->getParent();
311     }
312 
313     std::string Result;
314 
315     std::reverse(Contexts.begin(), Contexts.end());
316 
317     for (const DeclContext *DC : Contexts) {
318       if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC)) {
319         Result += Spec->getNameAsString();
320 
321         if (Spec->getSpecializationKind() == TSK_ExplicitSpecialization) {
322           std::string Backing;
323           llvm::raw_string_ostream Stream(Backing);
324           const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
325           printTemplateArgumentList(
326               Stream, TemplateArgs.asArray(), PrintingPolicy(CI.getLangOpts()));
327           Result += Stream.str();
328         }
329       } else if (const auto *Nd = dyn_cast<NamespaceDecl>(DC)) {
330         if (Nd->isAnonymousNamespace() || Nd->isInline()) {
331           continue;
332         }
333         Result += Nd->getNameAsString();
334       } else if (const auto *Rd = dyn_cast<RecordDecl>(DC)) {
335         if (!Rd->getIdentifier()) {
336           Result += "(anonymous)";
337         } else {
338           Result += Rd->getNameAsString();
339         }
340       } else if (const auto *Fd = dyn_cast<FunctionDecl>(DC)) {
341         Result += Fd->getNameAsString();
342       } else if (const auto *Ed = dyn_cast<EnumDecl>(DC)) {
343         // C++ [dcl.enum]p10: Each enum-name and each unscoped
344         // enumerator is declared in the scope that immediately contains
345         // the enum-specifier. Each scoped enumerator is declared in the
346         // scope of the enumeration.
347         if (Ed->isScoped() || Ed->getIdentifier())
348           Result += Ed->getNameAsString();
349         else
350           continue;
351       } else {
352         Result += cast<NamedDecl>(DC)->getNameAsString();
353       }
354       Result += "::";
355     }
356 
357     if (D->getDeclName())
358       Result += D->getNameAsString();
359     else
360       Result += "(anonymous)";
361 
362     return Result;
363   }
364 
mangleLocation(SourceLocation Loc,std::string Backup=std::string ())365   std::string mangleLocation(SourceLocation Loc,
366                              std::string Backup = std::string()) {
367     FileInfo *F = getFileInfo(Loc);
368     std::string Filename = F->Realname;
369     if (Filename.length() == 0 && Backup.length() != 0) {
370       return Backup;
371     }
372     if (F->Generated) {
373       // Since generated files may be different on different platforms,
374       // we need to include a platform-specific thing in the hash. Otherwise
375       // we can end up with hash collisions where different symbols from
376       // different platforms map to the same thing.
377       char* Platform = getenv("MOZSEARCH_PLATFORM");
378       Filename = std::string(Platform ? Platform : "") + std::string("@") + Filename;
379     }
380     return hash(Filename + std::string("@") + locationToString(Loc));
381   }
382 
mangleQualifiedName(std::string Name)383   std::string mangleQualifiedName(std::string Name) {
384     std::replace(Name.begin(), Name.end(), ' ', '_');
385     return Name;
386   }
387 
getMangledName(clang::MangleContext * Ctx,const clang::NamedDecl * Decl)388   std::string getMangledName(clang::MangleContext *Ctx,
389                              const clang::NamedDecl *Decl) {
390     if (isa<FunctionDecl>(Decl) && cast<FunctionDecl>(Decl)->isExternC()) {
391       return cast<FunctionDecl>(Decl)->getNameAsString();
392     }
393 
394     if (isa<FunctionDecl>(Decl) || isa<VarDecl>(Decl)) {
395       const DeclContext *DC = Decl->getDeclContext();
396       if (isa<TranslationUnitDecl>(DC) || isa<NamespaceDecl>(DC) ||
397           isa<LinkageSpecDecl>(DC) ||
398           // isa<ExternCContextDecl>(DC) ||
399           isa<TagDecl>(DC)) {
400         llvm::SmallVector<char, 512> Output;
401         llvm::raw_svector_ostream Out(Output);
402         if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) {
403           Ctx->mangleCXXCtor(D, CXXCtorType::Ctor_Complete, Out);
404         } else if (const CXXDestructorDecl *D =
405                        dyn_cast<CXXDestructorDecl>(Decl)) {
406           Ctx->mangleCXXDtor(D, CXXDtorType::Dtor_Complete, Out);
407         } else {
408           Ctx->mangleName(Decl, Out);
409         }
410         return Out.str().str();
411       } else {
412         return std::string("V_") + mangleLocation(Decl->getLocation()) +
413                std::string("_") + hash(Decl->getName());
414       }
415     } else if (isa<TagDecl>(Decl) || isa<TypedefNameDecl>(Decl) ||
416                isa<ObjCInterfaceDecl>(Decl)) {
417       if (!Decl->getIdentifier()) {
418         // Anonymous.
419         return std::string("T_") + mangleLocation(Decl->getLocation());
420       }
421 
422       return std::string("T_") + mangleQualifiedName(getQualifiedName(Decl));
423     } else if (isa<NamespaceDecl>(Decl) || isa<NamespaceAliasDecl>(Decl)) {
424       if (!Decl->getIdentifier()) {
425         // Anonymous.
426         return std::string("NS_") + mangleLocation(Decl->getLocation());
427       }
428 
429       return std::string("NS_") + mangleQualifiedName(getQualifiedName(Decl));
430     } else if (const ObjCIvarDecl *D2 = dyn_cast<ObjCIvarDecl>(Decl)) {
431       const ObjCInterfaceDecl *Iface = D2->getContainingInterface();
432       return std::string("F_<") + getMangledName(Ctx, Iface) + ">_" +
433              D2->getNameAsString();
434     } else if (const FieldDecl *D2 = dyn_cast<FieldDecl>(Decl)) {
435       const RecordDecl *Record = D2->getParent();
436       return std::string("F_<") + getMangledName(Ctx, Record) + ">_" +
437              D2->getNameAsString();
438     } else if (const EnumConstantDecl *D2 = dyn_cast<EnumConstantDecl>(Decl)) {
439       const DeclContext *DC = Decl->getDeclContext();
440       if (const NamedDecl *Named = dyn_cast<NamedDecl>(DC)) {
441         return std::string("E_<") + getMangledName(Ctx, Named) + ">_" +
442                D2->getNameAsString();
443       }
444     }
445 
446     assert(false);
447     return std::string("");
448   }
449 
debugLocation(SourceLocation Loc)450   void debugLocation(SourceLocation Loc) {
451     std::string S = locationToString(Loc);
452     StringRef Filename = SM.getFilename(Loc);
453     printf("--> %s %s\n", std::string(Filename).c_str(), S.c_str());
454   }
455 
debugRange(SourceRange Range)456   void debugRange(SourceRange Range) {
457     printf("Range\n");
458     debugLocation(Range.getBegin());
459     debugLocation(Range.getEnd());
460   }
461 
462 public:
IndexConsumer(CompilerInstance & CI)463   IndexConsumer(CompilerInstance &CI)
464       : CI(CI), SM(CI.getSourceManager()), LO(CI.getLangOpts()), CurMangleContext(nullptr),
465         AstContext(nullptr), CurDeclContext(nullptr), TemplateStack(nullptr) {
466     CI.getPreprocessor().addPPCallbacks(
467         make_unique<PreprocessorHook>(this));
468   }
469 
clone(DiagnosticsEngine & Diags) const470   virtual DiagnosticConsumer *clone(DiagnosticsEngine &Diags) const {
471     return new IndexConsumer(CI);
472   }
473 
474 #if !defined(_WIN32) && !defined(_WIN64)
475   struct AutoTime {
AutoTimeIndexConsumer::AutoTime476     AutoTime(double *Counter) : Counter(Counter), Start(time()) {}
~AutoTimeIndexConsumer::AutoTime477     ~AutoTime() {
478       if (Start) {
479         *Counter += time() - Start;
480       }
481     }
stopIndexConsumer::AutoTime482     void stop() {
483       *Counter += time() - Start;
484       Start = 0;
485     }
486     double *Counter;
487     double Start;
488   };
489 #endif
490 
491   // All we need is to follow the final declaration.
HandleTranslationUnit(ASTContext & Ctx)492   virtual void HandleTranslationUnit(ASTContext &Ctx) {
493     CurMangleContext =
494       clang::ItaniumMangleContext::create(Ctx, CI.getDiagnostics());
495 
496     AstContext = &Ctx;
497     TraverseDecl(Ctx.getTranslationUnitDecl());
498 
499     // Emit the JSON data for all files now.
500     std::map<FileID, std::unique_ptr<FileInfo>>::iterator It;
501     for (It = FileMap.begin(); It != FileMap.end(); It++) {
502       if (!It->second->Interesting) {
503         continue;
504       }
505 
506       FileInfo &Info = *It->second;
507 
508       std::string Filename = Outdir + Info.Realname;
509       std::string SrcFilename = Info.Generated
510         ? Objdir + Info.Realname.substr(GENERATED.length())
511         : Srcdir + PATHSEP_STRING + Info.Realname;
512 
513       ensurePath(Filename);
514 
515       // We lock the output file in case some other clang process is trying to
516       // write to it at the same time.
517       AutoLockFile Lock(SrcFilename, Filename);
518 
519       if (!Lock.success()) {
520         fprintf(stderr, "Unable to lock file %s\n", Filename.c_str());
521         exit(1);
522       }
523 
524       // Merge our results with the existing lines from the output file.
525       // This ensures that header files that are included multiple times
526       // in different ways are analyzed completely.
527 
528       FILE *Fp = Lock.openFile();
529       if (!Fp) {
530         fprintf(stderr, "Unable to open input file %s\n", Filename.c_str());
531         exit(1);
532       }
533       FILE *OutFp = Lock.openTmp();
534       if (!OutFp) {
535         fprintf(stderr, "Unable to open tmp out file for %s\n", Filename.c_str());
536         exit(1);
537       }
538 
539       // Sort our new results and get an iterator to them
540       std::sort(Info.Output.begin(), Info.Output.end());
541       std::vector<std::string>::const_iterator NewLinesIter = Info.Output.begin();
542       std::string LastNewWritten;
543 
544       // Loop over the existing (sorted) lines in the analysis output file.
545       char Buffer[65536];
546       while (fgets(Buffer, sizeof(Buffer), Fp)) {
547         std::string OldLine(Buffer);
548 
549         // Write any results from Info.Output that are lexicographically
550         // smaller than OldLine (read from the existing file), but make sure
551         // to skip duplicates. Keep advacing NewLinesIter until we reach an
552         // entry that is lexicographically greater than OldLine.
553         for (; NewLinesIter != Info.Output.end(); NewLinesIter++) {
554           if (*NewLinesIter > OldLine) {
555             break;
556           }
557           if (*NewLinesIter == OldLine) {
558             continue;
559           }
560           if (*NewLinesIter == LastNewWritten) {
561             // dedupe the new entries being written
562             continue;
563           }
564           if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) {
565             fprintf(stderr, "Unable to write to tmp output file for %s\n", Filename.c_str());
566             exit(1);
567           }
568           LastNewWritten = *NewLinesIter;
569         }
570 
571         // Write the entry read from the existing file.
572         if (fwrite(OldLine.c_str(), OldLine.length(), 1, OutFp) != 1) {
573           fprintf(stderr, "Unable to write to tmp output file for %s\n", Filename.c_str());
574           exit(1);
575         }
576       }
577 
578       // We finished reading from Fp
579       fclose(Fp);
580 
581       // Finish iterating our new results, discarding duplicates
582       for (; NewLinesIter != Info.Output.end(); NewLinesIter++) {
583         if (*NewLinesIter == LastNewWritten) {
584           continue;
585         }
586         if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) {
587           fprintf(stderr, "Unable to write to tmp output file for %s\n", Filename.c_str());
588           exit(1);
589         }
590         LastNewWritten = *NewLinesIter;
591       }
592 
593       // Done writing all the things, close it and replace the old output file
594       // with the new one.
595       fclose(OutFp);
596       if (!Lock.moveTmp()) {
597         fprintf(stderr, "Unable to move tmp output file into place for %s (err %d)\n", Filename.c_str(), errno);
598         exit(1);
599       }
600     }
601   }
602 
603   // Return a list of mangled names of all the methods that the given method
604   // overrides.
findOverriddenMethods(const CXXMethodDecl * Method,std::vector<std::string> & Symbols)605   void findOverriddenMethods(const CXXMethodDecl *Method,
606                              std::vector<std::string> &Symbols) {
607     std::string Mangled = getMangledName(CurMangleContext, Method);
608     Symbols.push_back(Mangled);
609 
610     CXXMethodDecl::method_iterator Iter = Method->begin_overridden_methods();
611     CXXMethodDecl::method_iterator End = Method->end_overridden_methods();
612     for (; Iter != End; Iter++) {
613       const CXXMethodDecl *Decl = *Iter;
614       if (Decl->isTemplateInstantiation()) {
615         Decl = dyn_cast<CXXMethodDecl>(Decl->getTemplateInstantiationPattern());
616       }
617       return findOverriddenMethods(Decl, Symbols);
618     }
619   }
620 
621   // Unfortunately, we have to override all these methods in order to track the
622   // context we're inside.
623 
TraverseEnumDecl(EnumDecl * D)624   bool TraverseEnumDecl(EnumDecl *D) {
625     AutoSetContext Asc(this, D);
626     return Super::TraverseEnumDecl(D);
627   }
TraverseRecordDecl(RecordDecl * D)628   bool TraverseRecordDecl(RecordDecl *D) {
629     AutoSetContext Asc(this, D);
630     return Super::TraverseRecordDecl(D);
631   }
TraverseCXXRecordDecl(CXXRecordDecl * D)632   bool TraverseCXXRecordDecl(CXXRecordDecl *D) {
633     AutoSetContext Asc(this, D);
634     return Super::TraverseCXXRecordDecl(D);
635   }
TraverseFunctionDecl(FunctionDecl * D)636   bool TraverseFunctionDecl(FunctionDecl *D) {
637     AutoSetContext Asc(this, D);
638     const FunctionDecl *Def;
639     // (See the larger AutoTemplateContext comment for more information.) If a
640     // method on a templated class is declared out-of-line, we need to analyze
641     // the definition inside the scope of the template or else we won't properly
642     // handle member access on the templated type.
643     if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
644       TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
645     }
646     return Super::TraverseFunctionDecl(D);
647   }
TraverseCXXMethodDecl(CXXMethodDecl * D)648   bool TraverseCXXMethodDecl(CXXMethodDecl *D) {
649     AutoSetContext Asc(this, D);
650     const FunctionDecl *Def;
651     // See TraverseFunctionDecl.
652     if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
653       TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
654     }
655     return Super::TraverseCXXMethodDecl(D);
656   }
TraverseCXXConstructorDecl(CXXConstructorDecl * D)657   bool TraverseCXXConstructorDecl(CXXConstructorDecl *D) {
658     AutoSetContext Asc(this, D, /*VisitImplicit=*/true);
659     const FunctionDecl *Def;
660     // See TraverseFunctionDecl.
661     if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
662       TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
663     }
664     return Super::TraverseCXXConstructorDecl(D);
665   }
TraverseCXXConversionDecl(CXXConversionDecl * D)666   bool TraverseCXXConversionDecl(CXXConversionDecl *D) {
667     AutoSetContext Asc(this, D);
668     const FunctionDecl *Def;
669     // See TraverseFunctionDecl.
670     if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
671       TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
672     }
673     return Super::TraverseCXXConversionDecl(D);
674   }
TraverseCXXDestructorDecl(CXXDestructorDecl * D)675   bool TraverseCXXDestructorDecl(CXXDestructorDecl *D) {
676     AutoSetContext Asc(this, D);
677     const FunctionDecl *Def;
678     // See TraverseFunctionDecl.
679     if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
680       TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
681     }
682     return Super::TraverseCXXDestructorDecl(D);
683   }
684 
685   // Used to keep track of the context in which a token appears.
686   struct Context {
687     // Ultimately this becomes the "context" JSON property.
688     std::string Name;
689 
690     // Ultimately this becomes the "contextsym" JSON property.
691     std::vector<std::string> Symbols;
692 
ContextIndexConsumer::Context693     Context() {}
ContextIndexConsumer::Context694     Context(std::string Name, std::vector<std::string> Symbols)
695         : Name(Name), Symbols(Symbols) {}
696   };
697 
translateContext(NamedDecl * D)698   Context translateContext(NamedDecl *D) {
699     const FunctionDecl *F = dyn_cast<FunctionDecl>(D);
700     if (F && F->isTemplateInstantiation()) {
701       D = F->getTemplateInstantiationPattern();
702     }
703 
704     std::vector<std::string> Symbols = {getMangledName(CurMangleContext, D)};
705     if (CXXMethodDecl::classof(D)) {
706       Symbols.clear();
707       findOverriddenMethods(dyn_cast<CXXMethodDecl>(D), Symbols);
708     }
709     return Context(D->getQualifiedNameAsString(), Symbols);
710   }
711 
getContext(SourceLocation Loc)712   Context getContext(SourceLocation Loc) {
713     if (SM.isMacroBodyExpansion(Loc)) {
714       // If we're inside a macro definition, we don't return any context. It
715       // will probably not be what the user expects if we do.
716       return Context();
717     }
718 
719     if (CurDeclContext) {
720       return translateContext(CurDeclContext->Decl);
721     }
722     return Context();
723   }
724 
725   // Similar to GetContext(SourceLocation), but it skips the declaration passed
726   // in. This is useful if we want the context of a declaration that's already
727   // on the stack.
getContext(Decl * D)728   Context getContext(Decl *D) {
729     if (SM.isMacroBodyExpansion(D->getLocation())) {
730       // If we're inside a macro definition, we don't return any context. It
731       // will probably not be what the user expects if we do.
732       return Context();
733     }
734 
735     AutoSetContext *Ctxt = CurDeclContext;
736     while (Ctxt) {
737       if (Ctxt->Decl != D) {
738         return translateContext(Ctxt->Decl);
739       }
740       Ctxt = Ctxt->Prev;
741     }
742     return Context();
743   }
744 
concatSymbols(const std::vector<std::string> Symbols)745   static std::string concatSymbols(const std::vector<std::string> Symbols) {
746     if (Symbols.empty()) {
747       return "";
748     }
749 
750     size_t Total = 0;
751     for (auto It = Symbols.begin(); It != Symbols.end(); It++) {
752       Total += It->length();
753     }
754     Total += Symbols.size() - 1;
755 
756     std::string SymbolList;
757     SymbolList.reserve(Total);
758 
759     for (auto It = Symbols.begin(); It != Symbols.end(); It++) {
760       std::string Symbol = *It;
761 
762       if (It != Symbols.begin()) {
763         SymbolList.push_back(',');
764       }
765       SymbolList.append(Symbol);
766     }
767 
768     return SymbolList;
769   }
770 
771   // Analyzing template code is tricky. Suppose we have this code:
772   //
773   //   template<class T>
774   //   bool Foo(T* ptr) { return T::StaticMethod(ptr); }
775   //
776   // If we analyze the body of Foo without knowing the type T, then we will not
777   // be able to generate any information for StaticMethod. However, analyzing
778   // Foo for every possible instantiation is inefficient and it also generates
779   // too much data in some cases. For example, the following code would generate
780   // one definition of Baz for every instantiation, which is undesirable:
781   //
782   //   template<class T>
783   //   class Bar { struct Baz { ... }; };
784   //
785   // To solve this problem, we analyze templates only once. We do so in a
786   // GatherDependent mode where we look for "dependent scoped member
787   // expressions" (i.e., things like StaticMethod). We keep track of the
788   // locations of these expressions. If we find one or more of them, we analyze
789   // the template for each instantiation, in an AnalyzeDependent mode. This mode
790   // ignores all source locations except for the ones where we found dependent
791   // scoped member expressions before. For these locations, we generate a
792   // separate JSON result for each instantiation.
793   //
  // We inherit our parent's mode if it exists.  This is because if our
795   // parent is in analyze mode, it means we've already lived a full life in
796   // gather mode and we must not restart in gather mode or we'll cause the
797   // indexer to visit EVERY identifier, which is way too much data.
798   struct AutoTemplateContext {
AutoTemplateContextIndexConsumer::AutoTemplateContext799     AutoTemplateContext(IndexConsumer *Self)
800         : Self(Self)
801         , CurMode(Self->TemplateStack ? Self->TemplateStack->CurMode : Mode::GatherDependent)
802         , Parent(Self->TemplateStack) {
803       Self->TemplateStack = this;
804     }
805 
~AutoTemplateContextIndexConsumer::AutoTemplateContext806     ~AutoTemplateContext() { Self->TemplateStack = Parent; }
807 
808     // We traverse templates in two modes:
809     enum class Mode {
810       // Gather mode does not traverse into specializations. It looks for
811       // locations where it would help to have more info from template
812       // specializations.
813       GatherDependent,
814 
815       // Analyze mode traverses into template specializations and records
816       // information about token locations saved in gather mode.
817       AnalyzeDependent,
818     };
819 
820     // We found a dependent scoped member expression! Keep track of it for
821     // later.
visitDependentIndexConsumer::AutoTemplateContext822     void visitDependent(SourceLocation Loc) {
823       if (CurMode == Mode::AnalyzeDependent) {
824         return;
825       }
826 
827       DependentLocations.insert(Loc.getRawEncoding());
828       if (Parent) {
829         Parent->visitDependent(Loc);
830       }
831     }
832 
inGatherModeIndexConsumer::AutoTemplateContext833     bool inGatherMode() {
834       return CurMode == Mode::GatherDependent;
835     }
836 
837     // Do we need to perform the extra AnalyzeDependent passes (one per
838     // instantiation)?
needsAnalysisIndexConsumer::AutoTemplateContext839     bool needsAnalysis() const {
840       if (!DependentLocations.empty()) {
841         return true;
842       }
843       if (Parent) {
844         return Parent->needsAnalysis();
845       }
846       return false;
847     }
848 
switchModeIndexConsumer::AutoTemplateContext849     void switchMode() { CurMode = Mode::AnalyzeDependent; }
850 
851     // Do we want to analyze each template instantiation separately?
shouldVisitTemplateInstantiationsIndexConsumer::AutoTemplateContext852     bool shouldVisitTemplateInstantiations() const {
853       if (CurMode == Mode::AnalyzeDependent) {
854         return true;
855       }
856       if (Parent) {
857         return Parent->shouldVisitTemplateInstantiations();
858       }
859       return false;
860     }
861 
862     // For a given expression/statement, should we emit JSON data for it?
shouldVisitIndexConsumer::AutoTemplateContext863     bool shouldVisit(SourceLocation Loc) {
864       if (CurMode == Mode::GatherDependent) {
865         return true;
866       }
867       if (DependentLocations.find(Loc.getRawEncoding()) !=
868           DependentLocations.end()) {
869         return true;
870       }
871       if (Parent) {
872         return Parent->shouldVisit(Loc);
873       }
874       return false;
875     }
876 
877   private:
878     IndexConsumer *Self;
879     Mode CurMode;
880     std::unordered_set<unsigned> DependentLocations;
881     AutoTemplateContext *Parent;
882   };
883 
884   AutoTemplateContext *TemplateStack;
885 
shouldVisitTemplateInstantiations() const886   bool shouldVisitTemplateInstantiations() const {
887     if (TemplateStack) {
888       return TemplateStack->shouldVisitTemplateInstantiations();
889     }
890     return false;
891   }
892 
shouldVisitImplicitCode() const893   bool shouldVisitImplicitCode() const {
894     return CurDeclContext && CurDeclContext->VisitImplicit;
895   }
896 
TraverseClassTemplateDecl(ClassTemplateDecl * D)897   bool TraverseClassTemplateDecl(ClassTemplateDecl *D) {
898     AutoTemplateContext Atc(this);
899     Super::TraverseClassTemplateDecl(D);
900 
901     if (!Atc.needsAnalysis()) {
902       return true;
903     }
904 
905     Atc.switchMode();
906 
907     if (D != D->getCanonicalDecl()) {
908       return true;
909     }
910 
911     for (auto *Spec : D->specializations()) {
912       for (auto *Rd : Spec->redecls()) {
913         // We don't want to visit injected-class-names in this traversal.
914         if (cast<CXXRecordDecl>(Rd)->isInjectedClassName())
915           continue;
916 
917         TraverseDecl(Rd);
918       }
919     }
920 
921     return true;
922   }
923 
TraverseFunctionTemplateDecl(FunctionTemplateDecl * D)924   bool TraverseFunctionTemplateDecl(FunctionTemplateDecl *D) {
925     AutoTemplateContext Atc(this);
926     if (Atc.inGatherMode()) {
927       Super::TraverseFunctionTemplateDecl(D);
928     }
929 
930     if (!Atc.needsAnalysis()) {
931       return true;
932     }
933 
934     Atc.switchMode();
935 
936     if (D != D->getCanonicalDecl()) {
937       return true;
938     }
939 
940     for (auto *Spec : D->specializations()) {
941       for (auto *Rd : Spec->redecls()) {
942         TraverseDecl(Rd);
943       }
944     }
945 
946     return true;
947   }
948 
shouldVisit(SourceLocation Loc)949   bool shouldVisit(SourceLocation Loc) {
950     if (TemplateStack) {
951       return TemplateStack->shouldVisit(Loc);
952     }
953     return true;
954   }
955 
956   enum {
957     NoCrossref = 1 << 0,
958     OperatorToken = 1 << 1,
959   };
960 
961   // This is the only function that emits analysis JSON data. It should be
962   // called for each identifier that corresponds to a symbol.
visitIdentifier(const char * Kind,const char * SyntaxKind,std::string QualName,SourceLocation Loc,const std::vector<std::string> & Symbols,Context TokenContext=Context (),int Flags=0,SourceRange PeekRange=SourceRange (),SourceRange NestingRange=SourceRange ())963   void visitIdentifier(const char *Kind, const char *SyntaxKind,
964                        std::string QualName, SourceLocation Loc,
965                        const std::vector<std::string> &Symbols,
966                        Context TokenContext = Context(), int Flags = 0,
967                        SourceRange PeekRange = SourceRange(),
968                        SourceRange NestingRange = SourceRange()) {
969     if (!shouldVisit(Loc)) {
970       return;
971     }
972 
973     // Find the file positions corresponding to the token.
974     unsigned StartOffset = SM.getFileOffset(Loc);
975     unsigned EndOffset =
976         StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
977 
978     std::string LocStr = locationToString(Loc, EndOffset - StartOffset);
979     std::string RangeStr = locationToString(Loc, EndOffset - StartOffset);
980     std::string PeekRangeStr;
981 
982     if (!(Flags & OperatorToken)) {
983       // Get the token's characters so we can make sure it's a valid token.
984       const char *StartChars = SM.getCharacterData(Loc);
985       std::string Text(StartChars, EndOffset - StartOffset);
986       if (!isValidIdentifier(Text)) {
987         return;
988       }
989     }
990 
991     FileInfo *F = getFileInfo(Loc);
992 
993     std::string SymbolList;
994 
995     // Reserve space in symbolList for everything in `symbols`. `symbols` can
996     // contain some very long strings.
997     size_t Total = 0;
998     for (auto It = Symbols.begin(); It != Symbols.end(); It++) {
999       Total += It->length();
1000     }
1001 
1002     // Space for commas.
1003     Total += Symbols.size() - 1;
1004     SymbolList.reserve(Total);
1005 
1006     // For each symbol, generate one "target":1 item. We want to find this line
1007     // if someone searches for any one of these symbols.
1008     for (auto It = Symbols.begin(); It != Symbols.end(); It++) {
1009       std::string Symbol = *It;
1010 
1011       if (!(Flags & NoCrossref)) {
1012         JSONFormatter Fmt;
1013 
1014         Fmt.add("loc", LocStr);
1015         Fmt.add("target", 1);
1016         Fmt.add("kind", Kind);
1017         Fmt.add("pretty", QualName);
1018         Fmt.add("sym", Symbol);
1019         if (!TokenContext.Name.empty()) {
1020           Fmt.add("context", TokenContext.Name);
1021         }
1022         std::string ContextSymbol = concatSymbols(TokenContext.Symbols);
1023         if (!ContextSymbol.empty()) {
1024           Fmt.add("contextsym", ContextSymbol);
1025         }
1026         if (PeekRange.isValid()) {
1027           PeekRangeStr = lineRangeToString(PeekRange);
1028           if (!PeekRangeStr.empty()) {
1029             Fmt.add("peekRange", PeekRangeStr);
1030           }
1031         }
1032 
1033         std::string S;
1034         Fmt.format(S);
1035         F->Output.push_back(std::move(S));
1036       }
1037 
1038       if (It != Symbols.begin()) {
1039         SymbolList.push_back(',');
1040       }
1041       SymbolList.append(Symbol);
1042     }
1043 
1044     // Generate a single "source":1 for all the symbols. If we search from here,
1045     // we want to union the results for every symbol in `symbols`.
1046     JSONFormatter Fmt;
1047 
1048     Fmt.add("loc", RangeStr);
1049     Fmt.add("source", 1);
1050 
1051     if (NestingRange.isValid()) {
1052       std::string NestingRangeStr = fullRangeToString(NestingRange);
1053       if (!NestingRangeStr.empty()) {
1054         Fmt.add("nestingRange", NestingRangeStr);
1055       }
1056     }
1057 
1058     std::string Syntax;
1059     if (Flags & NoCrossref) {
1060       Fmt.add("syntax", "");
1061     } else {
1062       Syntax = Kind;
1063       Syntax.push_back(',');
1064       Syntax.append(SyntaxKind);
1065       Fmt.add("syntax", Syntax);
1066     }
1067 
1068     std::string Pretty(SyntaxKind);
1069     Pretty.push_back(' ');
1070     Pretty.append(QualName);
1071     Fmt.add("pretty", Pretty);
1072 
1073     Fmt.add("sym", SymbolList);
1074 
1075     if (Flags & NoCrossref) {
1076       Fmt.add("no_crossref", 1);
1077     }
1078 
1079     std::string Buf;
1080     Fmt.format(Buf);
1081     F->Output.push_back(std::move(Buf));
1082   }
1083 
visitIdentifier(const char * Kind,const char * SyntaxKind,std::string QualName,SourceLocation Loc,std::string Symbol,Context TokenContext=Context (),int Flags=0,SourceRange PeekRange=SourceRange (),SourceRange NestingRange=SourceRange ())1084   void visitIdentifier(const char *Kind, const char *SyntaxKind,
1085                        std::string QualName, SourceLocation Loc, std::string Symbol,
1086                        Context TokenContext = Context(), int Flags = 0,
1087                        SourceRange PeekRange = SourceRange(),
1088                        SourceRange NestingRange = SourceRange()) {
1089     std::vector<std::string> V = {Symbol};
1090     visitIdentifier(Kind, SyntaxKind, QualName, Loc, V, TokenContext, Flags,
1091                     PeekRange, NestingRange);
1092   }
1093 
normalizeLocation(SourceLocation * Loc)1094   void normalizeLocation(SourceLocation *Loc) {
1095     *Loc = SM.getSpellingLoc(*Loc);
1096   }
1097 
1098   // For cases where the left-brace is not directly accessible from the AST,
1099   // helper to use the lexer to find the brace.  Make sure you're picking the
1100   // start location appropriately!
findLeftBraceFromLoc(SourceLocation Loc)1101   SourceLocation findLeftBraceFromLoc(SourceLocation Loc) {
1102     return Lexer::findLocationAfterToken(Loc, tok::l_brace, SM, LO, false);
1103   }
1104 
1105   // If the provided statement is compound, return its range.
getCompoundStmtRange(Stmt * D)1106   SourceRange getCompoundStmtRange(Stmt* D) {
1107     if (!D) {
1108       return SourceRange();
1109     }
1110 
1111     CompoundStmt *D2 = dyn_cast<CompoundStmt>(D);
1112     if (D2) {
1113       return D2->getSourceRange();
1114     }
1115 
1116     return SourceRange();
1117   }
1118 
getFunctionPeekRange(FunctionDecl * D)1119   SourceRange getFunctionPeekRange(FunctionDecl* D) {
1120     // We always start at the start of the function decl, which may include the
1121     // return type on a separate line.
1122     SourceLocation Start = D->getBeginLoc();
1123 
1124     // By default, we end at the line containing the function's name.
1125     SourceLocation End = D->getLocation();
1126 
1127     std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedLoc(End);
1128 
1129     // But if there are parameters, we want to include those as well.
1130     for (ParmVarDecl* Param : D->parameters()) {
1131       std::pair<FileID, unsigned> ParamLoc = SM.getDecomposedLoc(Param->getLocation());
1132 
1133       // It's possible there are macros involved or something. We don't include
1134       // the parameters in that case.
1135       if (ParamLoc.first == FuncLoc.first) {
1136         // Assume parameters are in order, so we always take the last one.
1137         End = Param->getEndLoc();
1138       }
1139     }
1140 
1141     return SourceRange(Start, End);
1142   }
1143 
getTagPeekRange(TagDecl * D)1144   SourceRange getTagPeekRange(TagDecl* D) {
1145     SourceLocation Start = D->getBeginLoc();
1146 
1147     // By default, we end at the line containing the name.
1148     SourceLocation End = D->getLocation();
1149 
1150     std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedLoc(End);
1151 
1152     if (CXXRecordDecl* D2 = dyn_cast<CXXRecordDecl>(D)) {
1153       // But if there are parameters, we want to include those as well.
1154       for (CXXBaseSpecifier& Base : D2->bases()) {
1155         std::pair<FileID, unsigned> Loc = SM.getDecomposedLoc(Base.getEndLoc());
1156 
1157         // It's possible there are macros involved or something. We don't include
1158         // the parameters in that case.
1159         if (Loc.first == FuncLoc.first) {
1160           // Assume parameters are in order, so we always take the last one.
1161           End = Base.getEndLoc();
1162         }
1163       }
1164     }
1165 
1166     return SourceRange(Start, End);
1167   }
1168 
getCommentRange(NamedDecl * D)1169   SourceRange getCommentRange(NamedDecl* D) {
1170     const RawComment* RC =
1171       AstContext->getRawCommentForDeclNoCache(D);
1172     if (!RC) {
1173       return SourceRange();
1174     }
1175 
1176     return RC->getSourceRange();
1177   }
1178 
1179   // Sanity checks that all ranges are in the same file, returning the first if
1180   // they're in different files.  Unions the ranges based on which is first.
combineRanges(SourceRange Range1,SourceRange Range2)1181   SourceRange combineRanges(SourceRange Range1, SourceRange Range2) {
1182     if (Range1.isInvalid()) {
1183       return Range2;
1184     }
1185     if (Range2.isInvalid()) {
1186       return Range1;
1187     }
1188 
1189     std::pair<FileID, unsigned> Begin1 = SM.getDecomposedLoc(Range1.getBegin());
1190     std::pair<FileID, unsigned> End1 = SM.getDecomposedLoc(Range1.getEnd());
1191     std::pair<FileID, unsigned> Begin2 = SM.getDecomposedLoc(Range2.getBegin());
1192     std::pair<FileID, unsigned> End2 = SM.getDecomposedLoc(Range2.getEnd());
1193 
1194     if (End1.first != Begin2.first) {
1195       // Something weird is probably happening with the preprocessor. Just
1196       // return the first range.
1197       return Range1;
1198     }
1199 
1200     // See which range comes first.
1201     if (Begin1.second <= End2.second) {
1202       return SourceRange(Range1.getBegin(), Range2.getEnd());
1203     } else {
1204       return SourceRange(Range2.getBegin(), Range1.getEnd());
1205     }
1206   }
1207 
1208   // Given a location and a range, returns the range if:
1209   // - The location and the range live in the same file.
  // - The range is non-empty and well ordered (end is strictly after begin).
1211   // Returns an empty range otherwise.
validateRange(SourceLocation Loc,SourceRange Range)1212   SourceRange validateRange(SourceLocation Loc, SourceRange Range) {
1213     std::pair<FileID, unsigned> Decomposed = SM.getDecomposedLoc(Loc);
1214     std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin());
1215     std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd());
1216 
1217     if (Begin.first != Decomposed.first || End.first != Decomposed.first) {
1218       return SourceRange();
1219     }
1220 
1221     if (Begin.second >= End.second) {
1222       return SourceRange();
1223     }
1224 
1225     return Range;
1226   }
1227 
VisitNamedDecl(NamedDecl * D)1228   bool VisitNamedDecl(NamedDecl *D) {
1229     SourceLocation Loc = D->getLocation();
1230 
1231     // If the token is from a macro expansion and the expansion location
1232     // is interesting, use that instead as it tends to be more useful.
1233     SourceLocation expandedLoc = Loc;
1234     if (SM.isMacroBodyExpansion(Loc)) {
1235       Loc = SM.getFileLoc(Loc);
1236     }
1237 
1238     normalizeLocation(&Loc);
1239     if (!isInterestingLocation(Loc)) {
1240       return true;
1241     }
1242 
1243     if (isa<ParmVarDecl>(D) && !D->getDeclName().getAsIdentifierInfo()) {
1244       // Unnamed parameter in function proto.
1245       return true;
1246     }
1247 
1248     int Flags = 0;
1249     const char *Kind = "def";
1250     const char *PrettyKind = "?";
1251     SourceRange PeekRange(D->getBeginLoc(), D->getEndLoc());
1252     // The nesting range identifies the left brace and right brace, which
1253     // heavily depends on the AST node type.
1254     SourceRange NestingRange;
1255     if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) {
1256       if (D2->isTemplateInstantiation()) {
1257         D = D2->getTemplateInstantiationPattern();
1258       }
1259       Kind = D2->isThisDeclarationADefinition() ? "def" : "decl";
1260       PrettyKind = "function";
1261       PeekRange = getFunctionPeekRange(D2);
1262 
1263       // Only emit the nesting range if:
1264       // - This is a definition AND
1265       // - This isn't a template instantiation.  Function templates'
1266       //   instantiations can end up as a definition with a Loc at their point
1267       //   of declaration but with the CompoundStmt of the template's
1268       //   point of definition.  This really messes up the nesting range logic.
1269       //   At the time of writing this, the test repo's `big_header.h`'s
1270       //   `WhatsYourVector_impl::forwardDeclaredTemplateThingInlinedBelow` as
1271       //   instantiated by `big_cpp.cpp` triggers this phenomenon.
1272       //
1273       // Note: As covered elsewhere, template processing is tricky and it's
1274       // conceivable that we may change traversal patterns in the future,
1275       // mooting this guard.
1276       if (D2->isThisDeclarationADefinition() &&
1277           !D2->isTemplateInstantiation()) {
1278         // The CompoundStmt range is the brace range.
1279         NestingRange = getCompoundStmtRange(D2->getBody());
1280       }
1281     } else if (TagDecl *D2 = dyn_cast<TagDecl>(D)) {
1282       Kind = D2->isThisDeclarationADefinition() ? "def" : "decl";
1283       PrettyKind = "type";
1284 
1285       if (D2->isThisDeclarationADefinition() && D2->getDefinition() == D2) {
1286         PeekRange = getTagPeekRange(D2);
1287         NestingRange = D2->getBraceRange();
1288       } else {
1289         PeekRange = SourceRange();
1290       }
1291     } else if (isa<TypedefNameDecl>(D)) {
1292       Kind = "def";
1293       PrettyKind = "type";
1294       PeekRange = SourceRange(Loc, Loc);
1295     } else if (VarDecl *D2 = dyn_cast<VarDecl>(D)) {
1296       if (D2->isLocalVarDeclOrParm()) {
1297         Flags = NoCrossref;
1298       }
1299 
1300       Kind = D2->isThisDeclarationADefinition() == VarDecl::DeclarationOnly
1301                  ? "decl"
1302                  : "def";
1303       PrettyKind = "variable";
1304     } else if (isa<NamespaceDecl>(D) || isa<NamespaceAliasDecl>(D)) {
1305       Kind = "def";
1306       PrettyKind = "namespace";
1307       PeekRange = SourceRange(Loc, Loc);
1308       NamespaceDecl *D2 = dyn_cast<NamespaceDecl>(D);
1309       if (D2) {
1310         // There's no exposure of the left brace so we have to find it.
1311         NestingRange = SourceRange(
1312           findLeftBraceFromLoc(D2->isAnonymousNamespace() ? D2->getBeginLoc() : Loc),
1313           D2->getRBraceLoc());
1314       }
1315     } else if (isa<FieldDecl>(D)) {
1316       Kind = "def";
1317       PrettyKind = "field";
1318     } else if (isa<EnumConstantDecl>(D)) {
1319       Kind = "def";
1320       PrettyKind = "enum constant";
1321     } else {
1322       return true;
1323     }
1324 
1325     SourceRange CommentRange = getCommentRange(D);
1326     PeekRange = combineRanges(PeekRange, CommentRange);
1327     PeekRange = validateRange(Loc, PeekRange);
1328     NestingRange = validateRange(Loc, NestingRange);
1329 
1330     std::vector<std::string> Symbols = {getMangledName(CurMangleContext, D)};
1331     if (CXXMethodDecl::classof(D)) {
1332       Symbols.clear();
1333       findOverriddenMethods(dyn_cast<CXXMethodDecl>(D), Symbols);
1334     }
1335 
1336     // In the case of destructors, Loc might point to the ~ character. In that
1337     // case we want to skip to the name of the class. However, Loc might also
1338     // point to other places that generate destructors, such as the use site of
1339     // a macro that expands to generate a destructor, or a lambda (apparently
1340     // clang 8 creates a destructor declaration for at least some lambdas). In
1341     // the former case we'll use the macro use site as the location, and in the
1342     // latter we'll just drop the declaration.
1343     if (isa<CXXDestructorDecl>(D)) {
1344       PrettyKind = "destructor";
1345       const char *P = SM.getCharacterData(Loc);
1346       if (*P == '~') {
1347         // Advance Loc to the class name
1348         P++;
1349 
1350         unsigned Skipped = 1;
1351         while (*P == ' ' || *P == '\t' || *P == '\r' || *P == '\n') {
1352           P++;
1353           Skipped++;
1354         }
1355 
1356         Loc = Loc.getLocWithOffset(Skipped);
1357       } else {
1358         // See if the destructor is coming from a macro expansion
1359         P = SM.getCharacterData(expandedLoc);
1360         if (*P != '~') {
1361           // It's not
1362           return true;
1363         }
1364         // It is, so just use Loc as-is
1365       }
1366     }
1367 
1368     visitIdentifier(Kind, PrettyKind, getQualifiedName(D), Loc, Symbols,
1369                     getContext(D), Flags, PeekRange, NestingRange);
1370 
1371     return true;
1372   }
1373 
VisitCXXConstructExpr(CXXConstructExpr * E)1374   bool VisitCXXConstructExpr(CXXConstructExpr *E) {
1375     SourceLocation Loc = E->getBeginLoc();
1376     normalizeLocation(&Loc);
1377     if (!isInterestingLocation(Loc)) {
1378       return true;
1379     }
1380 
1381     FunctionDecl *Ctor = E->getConstructor();
1382     if (Ctor->isTemplateInstantiation()) {
1383       Ctor = Ctor->getTemplateInstantiationPattern();
1384     }
1385     std::string Mangled = getMangledName(CurMangleContext, Ctor);
1386 
1387     // FIXME: Need to do something different for list initialization.
1388 
1389     visitIdentifier("use", "constructor", getQualifiedName(Ctor), Loc, Mangled,
1390                     getContext(Loc));
1391 
1392     return true;
1393   }
1394 
VisitCallExpr(CallExpr * E)1395   bool VisitCallExpr(CallExpr *E) {
1396     Decl *Callee = E->getCalleeDecl();
1397     if (!Callee || !FunctionDecl::classof(Callee)) {
1398       return true;
1399     }
1400 
1401     const NamedDecl *NamedCallee = dyn_cast<NamedDecl>(Callee);
1402 
1403     SourceLocation Loc;
1404 
1405     const FunctionDecl *F = dyn_cast<FunctionDecl>(NamedCallee);
1406     if (F->isTemplateInstantiation()) {
1407       NamedCallee = F->getTemplateInstantiationPattern();
1408     }
1409 
1410     std::string Mangled = getMangledName(CurMangleContext, NamedCallee);
1411     int Flags = 0;
1412 
1413     Expr *CalleeExpr = E->getCallee()->IgnoreParenImpCasts();
1414 
1415     if (CXXOperatorCallExpr::classof(E)) {
1416       // Just take the first token.
1417       CXXOperatorCallExpr *Op = dyn_cast<CXXOperatorCallExpr>(E);
1418       Loc = Op->getOperatorLoc();
1419       Flags |= OperatorToken;
1420     } else if (MemberExpr::classof(CalleeExpr)) {
1421       MemberExpr *Member = dyn_cast<MemberExpr>(CalleeExpr);
1422       Loc = Member->getMemberLoc();
1423     } else if (DeclRefExpr::classof(CalleeExpr)) {
1424       // We handle this in VisitDeclRefExpr.
1425       return true;
1426     } else {
1427       return true;
1428     }
1429 
1430     normalizeLocation(&Loc);
1431 
1432     if (!isInterestingLocation(Loc)) {
1433       return true;
1434     }
1435 
1436     visitIdentifier("use", "function", getQualifiedName(NamedCallee), Loc, Mangled,
1437                     getContext(Loc), Flags);
1438 
1439     return true;
1440   }
1441 
VisitTagTypeLoc(TagTypeLoc L)1442   bool VisitTagTypeLoc(TagTypeLoc L) {
1443     SourceLocation Loc = L.getBeginLoc();
1444     normalizeLocation(&Loc);
1445     if (!isInterestingLocation(Loc)) {
1446       return true;
1447     }
1448 
1449     TagDecl *Decl = L.getDecl();
1450     std::string Mangled = getMangledName(CurMangleContext, Decl);
1451     visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1452                     getContext(Loc));
1453     return true;
1454   }
1455 
VisitTypedefTypeLoc(TypedefTypeLoc L)1456   bool VisitTypedefTypeLoc(TypedefTypeLoc L) {
1457     SourceLocation Loc = L.getBeginLoc();
1458     normalizeLocation(&Loc);
1459     if (!isInterestingLocation(Loc)) {
1460       return true;
1461     }
1462 
1463     NamedDecl *Decl = L.getTypedefNameDecl();
1464     std::string Mangled = getMangledName(CurMangleContext, Decl);
1465     visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1466                     getContext(Loc));
1467     return true;
1468   }
1469 
VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc L)1470   bool VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc L) {
1471     SourceLocation Loc = L.getBeginLoc();
1472     normalizeLocation(&Loc);
1473     if (!isInterestingLocation(Loc)) {
1474       return true;
1475     }
1476 
1477     NamedDecl *Decl = L.getDecl();
1478     std::string Mangled = getMangledName(CurMangleContext, Decl);
1479     visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1480                     getContext(Loc));
1481     return true;
1482   }
1483 
VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L)1484   bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L) {
1485     SourceLocation Loc = L.getBeginLoc();
1486     normalizeLocation(&Loc);
1487     if (!isInterestingLocation(Loc)) {
1488       return true;
1489     }
1490 
1491     TemplateDecl *Td = L.getTypePtr()->getTemplateName().getAsTemplateDecl();
1492     if (ClassTemplateDecl *D = dyn_cast<ClassTemplateDecl>(Td)) {
1493       NamedDecl *Decl = D->getTemplatedDecl();
1494       std::string Mangled = getMangledName(CurMangleContext, Decl);
1495       visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1496                       getContext(Loc));
1497     } else if (TypeAliasTemplateDecl *D = dyn_cast<TypeAliasTemplateDecl>(Td)) {
1498       NamedDecl *Decl = D->getTemplatedDecl();
1499       std::string Mangled = getMangledName(CurMangleContext, Decl);
1500       visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1501                       getContext(Loc));
1502     }
1503 
1504     return true;
1505   }
1506 
VisitDeclRefExpr(DeclRefExpr * E)1507   bool VisitDeclRefExpr(DeclRefExpr *E) {
1508     SourceLocation Loc = E->getExprLoc();
1509     normalizeLocation(&Loc);
1510     if (!isInterestingLocation(Loc)) {
1511       return true;
1512     }
1513 
1514     if (E->hasQualifier()) {
1515       Loc = E->getNameInfo().getLoc();
1516       normalizeLocation(&Loc);
1517     }
1518 
1519     NamedDecl *Decl = E->getDecl();
1520     if (const VarDecl *D2 = dyn_cast<VarDecl>(Decl)) {
1521       int Flags = 0;
1522       if (D2->isLocalVarDeclOrParm()) {
1523         Flags = NoCrossref;
1524       }
1525       std::string Mangled = getMangledName(CurMangleContext, Decl);
1526       visitIdentifier("use", "variable", getQualifiedName(Decl), Loc, Mangled,
1527                       getContext(Loc), Flags);
1528     } else if (isa<FunctionDecl>(Decl)) {
1529       const FunctionDecl *F = dyn_cast<FunctionDecl>(Decl);
1530       if (F->isTemplateInstantiation()) {
1531         Decl = F->getTemplateInstantiationPattern();
1532       }
1533 
1534       std::string Mangled = getMangledName(CurMangleContext, Decl);
1535       visitIdentifier("use", "function", getQualifiedName(Decl), Loc, Mangled,
1536                       getContext(Loc));
1537     } else if (isa<EnumConstantDecl>(Decl)) {
1538       std::string Mangled = getMangledName(CurMangleContext, Decl);
1539       visitIdentifier("use", "enum", getQualifiedName(Decl), Loc, Mangled,
1540                       getContext(Loc));
1541     }
1542 
1543     return true;
1544   }
1545 
VisitCXXConstructorDecl(CXXConstructorDecl * D)1546   bool VisitCXXConstructorDecl(CXXConstructorDecl *D) {
1547     if (!isInterestingLocation(D->getLocation())) {
1548       return true;
1549     }
1550 
1551     for (CXXConstructorDecl::init_const_iterator It = D->init_begin();
1552          It != D->init_end(); ++It) {
1553       const CXXCtorInitializer *Ci = *It;
1554       if (!Ci->getMember() || !Ci->isWritten()) {
1555         continue;
1556       }
1557 
1558       SourceLocation Loc = Ci->getMemberLocation();
1559       normalizeLocation(&Loc);
1560       if (!isInterestingLocation(Loc)) {
1561         continue;
1562       }
1563 
1564       FieldDecl *Member = Ci->getMember();
1565       std::string Mangled = getMangledName(CurMangleContext, Member);
1566       visitIdentifier("use", "field", getQualifiedName(Member), Loc, Mangled,
1567                       getContext(D));
1568     }
1569 
1570     return true;
1571   }
1572 
VisitMemberExpr(MemberExpr * E)1573   bool VisitMemberExpr(MemberExpr *E) {
1574     SourceLocation Loc = E->getExprLoc();
1575     normalizeLocation(&Loc);
1576     if (!isInterestingLocation(Loc)) {
1577       return true;
1578     }
1579 
1580     ValueDecl *Decl = E->getMemberDecl();
1581     if (FieldDecl *Field = dyn_cast<FieldDecl>(Decl)) {
1582       std::string Mangled = getMangledName(CurMangleContext, Field);
1583       visitIdentifier("use", "field", getQualifiedName(Field), Loc, Mangled,
1584                       getContext(Loc));
1585     }
1586     return true;
1587   }
1588 
VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr * E)1589   bool VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E) {
1590     SourceLocation Loc = E->getMemberLoc();
1591     normalizeLocation(&Loc);
1592     if (!isInterestingLocation(Loc)) {
1593       return true;
1594     }
1595 
1596     if (TemplateStack) {
1597       TemplateStack->visitDependent(Loc);
1598     }
1599     return true;
1600   }
1601 
macroDefined(const Token & Tok,const MacroDirective * Macro)1602   void macroDefined(const Token &Tok, const MacroDirective *Macro) {
1603     if (Macro->getMacroInfo()->isBuiltinMacro()) {
1604       return;
1605     }
1606     SourceLocation Loc = Tok.getLocation();
1607     normalizeLocation(&Loc);
1608     if (!isInterestingLocation(Loc)) {
1609       return;
1610     }
1611 
1612     IdentifierInfo *Ident = Tok.getIdentifierInfo();
1613     if (Ident) {
1614       std::string Mangled =
1615           std::string("M_") + mangleLocation(Loc, Ident->getName());
1616       visitIdentifier("def", "macro", Ident->getName(), Loc, Mangled);
1617     }
1618   }
1619 
macroUsed(const Token & Tok,const MacroInfo * Macro)1620   void macroUsed(const Token &Tok, const MacroInfo *Macro) {
1621     if (!Macro) {
1622       return;
1623     }
1624     if (Macro->isBuiltinMacro()) {
1625       return;
1626     }
1627     SourceLocation Loc = Tok.getLocation();
1628     normalizeLocation(&Loc);
1629     if (!isInterestingLocation(Loc)) {
1630       return;
1631     }
1632 
1633     IdentifierInfo *Ident = Tok.getIdentifierInfo();
1634     if (Ident) {
1635       std::string Mangled =
1636           std::string("M_") +
1637           mangleLocation(Macro->getDefinitionLoc(), Ident->getName());
1638       visitIdentifier("use", "macro", Ident->getName(), Loc, Mangled);
1639     }
1640   }
1641 };
1642 
// Preprocessor callback for a macro definition: forwards the macro name token
// and its directive to the indexer, which records it as a "def".
void PreprocessorHook::MacroDefined(const Token &Tok,
                                    const MacroDirective *Md) {
  Indexer->macroDefined(Tok, Md);
}
1647 
MacroExpands(const Token & Tok,const MacroDefinition & Md,SourceRange Range,const MacroArgs * Ma)1648 void PreprocessorHook::MacroExpands(const Token &Tok, const MacroDefinition &Md,
1649                                     SourceRange Range, const MacroArgs *Ma) {
1650   Indexer->macroUsed(Tok, Md.getMacroInfo());
1651 }
1652 
MacroUndefined(const Token & Tok,const MacroDefinition & Md,const MacroDirective * Undef)1653 void PreprocessorHook::MacroUndefined(const Token &Tok,
1654                                       const MacroDefinition &Md,
1655                                       const MacroDirective *Undef)
1656 {
1657   Indexer->macroUsed(Tok, Md.getMacroInfo());
1658 }
1659 
Defined(const Token & Tok,const MacroDefinition & Md,SourceRange Range)1660 void PreprocessorHook::Defined(const Token &Tok, const MacroDefinition &Md,
1661                                SourceRange Range) {
1662   Indexer->macroUsed(Tok, Md.getMacroInfo());
1663 }
1664 
Ifdef(SourceLocation Loc,const Token & Tok,const MacroDefinition & Md)1665 void PreprocessorHook::Ifdef(SourceLocation Loc, const Token &Tok,
1666                              const MacroDefinition &Md) {
1667   Indexer->macroUsed(Tok, Md.getMacroInfo());
1668 }
1669 
Ifndef(SourceLocation Loc,const Token & Tok,const MacroDefinition & Md)1670 void PreprocessorHook::Ifndef(SourceLocation Loc, const Token &Tok,
1671                               const MacroDefinition &Md) {
1672   Indexer->macroUsed(Tok, Md.getMacroInfo());
1673 }
1674 
1675 class IndexAction : public PluginASTAction {
1676 protected:
CreateASTConsumer(CompilerInstance & CI,llvm::StringRef F)1677   std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
1678                                                  llvm::StringRef F) {
1679     return make_unique<IndexConsumer>(CI);
1680   }
1681 
ParseArgs(const CompilerInstance & CI,const std::vector<std::string> & Args)1682   bool ParseArgs(const CompilerInstance &CI,
1683                  const std::vector<std::string> &Args) {
1684     if (Args.size() != 3) {
1685       DiagnosticsEngine &D = CI.getDiagnostics();
1686       unsigned DiagID = D.getCustomDiagID(
1687           DiagnosticsEngine::Error,
1688           "Need arguments for the source, output, and object directories");
1689       D.Report(DiagID);
1690       return false;
1691     }
1692 
1693     // Load our directories
1694     Srcdir = getAbsolutePath(Args[0]);
1695     if (Srcdir.empty()) {
1696       DiagnosticsEngine &D = CI.getDiagnostics();
1697       unsigned DiagID = D.getCustomDiagID(
1698           DiagnosticsEngine::Error, "Source directory '%0' does not exist");
1699       D.Report(DiagID) << Args[0];
1700       return false;
1701     }
1702 
1703     ensurePath(Args[1] + PATHSEP_STRING);
1704     Outdir = getAbsolutePath(Args[1]);
1705     Outdir += PATHSEP_STRING;
1706 
1707     Objdir = getAbsolutePath(Args[2]);
1708     if (Objdir.empty()) {
1709       DiagnosticsEngine &D = CI.getDiagnostics();
1710       unsigned DiagID = D.getCustomDiagID(DiagnosticsEngine::Error,
1711                                           "Objdir '%0' does not exist");
1712       D.Report(DiagID) << Args[2];
1713       return false;
1714     }
1715     Objdir += PATHSEP_STRING;
1716 
1717     printf("MOZSEARCH: %s %s %s\n", Srcdir.c_str(), Outdir.c_str(),
1718            Objdir.c_str());
1719 
1720     return true;
1721   }
1722 
printHelp(llvm::raw_ostream & Ros)1723   void printHelp(llvm::raw_ostream &Ros) {
1724     Ros << "Help for mozsearch plugin goes here\n";
1725   }
1726 };
1727 
// Registers IndexAction with the frontend plugin registry under the plugin
// name "mozsearch-index". The object exists only for the side effect of its
// construction during static initialization.
static FrontendPluginRegistry::Add<IndexAction>
    Y("mozsearch-index", "create the mozsearch index database");
1730