1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this
4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 #include "clang/AST/AST.h"
7 #include "clang/AST/ASTConsumer.h"
8 #include "clang/AST/ASTContext.h"
9 #include "clang/AST/Expr.h"
10 #include "clang/AST/ExprCXX.h"
11 #include "clang/AST/Mangle.h"
12 #include "clang/AST/RecursiveASTVisitor.h"
13 #include "clang/Basic/FileManager.h"
14 #include "clang/Basic/SourceManager.h"
15 #include "clang/Basic/Version.h"
16 #include "clang/Frontend/CompilerInstance.h"
17 #include "clang/Frontend/FrontendPluginRegistry.h"
18 #include "clang/Lex/Lexer.h"
19 #include "clang/Lex/PPCallbacks.h"
20 #include "clang/Lex/Preprocessor.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Support/raw_ostream.h"
23 
24 #include <iostream>
25 #include <map>
26 #include <memory>
27 #include <sstream>
28 #include <tuple>
29 #include <unordered_set>
30 
31 #include <stdio.h>
32 #include <stdlib.h>
33 
34 #include "FileOperations.h"
35 #include "JSONFormatter.h"
36 #include "StringOperations.h"
37 
// Clang 8.0 renamed a few basic location accessors. The code in this file is
// written against the new names, so map them onto the old spellings when
// building with an older Clang.
#if CLANG_VERSION_MAJOR < 8
#define getBeginLoc getLocStart
#define getEndLoc getLocEnd
#endif

// We want std::make_unique, which only exists from C++14 onwards; before that
// we fall back to llvm's make_unique. The choice is keyed on __cplusplus
// rather than CLANG_VERSION_MAJOR: clang 10 is expected to build with C++14
// and clang 9 and earlier with C++11 (as suggested by llvm-config --cxxflags
// on non-Windows platforms), but mozilla-central seems to build with
// -std=c++17 on Windows.
#if __cplusplus >= 201402L
using std::make_unique;
#else
using llvm::make_unique;
#endif
55 
56 using namespace clang;
57 
58 const std::string GENERATED("__GENERATED__" PATHSEP_STRING);
59 
60 // Absolute path to directory containing source code.
61 std::string Srcdir;
62 
63 // Absolute path to objdir (including generated code).
64 std::string Objdir;
65 
66 // Absolute path where analysis JSON output will be stored.
67 std::string Outdir;
68 
// Where a file lives relative to the directories the indexer knows about.
enum class FileType {
  // The file is in neither the source tree nor the objdir. It might be a
  // system include, for example.
  Unknown,
  // The file is part of the source tree.
  Source,
  // The file is part of the objdir.
  Generated
};
78 
79 // Takes an absolute path to a file, and returns the type of file it is. If
80 // it's a Source or Generated file, the provided inout path argument is modified
81 // in-place so that it is relative to the source dir or objdir, respectively.
relativizePath(std::string & path)82 FileType relativizePath(std::string& path) {
83   if (path.compare(0, Objdir.length(), Objdir) == 0) {
84     path.replace(0, Objdir.length(), GENERATED);
85     return FileType::Generated;
86   }
87   // Empty filenames can get turned into Srcdir when they are resolved as
88   // absolute paths, so we should exclude files that are exactly equal to
89   // Srcdir or anything outside Srcdir.
90   if (path.length() > Srcdir.length() && path.compare(0, Srcdir.length(), Srcdir) == 0) {
91     // Remove the trailing `/' as well.
92     path.erase(0, Srcdir.length() + 1);
93     return FileType::Source;
94   }
95   return FileType::Unknown;
96 }
97 
#if !defined(_WIN32) && !defined(_WIN64)
#include <sys/time.h>

// Returns the current time as reported by gettimeofday(), expressed in
// seconds with the microsecond part as the fraction.
static double time() {
  struct timeval Now;
  gettimeofday(&Now, nullptr);
  const double WholeSeconds = double(Now.tv_sec);
  const double Microseconds = double(Now.tv_usec);
  return WholeSeconds + Microseconds / 1000000.;
}
#endif
107 
// Returns true if every character of |Input| is a letter or digit (as
// classified by isalpha/isdigit) or an underscore. We don't want to generate
// analysis information for operators, string literals, etc. by accident since
// it trips up consumers of the data.
//
// This only checks the character set: the empty string and names that start
// with a digit are accepted as well.
static bool isValidIdentifier(const std::string &Input) {
  for (const char C : Input) {
    // The <ctype.h> classifiers have undefined behaviour when handed a
    // negative value other than EOF, which is what a plain `char` holding a
    // non-ASCII byte becomes on platforms where char is signed.
    const unsigned char Byte = static_cast<unsigned char>(C);
    if (!isalpha(Byte) && !isdigit(Byte) && C != '_') {
      return false;
    }
  }
  return true;
}
119 
// Debugging aid: prints "<label>" to stdout when constructed and "</label>"
// when destroyed, so the lifetime of a scope shows up as a nested pair of
// tags. The label pointer is stored as-is, not copied.
struct RAIITracer {
  const char* mLog;

  RAIITracer(const char *log) : mLog(log) { printf("<%s>\n", mLog); }

  ~RAIITracer() { printf("</%s>\n", mLog); }
};

// Declares a tracer named `tracer` labelled with the enclosing function name.
#define TRACEFUNC RAIITracer tracer(__FUNCTION__);
133 
134 class IndexConsumer;
135 
136 // For each C++ file seen by the analysis (.cpp or .h), we track a
137 // FileInfo. This object tracks whether the file is "interesting" (i.e., whether
138 // it's in the source dir or the objdir). We also store the analysis output
139 // here.
140 struct FileInfo {
FileInfoFileInfo141   FileInfo(std::string &Rname) : Realname(Rname) {
142     switch (relativizePath(Realname)) {
143       case FileType::Generated:
144         Interesting = true;
145         Generated = true;
146         break;
147       case FileType::Source:
148         Interesting = true;
149         Generated = false;
150         break;
151       case FileType::Unknown:
152         Interesting = false;
153         Generated = false;
154         break;
155     }
156   }
157   std::string Realname;
158   std::vector<std::string> Output;
159   bool Interesting;
160   bool Generated;
161 };
162 
163 class IndexConsumer;
164 
165 class PreprocessorHook : public PPCallbacks {
166   IndexConsumer *Indexer;
167 
168 public:
PreprocessorHook(IndexConsumer * C)169   PreprocessorHook(IndexConsumer *C) : Indexer(C) {}
170 
171   virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
172                            SrcMgr::CharacteristicKind FileType,
173                            FileID PrevFID) override;
174 
175   virtual void InclusionDirective(SourceLocation HashLoc,
176                                   const Token &IncludeTok,
177                                   StringRef FileName,
178                                   bool IsAngled,
179                                   CharSourceRange FileNameRange,
180                                   const FileEntry *File,
181                                   StringRef SearchPath,
182                                   StringRef RelativePath,
183                                   const Module *Imported,
184                                   SrcMgr::CharacteristicKind FileType) override;
185 
186   virtual void MacroDefined(const Token &Tok,
187                             const MacroDirective *Md) override;
188 
189   virtual void MacroExpands(const Token &Tok, const MacroDefinition &Md,
190                             SourceRange Range, const MacroArgs *Ma) override;
191   virtual void MacroUndefined(const Token &Tok, const MacroDefinition &Md,
192                               const MacroDirective *Undef) override;
193   virtual void Defined(const Token &Tok, const MacroDefinition &Md,
194                        SourceRange Range) override;
195   virtual void Ifdef(SourceLocation Loc, const Token &Tok,
196                      const MacroDefinition &Md) override;
197   virtual void Ifndef(SourceLocation Loc, const Token &Tok,
198                       const MacroDefinition &Md) override;
199 };
200 
201 class IndexConsumer : public ASTConsumer,
202                       public RecursiveASTVisitor<IndexConsumer>,
203                       public DiagnosticConsumer {
204 private:
205   CompilerInstance &CI;
206   SourceManager &SM;
207   LangOptions &LO;
208   std::map<FileID, std::unique_ptr<FileInfo>> FileMap;
209   MangleContext *CurMangleContext;
210   ASTContext *AstContext;
211 
212   typedef RecursiveASTVisitor<IndexConsumer> Super;
213 
214   // Tracks the set of declarations that the current expression/statement is
215   // nested inside of.
216   struct AutoSetContext {
AutoSetContextIndexConsumer::AutoSetContext217     AutoSetContext(IndexConsumer *Self, NamedDecl *Context, bool VisitImplicit = false)
218         : Self(Self), Prev(Self->CurDeclContext), Decl(Context) {
219       this->VisitImplicit = VisitImplicit || (Prev ? Prev->VisitImplicit : false);
220       Self->CurDeclContext = this;
221     }
222 
~AutoSetContextIndexConsumer::AutoSetContext223     ~AutoSetContext() { Self->CurDeclContext = Prev; }
224 
225     IndexConsumer *Self;
226     AutoSetContext *Prev;
227     NamedDecl *Decl;
228     bool VisitImplicit;
229   };
230   AutoSetContext *CurDeclContext;
231 
getFileInfo(SourceLocation Loc)232   FileInfo *getFileInfo(SourceLocation Loc) {
233     FileID Id = SM.getFileID(Loc);
234 
235     std::map<FileID, std::unique_ptr<FileInfo>>::iterator It;
236     It = FileMap.find(Id);
237     if (It == FileMap.end()) {
238       // We haven't seen this file before. We need to make the FileInfo
239       // structure information ourselves
240       std::string Filename = std::string(SM.getFilename(Loc));
241       std::string Absolute;
242       // If Loc is a macro id rather than a file id, it Filename might be
243       // empty. Also for some types of file locations that are clang-internal
244       // like "<scratch>" it can return an empty Filename. In these cases we
245       // want to leave Absolute as empty.
246       if (!Filename.empty()) {
247         Absolute = getAbsolutePath(Filename);
248         if (Absolute.empty()) {
249           Absolute = Filename;
250         }
251       }
252       std::unique_ptr<FileInfo> Info = make_unique<FileInfo>(Absolute);
253       It = FileMap.insert(std::make_pair(Id, std::move(Info))).first;
254     }
255     return It->second.get();
256   }
257 
258   // Helpers for processing declarations
259   // Should we ignore this location?
isInterestingLocation(SourceLocation Loc)260   bool isInterestingLocation(SourceLocation Loc) {
261     if (Loc.isInvalid()) {
262       return false;
263     }
264 
265     return getFileInfo(Loc)->Interesting;
266   }
267 
268   // Convert location to "line:column" or "line:column-column" given length.
269   // In resulting string rep, line is 1-based and zero-padded to 5 digits, while
270   // column is 0-based and unpadded.
locationToString(SourceLocation Loc,size_t Length=0)271   std::string locationToString(SourceLocation Loc, size_t Length = 0) {
272     std::pair<FileID, unsigned> Pair = SM.getDecomposedLoc(Loc);
273 
274     bool IsInvalid;
275     unsigned Line = SM.getLineNumber(Pair.first, Pair.second, &IsInvalid);
276     if (IsInvalid) {
277       return "";
278     }
279     unsigned Column = SM.getColumnNumber(Pair.first, Pair.second, &IsInvalid);
280     if (IsInvalid) {
281       return "";
282     }
283 
284     if (Length) {
285       return stringFormat("%05d:%d-%d", Line, Column - 1, Column - 1 + Length);
286     } else {
287       return stringFormat("%05d:%d", Line, Column - 1);
288     }
289   }
290 
291   // Convert SourceRange to "line-line".
292   // In the resulting string rep, line is 1-based.
lineRangeToString(SourceRange Range)293   std::string lineRangeToString(SourceRange Range) {
294     std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin());
295     std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd());
296 
297     bool IsInvalid;
298     unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid);
299     if (IsInvalid) {
300       return "";
301     }
302     unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid);
303     if (IsInvalid) {
304       return "";
305     }
306 
307     return stringFormat("%d-%d", Line1, Line2);
308   }
309 
310   // Convert SourceRange to "line:column-line:column".
311   // In the resulting string rep, line is 1-based, column is 0-based.
fullRangeToString(SourceRange Range)312   std::string fullRangeToString(SourceRange Range) {
313     std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin());
314     std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd());
315 
316     bool IsInvalid;
317     unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid);
318     if (IsInvalid) {
319       return "";
320     }
321     unsigned Column1 = SM.getColumnNumber(Begin.first, Begin.second, &IsInvalid);
322     if (IsInvalid) {
323       return "";
324     }
325     unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid);
326     if (IsInvalid) {
327       return "";
328     }
329     unsigned Column2 = SM.getColumnNumber(End.first, End.second, &IsInvalid);
330     if (IsInvalid) {
331       return "";
332     }
333 
334     return stringFormat("%d:%d-%d:%d", Line1, Column1 - 1, Line2, Column2 - 1);
335   }
336 
337   // Returns the qualified name of `d` without considering template parameters.
getQualifiedName(const NamedDecl * D)338   std::string getQualifiedName(const NamedDecl *D) {
339     const DeclContext *Ctx = D->getDeclContext();
340     if (Ctx->isFunctionOrMethod()) {
341       return D->getQualifiedNameAsString();
342     }
343 
344     std::vector<const DeclContext *> Contexts;
345 
346     // Collect contexts.
347     while (Ctx && isa<NamedDecl>(Ctx)) {
348       Contexts.push_back(Ctx);
349       Ctx = Ctx->getParent();
350     }
351 
352     std::string Result;
353 
354     std::reverse(Contexts.begin(), Contexts.end());
355 
356     for (const DeclContext *DC : Contexts) {
357       if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC)) {
358         Result += Spec->getNameAsString();
359 
360         if (Spec->getSpecializationKind() == TSK_ExplicitSpecialization) {
361           std::string Backing;
362           llvm::raw_string_ostream Stream(Backing);
363           const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
364           printTemplateArgumentList(
365               Stream, TemplateArgs.asArray(), PrintingPolicy(CI.getLangOpts()));
366           Result += Stream.str();
367         }
368       } else if (const auto *Nd = dyn_cast<NamespaceDecl>(DC)) {
369         if (Nd->isAnonymousNamespace() || Nd->isInline()) {
370           continue;
371         }
372         Result += Nd->getNameAsString();
373       } else if (const auto *Rd = dyn_cast<RecordDecl>(DC)) {
374         if (!Rd->getIdentifier()) {
375           Result += "(anonymous)";
376         } else {
377           Result += Rd->getNameAsString();
378         }
379       } else if (const auto *Fd = dyn_cast<FunctionDecl>(DC)) {
380         Result += Fd->getNameAsString();
381       } else if (const auto *Ed = dyn_cast<EnumDecl>(DC)) {
382         // C++ [dcl.enum]p10: Each enum-name and each unscoped
383         // enumerator is declared in the scope that immediately contains
384         // the enum-specifier. Each scoped enumerator is declared in the
385         // scope of the enumeration.
386         if (Ed->isScoped() || Ed->getIdentifier())
387           Result += Ed->getNameAsString();
388         else
389           continue;
390       } else {
391         Result += cast<NamedDecl>(DC)->getNameAsString();
392       }
393       Result += "::";
394     }
395 
396     if (D->getDeclName())
397       Result += D->getNameAsString();
398     else
399       Result += "(anonymous)";
400 
401     return Result;
402   }
403 
mangleLocation(SourceLocation Loc,std::string Backup=std::string ())404   std::string mangleLocation(SourceLocation Loc,
405                              std::string Backup = std::string()) {
406     FileInfo *F = getFileInfo(Loc);
407     std::string Filename = F->Realname;
408     if (Filename.length() == 0 && Backup.length() != 0) {
409       return Backup;
410     }
411     if (F->Generated) {
412       // Since generated files may be different on different platforms,
413       // we need to include a platform-specific thing in the hash. Otherwise
414       // we can end up with hash collisions where different symbols from
415       // different platforms map to the same thing.
416       char* Platform = getenv("MOZSEARCH_PLATFORM");
417       Filename = std::string(Platform ? Platform : "") + std::string("@") + Filename;
418     }
419     return hash(Filename + std::string("@") + locationToString(Loc));
420   }
421 
// Returns true for characters that mangleFile() keeps verbatim: letters and
// digits (as classified by isalpha/isdigit), underscore and forward slash.
bool isAcceptableSymbolChar(char c) {
  // The <ctype.h> classifiers have undefined behaviour for negative arguments
  // other than EOF, so convert through unsigned char before classifying.
  const unsigned char Byte = static_cast<unsigned char>(c);
  return isalpha(Byte) || isdigit(Byte) || c == '_' || c == '/';
}
425 
mangleFile(std::string Filename,FileType Type)426   std::string mangleFile(std::string Filename, FileType Type) {
427     // "Mangle" the file path, such that:
428     // 1. The majority of paths will still be mostly human-readable.
429     // 2. The sanitization algorithm doesn't produce collisions where two
430     //    different unsanitized paths can result in the same sanitized paths.
431     // 3. The produced symbol doesn't cause problems with downstream consumers.
432     // In order to accomplish this, we keep alphanumeric chars, underscores,
433     // and slashes, and replace everything else with an "@xx" hex encoding.
434     // The majority of path characters are letters and slashes which don't get
435     // encoded, so that satisifies (1). Since "@" characters in the unsanitized
436     // path get encoded, there should be no "@" characters in the sanitized path
437     // that got preserved from the unsanitized input, so that should satisfy (2).
438     // And (3) was done by trial-and-error. Note in particular the dot (.)
439     // character needs to be encoded, or the symbol-search feature of mozsearch
440     // doesn't work correctly, as all dot characters in the symbol query get
441     // replaced by #.
442     for (size_t i = 0; i < Filename.length(); i++) {
443       char c = Filename[i];
444       if (isAcceptableSymbolChar(c)) {
445         continue;
446       }
447       char hex[4];
448       sprintf(hex, "@%02X", ((int)c) & 0xFF);
449       Filename.replace(i, 1, hex);
450       i += 2;
451     }
452 
453     if (Type == FileType::Generated) {
454       // Since generated files may be different on different platforms,
455       // we need to include a platform-specific thing in the hash. Otherwise
456       // we can end up with hash collisions where different symbols from
457       // different platforms map to the same thing.
458       char* Platform = getenv("MOZSEARCH_PLATFORM");
459       Filename = std::string(Platform ? Platform : "") + std::string("@") + Filename;
460     }
461     return Filename;
462   }
463 
// Returns |Name| with every space replaced by an underscore.
std::string mangleQualifiedName(std::string Name) {
  for (char &Ch : Name) {
    if (Ch == ' ') {
      Ch = '_';
    }
  }
  return Name;
}
468 
getMangledName(clang::MangleContext * Ctx,const clang::NamedDecl * Decl)469   std::string getMangledName(clang::MangleContext *Ctx,
470                              const clang::NamedDecl *Decl) {
471     if (isa<FunctionDecl>(Decl) && cast<FunctionDecl>(Decl)->isExternC()) {
472       return cast<FunctionDecl>(Decl)->getNameAsString();
473     }
474 
475     if (isa<FunctionDecl>(Decl) || isa<VarDecl>(Decl)) {
476       const DeclContext *DC = Decl->getDeclContext();
477       if (isa<TranslationUnitDecl>(DC) || isa<NamespaceDecl>(DC) ||
478           isa<LinkageSpecDecl>(DC) ||
479           // isa<ExternCContextDecl>(DC) ||
480           isa<TagDecl>(DC)) {
481         llvm::SmallVector<char, 512> Output;
482         llvm::raw_svector_ostream Out(Output);
483 #if CLANG_VERSION_MAJOR >= 11
484         // This code changed upstream in version 11:
485         // https://github.com/llvm/llvm-project/commit/29e1a16be8216066d1ed733a763a749aed13ff47
486         GlobalDecl GD;
487         if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) {
488           GD = GlobalDecl(D, Ctor_Complete);
489         } else if (const CXXDestructorDecl *D =
490                        dyn_cast<CXXDestructorDecl>(Decl)) {
491           GD = GlobalDecl(D, Dtor_Complete);
492         } else {
493           GD = GlobalDecl(Decl);
494         }
495         Ctx->mangleName(GD, Out);
496 #else
497         if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) {
498           Ctx->mangleCXXCtor(D, CXXCtorType::Ctor_Complete, Out);
499         } else if (const CXXDestructorDecl *D =
500                        dyn_cast<CXXDestructorDecl>(Decl)) {
501           Ctx->mangleCXXDtor(D, CXXDtorType::Dtor_Complete, Out);
502         } else {
503           Ctx->mangleName(Decl, Out);
504         }
505 #endif
506         return Out.str().str();
507       } else {
508         return std::string("V_") + mangleLocation(Decl->getLocation()) +
509                std::string("_") + hash(std::string(Decl->getName()));
510       }
511     } else if (isa<TagDecl>(Decl) || isa<TypedefNameDecl>(Decl) ||
512                isa<ObjCInterfaceDecl>(Decl)) {
513       if (!Decl->getIdentifier()) {
514         // Anonymous.
515         return std::string("T_") + mangleLocation(Decl->getLocation());
516       }
517 
518       return std::string("T_") + mangleQualifiedName(getQualifiedName(Decl));
519     } else if (isa<NamespaceDecl>(Decl) || isa<NamespaceAliasDecl>(Decl)) {
520       if (!Decl->getIdentifier()) {
521         // Anonymous.
522         return std::string("NS_") + mangleLocation(Decl->getLocation());
523       }
524 
525       return std::string("NS_") + mangleQualifiedName(getQualifiedName(Decl));
526     } else if (const ObjCIvarDecl *D2 = dyn_cast<ObjCIvarDecl>(Decl)) {
527       const ObjCInterfaceDecl *Iface = D2->getContainingInterface();
528       return std::string("F_<") + getMangledName(Ctx, Iface) + ">_" +
529              D2->getNameAsString();
530     } else if (const FieldDecl *D2 = dyn_cast<FieldDecl>(Decl)) {
531       const RecordDecl *Record = D2->getParent();
532       return std::string("F_<") + getMangledName(Ctx, Record) + ">_" +
533              D2->getNameAsString();
534     } else if (const EnumConstantDecl *D2 = dyn_cast<EnumConstantDecl>(Decl)) {
535       const DeclContext *DC = Decl->getDeclContext();
536       if (const NamedDecl *Named = dyn_cast<NamedDecl>(DC)) {
537         return std::string("E_<") + getMangledName(Ctx, Named) + ">_" +
538                D2->getNameAsString();
539       }
540     }
541 
542     assert(false);
543     return std::string("");
544   }
545 
debugLocation(SourceLocation Loc)546   void debugLocation(SourceLocation Loc) {
547     std::string S = locationToString(Loc);
548     StringRef Filename = SM.getFilename(Loc);
549     printf("--> %s %s\n", std::string(Filename).c_str(), S.c_str());
550   }
551 
debugRange(SourceRange Range)552   void debugRange(SourceRange Range) {
553     printf("Range\n");
554     debugLocation(Range.getBegin());
555     debugLocation(Range.getEnd());
556   }
557 
558 public:
// Binds the consumer to |CI|, starts with no mangle context, AST context,
// declaration context or template stack, and registers a PreprocessorHook
// pointing back at this object with CI's preprocessor.
IndexConsumer(CompilerInstance &CI)
    : CI(CI), SM(CI.getSourceManager()), LO(CI.getLangOpts()), CurMangleContext(nullptr),
      AstContext(nullptr), CurDeclContext(nullptr), TemplateStack(nullptr) {
  Preprocessor &PP = CI.getPreprocessor();
  PP.addPPCallbacks(make_unique<PreprocessorHook>(this));
}
565 
clone(DiagnosticsEngine & Diags) const566   virtual DiagnosticConsumer *clone(DiagnosticsEngine &Diags) const {
567     return new IndexConsumer(CI);
568   }
569 
570 #if !defined(_WIN32) && !defined(_WIN64)
571   struct AutoTime {
AutoTimeIndexConsumer::AutoTime572     AutoTime(double *Counter) : Counter(Counter), Start(time()) {}
~AutoTimeIndexConsumer::AutoTime573     ~AutoTime() {
574       if (Start) {
575         *Counter += time() - Start;
576       }
577     }
stopIndexConsumer::AutoTime578     void stop() {
579       *Counter += time() - Start;
580       Start = 0;
581     }
582     double *Counter;
583     double Start;
584   };
585 #endif
586 
587   // All we need is to follow the final declaration.
HandleTranslationUnit(ASTContext & Ctx)588   virtual void HandleTranslationUnit(ASTContext &Ctx) {
589     CurMangleContext =
590       clang::ItaniumMangleContext::create(Ctx, CI.getDiagnostics());
591 
592     AstContext = &Ctx;
593     TraverseDecl(Ctx.getTranslationUnitDecl());
594 
595     // Emit the JSON data for all files now.
596     std::map<FileID, std::unique_ptr<FileInfo>>::iterator It;
597     for (It = FileMap.begin(); It != FileMap.end(); It++) {
598       if (!It->second->Interesting) {
599         continue;
600       }
601 
602       FileInfo &Info = *It->second;
603 
604       std::string Filename = Outdir + Info.Realname;
605       std::string SrcFilename = Info.Generated
606         ? Objdir + Info.Realname.substr(GENERATED.length())
607         : Srcdir + PATHSEP_STRING + Info.Realname;
608 
609       ensurePath(Filename);
610 
611       // We lock the output file in case some other clang process is trying to
612       // write to it at the same time.
613       AutoLockFile Lock(SrcFilename, Filename);
614 
615       if (!Lock.success()) {
616         fprintf(stderr, "Unable to lock file %s\n", Filename.c_str());
617         exit(1);
618       }
619 
620       // Merge our results with the existing lines from the output file.
621       // This ensures that header files that are included multiple times
622       // in different ways are analyzed completely.
623 
624       FILE *Fp = Lock.openFile();
625       if (!Fp) {
626         fprintf(stderr, "Unable to open input file %s\n", Filename.c_str());
627         exit(1);
628       }
629       FILE *OutFp = Lock.openTmp();
630       if (!OutFp) {
631         fprintf(stderr, "Unable to open tmp out file for %s\n", Filename.c_str());
632         exit(1);
633       }
634 
635       // Sort our new results and get an iterator to them
636       std::sort(Info.Output.begin(), Info.Output.end());
637       std::vector<std::string>::const_iterator NewLinesIter = Info.Output.begin();
638       std::string LastNewWritten;
639 
640       // Loop over the existing (sorted) lines in the analysis output file.
641       char Buffer[65536];
642       while (fgets(Buffer, sizeof(Buffer), Fp)) {
643         std::string OldLine(Buffer);
644 
645         // Write any results from Info.Output that are lexicographically
646         // smaller than OldLine (read from the existing file), but make sure
647         // to skip duplicates. Keep advacing NewLinesIter until we reach an
648         // entry that is lexicographically greater than OldLine.
649         for (; NewLinesIter != Info.Output.end(); NewLinesIter++) {
650           if (*NewLinesIter > OldLine) {
651             break;
652           }
653           if (*NewLinesIter == OldLine) {
654             continue;
655           }
656           if (*NewLinesIter == LastNewWritten) {
657             // dedupe the new entries being written
658             continue;
659           }
660           if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) {
661             fprintf(stderr, "Unable to write to tmp output file for %s\n", Filename.c_str());
662             exit(1);
663           }
664           LastNewWritten = *NewLinesIter;
665         }
666 
667         // Write the entry read from the existing file.
668         if (fwrite(OldLine.c_str(), OldLine.length(), 1, OutFp) != 1) {
669           fprintf(stderr, "Unable to write to tmp output file for %s\n", Filename.c_str());
670           exit(1);
671         }
672       }
673 
674       // We finished reading from Fp
675       fclose(Fp);
676 
677       // Finish iterating our new results, discarding duplicates
678       for (; NewLinesIter != Info.Output.end(); NewLinesIter++) {
679         if (*NewLinesIter == LastNewWritten) {
680           continue;
681         }
682         if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) {
683           fprintf(stderr, "Unable to write to tmp output file for %s\n", Filename.c_str());
684           exit(1);
685         }
686         LastNewWritten = *NewLinesIter;
687       }
688 
689       // Done writing all the things, close it and replace the old output file
690       // with the new one.
691       fclose(OutFp);
692       if (!Lock.moveTmp()) {
693         fprintf(stderr, "Unable to move tmp output file into place for %s (err %d)\n", Filename.c_str(), errno);
694         exit(1);
695       }
696     }
697   }
698 
699   // Return a list of mangled names of all the methods that the given method
700   // overrides.
findOverriddenMethods(const CXXMethodDecl * Method,std::vector<std::string> & Symbols)701   void findOverriddenMethods(const CXXMethodDecl *Method,
702                              std::vector<std::string> &Symbols) {
703     std::string Mangled = getMangledName(CurMangleContext, Method);
704     Symbols.push_back(Mangled);
705 
706     CXXMethodDecl::method_iterator Iter = Method->begin_overridden_methods();
707     CXXMethodDecl::method_iterator End = Method->end_overridden_methods();
708     for (; Iter != End; Iter++) {
709       const CXXMethodDecl *Decl = *Iter;
710       if (Decl->isTemplateInstantiation()) {
711         Decl = dyn_cast<CXXMethodDecl>(Decl->getTemplateInstantiationPattern());
712       }
713       return findOverriddenMethods(Decl, Symbols);
714     }
715   }
716 
717   // Unfortunately, we have to override all these methods in order to track the
718   // context we're inside.
719 
TraverseEnumDecl(EnumDecl * D)720   bool TraverseEnumDecl(EnumDecl *D) {
721     AutoSetContext Asc(this, D);
722     return Super::TraverseEnumDecl(D);
723   }
TraverseRecordDecl(RecordDecl * D)724   bool TraverseRecordDecl(RecordDecl *D) {
725     AutoSetContext Asc(this, D);
726     return Super::TraverseRecordDecl(D);
727   }
TraverseCXXRecordDecl(CXXRecordDecl * D)728   bool TraverseCXXRecordDecl(CXXRecordDecl *D) {
729     AutoSetContext Asc(this, D);
730     return Super::TraverseCXXRecordDecl(D);
731   }
TraverseFunctionDecl(FunctionDecl * D)732   bool TraverseFunctionDecl(FunctionDecl *D) {
733     AutoSetContext Asc(this, D);
734     const FunctionDecl *Def;
735     // (See the larger AutoTemplateContext comment for more information.) If a
736     // method on a templated class is declared out-of-line, we need to analyze
737     // the definition inside the scope of the template or else we won't properly
738     // handle member access on the templated type.
739     if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
740       TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
741     }
742     return Super::TraverseFunctionDecl(D);
743   }
TraverseCXXMethodDecl(CXXMethodDecl * D)744   bool TraverseCXXMethodDecl(CXXMethodDecl *D) {
745     AutoSetContext Asc(this, D);
746     const FunctionDecl *Def;
747     // See TraverseFunctionDecl.
748     if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
749       TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
750     }
751     return Super::TraverseCXXMethodDecl(D);
752   }
TraverseCXXConstructorDecl(CXXConstructorDecl * D)753   bool TraverseCXXConstructorDecl(CXXConstructorDecl *D) {
754     AutoSetContext Asc(this, D, /*VisitImplicit=*/true);
755     const FunctionDecl *Def;
756     // See TraverseFunctionDecl.
757     if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
758       TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
759     }
760     return Super::TraverseCXXConstructorDecl(D);
761   }
TraverseCXXConversionDecl(CXXConversionDecl * D)762   bool TraverseCXXConversionDecl(CXXConversionDecl *D) {
763     AutoSetContext Asc(this, D);
764     const FunctionDecl *Def;
765     // See TraverseFunctionDecl.
766     if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
767       TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
768     }
769     return Super::TraverseCXXConversionDecl(D);
770   }
TraverseCXXDestructorDecl(CXXDestructorDecl * D)771   bool TraverseCXXDestructorDecl(CXXDestructorDecl *D) {
772     AutoSetContext Asc(this, D);
773     const FunctionDecl *Def;
774     // See TraverseFunctionDecl.
775     if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
776       TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
777     }
778     return Super::TraverseCXXDestructorDecl(D);
779   }
780 
781   // Used to keep track of the context in which a token appears.
782   struct Context {
783     // Ultimately this becomes the "context" JSON property.
784     std::string Name;
785 
786     // Ultimately this becomes the "contextsym" JSON property.
787     std::vector<std::string> Symbols;
788 
ContextIndexConsumer::Context789     Context() {}
ContextIndexConsumer::Context790     Context(std::string Name, std::vector<std::string> Symbols)
791         : Name(Name), Symbols(Symbols) {}
792   };
793 
translateContext(NamedDecl * D)794   Context translateContext(NamedDecl *D) {
795     const FunctionDecl *F = dyn_cast<FunctionDecl>(D);
796     if (F && F->isTemplateInstantiation()) {
797       D = F->getTemplateInstantiationPattern();
798     }
799 
800     std::vector<std::string> Symbols = {getMangledName(CurMangleContext, D)};
801     if (CXXMethodDecl::classof(D)) {
802       Symbols.clear();
803       findOverriddenMethods(dyn_cast<CXXMethodDecl>(D), Symbols);
804     }
805     return Context(D->getQualifiedNameAsString(), Symbols);
806   }
807 
getContext(SourceLocation Loc)808   Context getContext(SourceLocation Loc) {
809     if (SM.isMacroBodyExpansion(Loc)) {
810       // If we're inside a macro definition, we don't return any context. It
811       // will probably not be what the user expects if we do.
812       return Context();
813     }
814 
815     if (CurDeclContext) {
816       return translateContext(CurDeclContext->Decl);
817     }
818     return Context();
819   }
820 
821   // Similar to GetContext(SourceLocation), but it skips the declaration passed
822   // in. This is useful if we want the context of a declaration that's already
823   // on the stack.
getContext(Decl * D)824   Context getContext(Decl *D) {
825     if (SM.isMacroBodyExpansion(D->getLocation())) {
826       // If we're inside a macro definition, we don't return any context. It
827       // will probably not be what the user expects if we do.
828       return Context();
829     }
830 
831     AutoSetContext *Ctxt = CurDeclContext;
832     while (Ctxt) {
833       if (Ctxt->Decl != D) {
834         return translateContext(Ctxt->Decl);
835       }
836       Ctxt = Ctxt->Prev;
837     }
838     return Context();
839   }
840 
concatSymbols(const std::vector<std::string> Symbols)841   static std::string concatSymbols(const std::vector<std::string> Symbols) {
842     if (Symbols.empty()) {
843       return "";
844     }
845 
846     size_t Total = 0;
847     for (auto It = Symbols.begin(); It != Symbols.end(); It++) {
848       Total += It->length();
849     }
850     Total += Symbols.size() - 1;
851 
852     std::string SymbolList;
853     SymbolList.reserve(Total);
854 
855     for (auto It = Symbols.begin(); It != Symbols.end(); It++) {
856       std::string Symbol = *It;
857 
858       if (It != Symbols.begin()) {
859         SymbolList.push_back(',');
860       }
861       SymbolList.append(Symbol);
862     }
863 
864     return SymbolList;
865   }
866 
867   // Analyzing template code is tricky. Suppose we have this code:
868   //
869   //   template<class T>
870   //   bool Foo(T* ptr) { return T::StaticMethod(ptr); }
871   //
872   // If we analyze the body of Foo without knowing the type T, then we will not
873   // be able to generate any information for StaticMethod. However, analyzing
874   // Foo for every possible instantiation is inefficient and it also generates
875   // too much data in some cases. For example, the following code would generate
876   // one definition of Baz for every instantiation, which is undesirable:
877   //
878   //   template<class T>
879   //   class Bar { struct Baz { ... }; };
880   //
881   // To solve this problem, we analyze templates only once. We do so in a
882   // GatherDependent mode where we look for "dependent scoped member
883   // expressions" (i.e., things like StaticMethod). We keep track of the
884   // locations of these expressions. If we find one or more of them, we analyze
885   // the template for each instantiation, in an AnalyzeDependent mode. This mode
886   // ignores all source locations except for the ones where we found dependent
887   // scoped member expressions before. For these locations, we generate a
888   // separate JSON result for each instantiation.
889   //
  // We inherit our parent's mode if it exists.  This is because if our
891   // parent is in analyze mode, it means we've already lived a full life in
892   // gather mode and we must not restart in gather mode or we'll cause the
893   // indexer to visit EVERY identifier, which is way too much data.
894   struct AutoTemplateContext {
AutoTemplateContextIndexConsumer::AutoTemplateContext895     AutoTemplateContext(IndexConsumer *Self)
896         : Self(Self)
897         , CurMode(Self->TemplateStack ? Self->TemplateStack->CurMode : Mode::GatherDependent)
898         , Parent(Self->TemplateStack) {
899       Self->TemplateStack = this;
900     }
901 
~AutoTemplateContextIndexConsumer::AutoTemplateContext902     ~AutoTemplateContext() { Self->TemplateStack = Parent; }
903 
904     // We traverse templates in two modes:
905     enum class Mode {
906       // Gather mode does not traverse into specializations. It looks for
907       // locations where it would help to have more info from template
908       // specializations.
909       GatherDependent,
910 
911       // Analyze mode traverses into template specializations and records
912       // information about token locations saved in gather mode.
913       AnalyzeDependent,
914     };
915 
916     // We found a dependent scoped member expression! Keep track of it for
917     // later.
visitDependentIndexConsumer::AutoTemplateContext918     void visitDependent(SourceLocation Loc) {
919       if (CurMode == Mode::AnalyzeDependent) {
920         return;
921       }
922 
923       DependentLocations.insert(Loc.getRawEncoding());
924       if (Parent) {
925         Parent->visitDependent(Loc);
926       }
927     }
928 
inGatherModeIndexConsumer::AutoTemplateContext929     bool inGatherMode() {
930       return CurMode == Mode::GatherDependent;
931     }
932 
933     // Do we need to perform the extra AnalyzeDependent passes (one per
934     // instantiation)?
needsAnalysisIndexConsumer::AutoTemplateContext935     bool needsAnalysis() const {
936       if (!DependentLocations.empty()) {
937         return true;
938       }
939       if (Parent) {
940         return Parent->needsAnalysis();
941       }
942       return false;
943     }
944 
switchModeIndexConsumer::AutoTemplateContext945     void switchMode() { CurMode = Mode::AnalyzeDependent; }
946 
947     // Do we want to analyze each template instantiation separately?
shouldVisitTemplateInstantiationsIndexConsumer::AutoTemplateContext948     bool shouldVisitTemplateInstantiations() const {
949       if (CurMode == Mode::AnalyzeDependent) {
950         return true;
951       }
952       if (Parent) {
953         return Parent->shouldVisitTemplateInstantiations();
954       }
955       return false;
956     }
957 
958     // For a given expression/statement, should we emit JSON data for it?
shouldVisitIndexConsumer::AutoTemplateContext959     bool shouldVisit(SourceLocation Loc) {
960       if (CurMode == Mode::GatherDependent) {
961         return true;
962       }
963       if (DependentLocations.find(Loc.getRawEncoding()) !=
964           DependentLocations.end()) {
965         return true;
966       }
967       if (Parent) {
968         return Parent->shouldVisit(Loc);
969       }
970       return false;
971     }
972 
973   private:
974     IndexConsumer *Self;
975     Mode CurMode;
976     std::unordered_set<unsigned> DependentLocations;
977     AutoTemplateContext *Parent;
978   };
979 
980   AutoTemplateContext *TemplateStack;
981 
shouldVisitTemplateInstantiations() const982   bool shouldVisitTemplateInstantiations() const {
983     if (TemplateStack) {
984       return TemplateStack->shouldVisitTemplateInstantiations();
985     }
986     return false;
987   }
988 
shouldVisitImplicitCode() const989   bool shouldVisitImplicitCode() const {
990     return CurDeclContext && CurDeclContext->VisitImplicit;
991   }
992 
TraverseClassTemplateDecl(ClassTemplateDecl * D)993   bool TraverseClassTemplateDecl(ClassTemplateDecl *D) {
994     AutoTemplateContext Atc(this);
995     Super::TraverseClassTemplateDecl(D);
996 
997     if (!Atc.needsAnalysis()) {
998       return true;
999     }
1000 
1001     Atc.switchMode();
1002 
1003     if (D != D->getCanonicalDecl()) {
1004       return true;
1005     }
1006 
1007     for (auto *Spec : D->specializations()) {
1008       for (auto *Rd : Spec->redecls()) {
1009         // We don't want to visit injected-class-names in this traversal.
1010         if (cast<CXXRecordDecl>(Rd)->isInjectedClassName())
1011           continue;
1012 
1013         TraverseDecl(Rd);
1014       }
1015     }
1016 
1017     return true;
1018   }
1019 
TraverseFunctionTemplateDecl(FunctionTemplateDecl * D)1020   bool TraverseFunctionTemplateDecl(FunctionTemplateDecl *D) {
1021     AutoTemplateContext Atc(this);
1022     if (Atc.inGatherMode()) {
1023       Super::TraverseFunctionTemplateDecl(D);
1024     }
1025 
1026     if (!Atc.needsAnalysis()) {
1027       return true;
1028     }
1029 
1030     Atc.switchMode();
1031 
1032     if (D != D->getCanonicalDecl()) {
1033       return true;
1034     }
1035 
1036     for (auto *Spec : D->specializations()) {
1037       for (auto *Rd : Spec->redecls()) {
1038         TraverseDecl(Rd);
1039       }
1040     }
1041 
1042     return true;
1043   }
1044 
shouldVisit(SourceLocation Loc)1045   bool shouldVisit(SourceLocation Loc) {
1046     if (TemplateStack) {
1047       return TemplateStack->shouldVisit(Loc);
1048     }
1049     return true;
1050   }
1051 
1052   enum {
1053     // Flag to omit the identifier from being cross-referenced across files.
1054     // This is usually desired for local variables.
1055     NoCrossref = 1 << 0,
1056     // Flag to indicate the token with analysis data is not an identifier. Indicates
1057     // we want to skip the check that tries to ensure a sane identifier token.
1058     NotIdentifierToken = 1 << 1,
1059     // This indicates that the end of the provided SourceRange is valid and
1060     // should be respected. If this flag is not set, the visitIdentifier
1061     // function should use only the start of the SourceRange and auto-detect
1062     // the end based on whatever token is found at the start.
1063     LocRangeEndValid = 1 << 2
1064   };
1065 
1066   // This is the only function that emits analysis JSON data. It should be
1067   // called for each identifier that corresponds to a symbol.
visitIdentifier(const char * Kind,const char * SyntaxKind,llvm::StringRef QualName,SourceRange LocRange,const std::vector<std::string> & Symbols,Context TokenContext=Context (),int Flags=0,SourceRange PeekRange=SourceRange (),SourceRange NestingRange=SourceRange ())1068   void visitIdentifier(const char *Kind, const char *SyntaxKind,
1069                        llvm::StringRef QualName, SourceRange LocRange,
1070                        const std::vector<std::string> &Symbols,
1071                        Context TokenContext = Context(), int Flags = 0,
1072                        SourceRange PeekRange = SourceRange(),
1073                        SourceRange NestingRange = SourceRange()) {
1074     SourceLocation Loc = LocRange.getBegin();
1075     if (!shouldVisit(Loc)) {
1076       return;
1077     }
1078 
1079     // Find the file positions corresponding to the token.
1080     unsigned StartOffset = SM.getFileOffset(Loc);
1081     unsigned EndOffset = (Flags & LocRangeEndValid)
1082         ? SM.getFileOffset(LocRange.getEnd())
1083         : StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
1084 
1085     std::string LocStr = locationToString(Loc, EndOffset - StartOffset);
1086     std::string RangeStr = locationToString(Loc, EndOffset - StartOffset);
1087     std::string PeekRangeStr;
1088 
1089     if (!(Flags & NotIdentifierToken)) {
1090       // Get the token's characters so we can make sure it's a valid token.
1091       const char *StartChars = SM.getCharacterData(Loc);
1092       std::string Text(StartChars, EndOffset - StartOffset);
1093       if (!isValidIdentifier(Text)) {
1094         return;
1095       }
1096     }
1097 
1098     FileInfo *F = getFileInfo(Loc);
1099 
1100     std::string SymbolList;
1101 
1102     // Reserve space in symbolList for everything in `symbols`. `symbols` can
1103     // contain some very long strings.
1104     size_t Total = 0;
1105     for (auto It = Symbols.begin(); It != Symbols.end(); It++) {
1106       Total += It->length();
1107     }
1108 
1109     // Space for commas.
1110     Total += Symbols.size() - 1;
1111     SymbolList.reserve(Total);
1112 
1113     // For each symbol, generate one "target":1 item. We want to find this line
1114     // if someone searches for any one of these symbols.
1115     for (auto It = Symbols.begin(); It != Symbols.end(); It++) {
1116       std::string Symbol = *It;
1117 
1118       if (!(Flags & NoCrossref)) {
1119         JSONFormatter Fmt;
1120 
1121         Fmt.add("loc", LocStr);
1122         Fmt.add("target", 1);
1123         Fmt.add("kind", Kind);
1124         Fmt.add("pretty", QualName.data());
1125         Fmt.add("sym", Symbol);
1126         if (!TokenContext.Name.empty()) {
1127           Fmt.add("context", TokenContext.Name);
1128         }
1129         std::string ContextSymbol = concatSymbols(TokenContext.Symbols);
1130         if (!ContextSymbol.empty()) {
1131           Fmt.add("contextsym", ContextSymbol);
1132         }
1133         if (PeekRange.isValid()) {
1134           PeekRangeStr = lineRangeToString(PeekRange);
1135           if (!PeekRangeStr.empty()) {
1136             Fmt.add("peekRange", PeekRangeStr);
1137           }
1138         }
1139 
1140         std::string S;
1141         Fmt.format(S);
1142         F->Output.push_back(std::move(S));
1143       }
1144 
1145       if (It != Symbols.begin()) {
1146         SymbolList.push_back(',');
1147       }
1148       SymbolList.append(Symbol);
1149     }
1150 
1151     // Generate a single "source":1 for all the symbols. If we search from here,
1152     // we want to union the results for every symbol in `symbols`.
1153     JSONFormatter Fmt;
1154 
1155     Fmt.add("loc", RangeStr);
1156     Fmt.add("source", 1);
1157 
1158     if (NestingRange.isValid()) {
1159       std::string NestingRangeStr = fullRangeToString(NestingRange);
1160       if (!NestingRangeStr.empty()) {
1161         Fmt.add("nestingRange", NestingRangeStr);
1162       }
1163     }
1164 
1165     std::string Syntax;
1166     if (Flags & NoCrossref) {
1167       Fmt.add("syntax", "");
1168     } else {
1169       Syntax = Kind;
1170       Syntax.push_back(',');
1171       Syntax.append(SyntaxKind);
1172       Fmt.add("syntax", Syntax);
1173     }
1174 
1175     std::string Pretty(SyntaxKind);
1176     Pretty.push_back(' ');
1177     Pretty.append(QualName.data());
1178     Fmt.add("pretty", Pretty);
1179 
1180     Fmt.add("sym", SymbolList);
1181 
1182     if (Flags & NoCrossref) {
1183       Fmt.add("no_crossref", 1);
1184     }
1185 
1186     std::string Buf;
1187     Fmt.format(Buf);
1188     F->Output.push_back(std::move(Buf));
1189   }
1190 
visitIdentifier(const char * Kind,const char * SyntaxKind,llvm::StringRef QualName,SourceLocation Loc,std::string Symbol,Context TokenContext=Context (),int Flags=0,SourceRange PeekRange=SourceRange (),SourceRange NestingRange=SourceRange ())1191   void visitIdentifier(const char *Kind, const char *SyntaxKind,
1192                        llvm::StringRef QualName, SourceLocation Loc, std::string Symbol,
1193                        Context TokenContext = Context(), int Flags = 0,
1194                        SourceRange PeekRange = SourceRange(),
1195                        SourceRange NestingRange = SourceRange()) {
1196     std::vector<std::string> V = {Symbol};
1197     visitIdentifier(Kind, SyntaxKind, QualName, SourceRange(Loc), V, TokenContext, Flags,
1198                     PeekRange, NestingRange);
1199   }
1200 
normalizeLocation(SourceLocation * Loc)1201   void normalizeLocation(SourceLocation *Loc) {
1202     *Loc = SM.getSpellingLoc(*Loc);
1203   }
1204 
1205   // For cases where the left-brace is not directly accessible from the AST,
1206   // helper to use the lexer to find the brace.  Make sure you're picking the
1207   // start location appropriately!
findLeftBraceFromLoc(SourceLocation Loc)1208   SourceLocation findLeftBraceFromLoc(SourceLocation Loc) {
1209     return Lexer::findLocationAfterToken(Loc, tok::l_brace, SM, LO, false);
1210   }
1211 
1212   // If the provided statement is compound, return its range.
getCompoundStmtRange(Stmt * D)1213   SourceRange getCompoundStmtRange(Stmt* D) {
1214     if (!D) {
1215       return SourceRange();
1216     }
1217 
1218     CompoundStmt *D2 = dyn_cast<CompoundStmt>(D);
1219     if (D2) {
1220       return D2->getSourceRange();
1221     }
1222 
1223     return SourceRange();
1224   }
1225 
getFunctionPeekRange(FunctionDecl * D)1226   SourceRange getFunctionPeekRange(FunctionDecl* D) {
1227     // We always start at the start of the function decl, which may include the
1228     // return type on a separate line.
1229     SourceLocation Start = D->getBeginLoc();
1230 
1231     // By default, we end at the line containing the function's name.
1232     SourceLocation End = D->getLocation();
1233 
1234     std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedLoc(End);
1235 
1236     // But if there are parameters, we want to include those as well.
1237     for (ParmVarDecl* Param : D->parameters()) {
1238       std::pair<FileID, unsigned> ParamLoc = SM.getDecomposedLoc(Param->getLocation());
1239 
1240       // It's possible there are macros involved or something. We don't include
1241       // the parameters in that case.
1242       if (ParamLoc.first == FuncLoc.first) {
1243         // Assume parameters are in order, so we always take the last one.
1244         End = Param->getEndLoc();
1245       }
1246     }
1247 
1248     return SourceRange(Start, End);
1249   }
1250 
getTagPeekRange(TagDecl * D)1251   SourceRange getTagPeekRange(TagDecl* D) {
1252     SourceLocation Start = D->getBeginLoc();
1253 
1254     // By default, we end at the line containing the name.
1255     SourceLocation End = D->getLocation();
1256 
1257     std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedLoc(End);
1258 
1259     if (CXXRecordDecl* D2 = dyn_cast<CXXRecordDecl>(D)) {
1260       // But if there are parameters, we want to include those as well.
1261       for (CXXBaseSpecifier& Base : D2->bases()) {
1262         std::pair<FileID, unsigned> Loc = SM.getDecomposedLoc(Base.getEndLoc());
1263 
1264         // It's possible there are macros involved or something. We don't include
1265         // the parameters in that case.
1266         if (Loc.first == FuncLoc.first) {
1267           // Assume parameters are in order, so we always take the last one.
1268           End = Base.getEndLoc();
1269         }
1270       }
1271     }
1272 
1273     return SourceRange(Start, End);
1274   }
1275 
getCommentRange(NamedDecl * D)1276   SourceRange getCommentRange(NamedDecl* D) {
1277     const RawComment* RC =
1278       AstContext->getRawCommentForDeclNoCache(D);
1279     if (!RC) {
1280       return SourceRange();
1281     }
1282 
1283     return RC->getSourceRange();
1284   }
1285 
1286   // Sanity checks that all ranges are in the same file, returning the first if
1287   // they're in different files.  Unions the ranges based on which is first.
combineRanges(SourceRange Range1,SourceRange Range2)1288   SourceRange combineRanges(SourceRange Range1, SourceRange Range2) {
1289     if (Range1.isInvalid()) {
1290       return Range2;
1291     }
1292     if (Range2.isInvalid()) {
1293       return Range1;
1294     }
1295 
1296     std::pair<FileID, unsigned> Begin1 = SM.getDecomposedLoc(Range1.getBegin());
1297     std::pair<FileID, unsigned> End1 = SM.getDecomposedLoc(Range1.getEnd());
1298     std::pair<FileID, unsigned> Begin2 = SM.getDecomposedLoc(Range2.getBegin());
1299     std::pair<FileID, unsigned> End2 = SM.getDecomposedLoc(Range2.getEnd());
1300 
1301     if (End1.first != Begin2.first) {
1302       // Something weird is probably happening with the preprocessor. Just
1303       // return the first range.
1304       return Range1;
1305     }
1306 
1307     // See which range comes first.
1308     if (Begin1.second <= End2.second) {
1309       return SourceRange(Range1.getBegin(), Range2.getEnd());
1310     } else {
1311       return SourceRange(Range2.getBegin(), Range1.getEnd());
1312     }
1313   }
1314 
1315   // Given a location and a range, returns the range if:
1316   // - The location and the range live in the same file.
1317   // - The range is well ordered (end is not before begin).
1318   // Returns an empty range otherwise.
validateRange(SourceLocation Loc,SourceRange Range)1319   SourceRange validateRange(SourceLocation Loc, SourceRange Range) {
1320     std::pair<FileID, unsigned> Decomposed = SM.getDecomposedLoc(Loc);
1321     std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin());
1322     std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd());
1323 
1324     if (Begin.first != Decomposed.first || End.first != Decomposed.first) {
1325       return SourceRange();
1326     }
1327 
1328     if (Begin.second >= End.second) {
1329       return SourceRange();
1330     }
1331 
1332     return Range;
1333   }
1334 
VisitNamedDecl(NamedDecl * D)1335   bool VisitNamedDecl(NamedDecl *D) {
1336     SourceLocation Loc = D->getLocation();
1337 
1338     // If the token is from a macro expansion and the expansion location
1339     // is interesting, use that instead as it tends to be more useful.
1340     SourceLocation expandedLoc = Loc;
1341     if (SM.isMacroBodyExpansion(Loc)) {
1342       Loc = SM.getFileLoc(Loc);
1343     }
1344 
1345     normalizeLocation(&Loc);
1346     if (!isInterestingLocation(Loc)) {
1347       return true;
1348     }
1349 
1350     if (isa<ParmVarDecl>(D) && !D->getDeclName().getAsIdentifierInfo()) {
1351       // Unnamed parameter in function proto.
1352       return true;
1353     }
1354 
1355     int Flags = 0;
1356     const char *Kind = "def";
1357     const char *PrettyKind = "?";
1358     SourceRange PeekRange(D->getBeginLoc(), D->getEndLoc());
1359     // The nesting range identifies the left brace and right brace, which
1360     // heavily depends on the AST node type.
1361     SourceRange NestingRange;
1362     if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) {
1363       if (D2->isTemplateInstantiation()) {
1364         D = D2->getTemplateInstantiationPattern();
1365       }
1366       Kind = D2->isThisDeclarationADefinition() ? "def" : "decl";
1367       PrettyKind = "function";
1368       PeekRange = getFunctionPeekRange(D2);
1369 
1370       // Only emit the nesting range if:
1371       // - This is a definition AND
1372       // - This isn't a template instantiation.  Function templates'
1373       //   instantiations can end up as a definition with a Loc at their point
1374       //   of declaration but with the CompoundStmt of the template's
1375       //   point of definition.  This really messes up the nesting range logic.
1376       //   At the time of writing this, the test repo's `big_header.h`'s
1377       //   `WhatsYourVector_impl::forwardDeclaredTemplateThingInlinedBelow` as
1378       //   instantiated by `big_cpp.cpp` triggers this phenomenon.
1379       //
1380       // Note: As covered elsewhere, template processing is tricky and it's
1381       // conceivable that we may change traversal patterns in the future,
1382       // mooting this guard.
1383       if (D2->isThisDeclarationADefinition() &&
1384           !D2->isTemplateInstantiation()) {
1385         // The CompoundStmt range is the brace range.
1386         NestingRange = getCompoundStmtRange(D2->getBody());
1387       }
1388     } else if (TagDecl *D2 = dyn_cast<TagDecl>(D)) {
1389       Kind = D2->isThisDeclarationADefinition() ? "def" : "decl";
1390       PrettyKind = "type";
1391 
1392       if (D2->isThisDeclarationADefinition() && D2->getDefinition() == D2) {
1393         PeekRange = getTagPeekRange(D2);
1394         NestingRange = D2->getBraceRange();
1395       } else {
1396         PeekRange = SourceRange();
1397       }
1398     } else if (isa<TypedefNameDecl>(D)) {
1399       Kind = "def";
1400       PrettyKind = "type";
1401       PeekRange = SourceRange(Loc, Loc);
1402     } else if (VarDecl *D2 = dyn_cast<VarDecl>(D)) {
1403       if (D2->isLocalVarDeclOrParm()) {
1404         Flags = NoCrossref;
1405       }
1406 
1407       Kind = D2->isThisDeclarationADefinition() == VarDecl::DeclarationOnly
1408                  ? "decl"
1409                  : "def";
1410       PrettyKind = "variable";
1411     } else if (isa<NamespaceDecl>(D) || isa<NamespaceAliasDecl>(D)) {
1412       Kind = "def";
1413       PrettyKind = "namespace";
1414       PeekRange = SourceRange(Loc, Loc);
1415       NamespaceDecl *D2 = dyn_cast<NamespaceDecl>(D);
1416       if (D2) {
1417         // There's no exposure of the left brace so we have to find it.
1418         NestingRange = SourceRange(
1419           findLeftBraceFromLoc(D2->isAnonymousNamespace() ? D2->getBeginLoc() : Loc),
1420           D2->getRBraceLoc());
1421       }
1422     } else if (isa<FieldDecl>(D)) {
1423       Kind = "def";
1424       PrettyKind = "field";
1425     } else if (isa<EnumConstantDecl>(D)) {
1426       Kind = "def";
1427       PrettyKind = "enum constant";
1428     } else {
1429       return true;
1430     }
1431 
1432     SourceRange CommentRange = getCommentRange(D);
1433     PeekRange = combineRanges(PeekRange, CommentRange);
1434     PeekRange = validateRange(Loc, PeekRange);
1435     NestingRange = validateRange(Loc, NestingRange);
1436 
1437     std::vector<std::string> Symbols = {getMangledName(CurMangleContext, D)};
1438     if (CXXMethodDecl::classof(D)) {
1439       Symbols.clear();
1440       findOverriddenMethods(dyn_cast<CXXMethodDecl>(D), Symbols);
1441     }
1442 
1443     // In the case of destructors, Loc might point to the ~ character. In that
1444     // case we want to skip to the name of the class. However, Loc might also
1445     // point to other places that generate destructors, such as the use site of
1446     // a macro that expands to generate a destructor, or a lambda (apparently
1447     // clang 8 creates a destructor declaration for at least some lambdas). In
1448     // the former case we'll use the macro use site as the location, and in the
1449     // latter we'll just drop the declaration.
1450     if (isa<CXXDestructorDecl>(D)) {
1451       PrettyKind = "destructor";
1452       const char *P = SM.getCharacterData(Loc);
1453       if (*P == '~') {
1454         // Advance Loc to the class name
1455         P++;
1456 
1457         unsigned Skipped = 1;
1458         while (*P == ' ' || *P == '\t' || *P == '\r' || *P == '\n') {
1459           P++;
1460           Skipped++;
1461         }
1462 
1463         Loc = Loc.getLocWithOffset(Skipped);
1464       } else {
1465         // See if the destructor is coming from a macro expansion
1466         P = SM.getCharacterData(expandedLoc);
1467         if (*P != '~') {
1468           // It's not
1469           return true;
1470         }
1471         // It is, so just use Loc as-is
1472       }
1473     }
1474 
1475     visitIdentifier(Kind, PrettyKind, getQualifiedName(D), SourceRange(Loc), Symbols,
1476                     getContext(D), Flags, PeekRange, NestingRange);
1477 
1478     return true;
1479   }
1480 
VisitCXXConstructExpr(CXXConstructExpr * E)1481   bool VisitCXXConstructExpr(CXXConstructExpr *E) {
1482     SourceLocation Loc = E->getBeginLoc();
1483     normalizeLocation(&Loc);
1484     if (!isInterestingLocation(Loc)) {
1485       return true;
1486     }
1487 
1488     FunctionDecl *Ctor = E->getConstructor();
1489     if (Ctor->isTemplateInstantiation()) {
1490       Ctor = Ctor->getTemplateInstantiationPattern();
1491     }
1492     std::string Mangled = getMangledName(CurMangleContext, Ctor);
1493 
1494     // FIXME: Need to do something different for list initialization.
1495 
1496     visitIdentifier("use", "constructor", getQualifiedName(Ctor), Loc, Mangled,
1497                     getContext(Loc));
1498 
1499     return true;
1500   }
1501 
VisitCallExpr(CallExpr * E)1502   bool VisitCallExpr(CallExpr *E) {
1503     Decl *Callee = E->getCalleeDecl();
1504     if (!Callee || !FunctionDecl::classof(Callee)) {
1505       return true;
1506     }
1507 
1508     const NamedDecl *NamedCallee = dyn_cast<NamedDecl>(Callee);
1509 
1510     SourceLocation Loc;
1511 
1512     const FunctionDecl *F = dyn_cast<FunctionDecl>(NamedCallee);
1513     if (F->isTemplateInstantiation()) {
1514       NamedCallee = F->getTemplateInstantiationPattern();
1515     }
1516 
1517     std::string Mangled = getMangledName(CurMangleContext, NamedCallee);
1518     int Flags = 0;
1519 
1520     Expr *CalleeExpr = E->getCallee()->IgnoreParenImpCasts();
1521 
1522     if (CXXOperatorCallExpr::classof(E)) {
1523       // Just take the first token.
1524       CXXOperatorCallExpr *Op = dyn_cast<CXXOperatorCallExpr>(E);
1525       Loc = Op->getOperatorLoc();
1526       Flags |= NotIdentifierToken;
1527     } else if (MemberExpr::classof(CalleeExpr)) {
1528       MemberExpr *Member = dyn_cast<MemberExpr>(CalleeExpr);
1529       Loc = Member->getMemberLoc();
1530     } else if (DeclRefExpr::classof(CalleeExpr)) {
1531       // We handle this in VisitDeclRefExpr.
1532       return true;
1533     } else {
1534       return true;
1535     }
1536 
1537     normalizeLocation(&Loc);
1538 
1539     if (!isInterestingLocation(Loc)) {
1540       return true;
1541     }
1542 
1543     visitIdentifier("use", "function", getQualifiedName(NamedCallee), Loc, Mangled,
1544                     getContext(Loc), Flags);
1545 
1546     return true;
1547   }
1548 
VisitTagTypeLoc(TagTypeLoc L)1549   bool VisitTagTypeLoc(TagTypeLoc L) {
1550     SourceLocation Loc = L.getBeginLoc();
1551     normalizeLocation(&Loc);
1552     if (!isInterestingLocation(Loc)) {
1553       return true;
1554     }
1555 
1556     TagDecl *Decl = L.getDecl();
1557     std::string Mangled = getMangledName(CurMangleContext, Decl);
1558     visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1559                     getContext(Loc));
1560     return true;
1561   }
1562 
VisitTypedefTypeLoc(TypedefTypeLoc L)1563   bool VisitTypedefTypeLoc(TypedefTypeLoc L) {
1564     SourceLocation Loc = L.getBeginLoc();
1565     normalizeLocation(&Loc);
1566     if (!isInterestingLocation(Loc)) {
1567       return true;
1568     }
1569 
1570     NamedDecl *Decl = L.getTypedefNameDecl();
1571     std::string Mangled = getMangledName(CurMangleContext, Decl);
1572     visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1573                     getContext(Loc));
1574     return true;
1575   }
1576 
VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc L)1577   bool VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc L) {
1578     SourceLocation Loc = L.getBeginLoc();
1579     normalizeLocation(&Loc);
1580     if (!isInterestingLocation(Loc)) {
1581       return true;
1582     }
1583 
1584     NamedDecl *Decl = L.getDecl();
1585     std::string Mangled = getMangledName(CurMangleContext, Decl);
1586     visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1587                     getContext(Loc));
1588     return true;
1589   }
1590 
VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L)1591   bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L) {
1592     SourceLocation Loc = L.getBeginLoc();
1593     normalizeLocation(&Loc);
1594     if (!isInterestingLocation(Loc)) {
1595       return true;
1596     }
1597 
1598     TemplateDecl *Td = L.getTypePtr()->getTemplateName().getAsTemplateDecl();
1599     if (ClassTemplateDecl *D = dyn_cast<ClassTemplateDecl>(Td)) {
1600       NamedDecl *Decl = D->getTemplatedDecl();
1601       std::string Mangled = getMangledName(CurMangleContext, Decl);
1602       visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1603                       getContext(Loc));
1604     } else if (TypeAliasTemplateDecl *D = dyn_cast<TypeAliasTemplateDecl>(Td)) {
1605       NamedDecl *Decl = D->getTemplatedDecl();
1606       std::string Mangled = getMangledName(CurMangleContext, Decl);
1607       visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
1608                       getContext(Loc));
1609     }
1610 
1611     return true;
1612   }
1613 
VisitDeclRefExpr(DeclRefExpr * E)1614   bool VisitDeclRefExpr(DeclRefExpr *E) {
1615     SourceLocation Loc = E->getExprLoc();
1616     normalizeLocation(&Loc);
1617     if (!isInterestingLocation(Loc)) {
1618       return true;
1619     }
1620 
1621     if (E->hasQualifier()) {
1622       Loc = E->getNameInfo().getLoc();
1623       normalizeLocation(&Loc);
1624     }
1625 
1626     NamedDecl *Decl = E->getDecl();
1627     if (const VarDecl *D2 = dyn_cast<VarDecl>(Decl)) {
1628       int Flags = 0;
1629       if (D2->isLocalVarDeclOrParm()) {
1630         Flags = NoCrossref;
1631       }
1632       std::string Mangled = getMangledName(CurMangleContext, Decl);
1633       visitIdentifier("use", "variable", getQualifiedName(Decl), Loc, Mangled,
1634                       getContext(Loc), Flags);
1635     } else if (isa<FunctionDecl>(Decl)) {
1636       const FunctionDecl *F = dyn_cast<FunctionDecl>(Decl);
1637       if (F->isTemplateInstantiation()) {
1638         Decl = F->getTemplateInstantiationPattern();
1639       }
1640 
1641       std::string Mangled = getMangledName(CurMangleContext, Decl);
1642       visitIdentifier("use", "function", getQualifiedName(Decl), Loc, Mangled,
1643                       getContext(Loc));
1644     } else if (isa<EnumConstantDecl>(Decl)) {
1645       std::string Mangled = getMangledName(CurMangleContext, Decl);
1646       visitIdentifier("use", "enum", getQualifiedName(Decl), Loc, Mangled,
1647                       getContext(Loc));
1648     }
1649 
1650     return true;
1651   }
1652 
VisitCXXConstructorDecl(CXXConstructorDecl * D)1653   bool VisitCXXConstructorDecl(CXXConstructorDecl *D) {
1654     if (!isInterestingLocation(D->getLocation())) {
1655       return true;
1656     }
1657 
1658     for (CXXConstructorDecl::init_const_iterator It = D->init_begin();
1659          It != D->init_end(); ++It) {
1660       const CXXCtorInitializer *Ci = *It;
1661       if (!Ci->getMember() || !Ci->isWritten()) {
1662         continue;
1663       }
1664 
1665       SourceLocation Loc = Ci->getMemberLocation();
1666       normalizeLocation(&Loc);
1667       if (!isInterestingLocation(Loc)) {
1668         continue;
1669       }
1670 
1671       FieldDecl *Member = Ci->getMember();
1672       std::string Mangled = getMangledName(CurMangleContext, Member);
1673       visitIdentifier("use", "field", getQualifiedName(Member), Loc, Mangled,
1674                       getContext(D));
1675     }
1676 
1677     return true;
1678   }
1679 
VisitMemberExpr(MemberExpr * E)1680   bool VisitMemberExpr(MemberExpr *E) {
1681     SourceLocation Loc = E->getExprLoc();
1682     normalizeLocation(&Loc);
1683     if (!isInterestingLocation(Loc)) {
1684       return true;
1685     }
1686 
1687     ValueDecl *Decl = E->getMemberDecl();
1688     if (FieldDecl *Field = dyn_cast<FieldDecl>(Decl)) {
1689       std::string Mangled = getMangledName(CurMangleContext, Field);
1690       visitIdentifier("use", "field", getQualifiedName(Field), Loc, Mangled,
1691                       getContext(Loc));
1692     }
1693     return true;
1694   }
1695 
VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr * E)1696   bool VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E) {
1697     SourceLocation Loc = E->getMemberLoc();
1698     normalizeLocation(&Loc);
1699     if (!isInterestingLocation(Loc)) {
1700       return true;
1701     }
1702 
1703     if (TemplateStack) {
1704       TemplateStack->visitDependent(Loc);
1705     }
1706     return true;
1707   }
1708 
enterSourceFile(SourceLocation Loc)1709   void enterSourceFile(SourceLocation Loc) {
1710     normalizeLocation(&Loc);
1711     FileInfo* newFile = getFileInfo(Loc);
1712     if (!newFile->Interesting) {
1713       return;
1714     }
1715     FileType type = newFile->Generated ? FileType::Generated : FileType::Source;
1716     std::vector<std::string> symbols = {
1717         std::string("FILE_") + mangleFile(newFile->Realname, type)
1718     };
1719     // We use an explicit zero-length source range at the start of the file. If we
1720     // don't set the LocRangeEndValid flag, the visitIdentifier code will use the
1721     // entire first token, which could be e.g. a long multiline-comment.
1722     visitIdentifier("def", "file", newFile->Realname, SourceRange(Loc),
1723                     symbols, Context(), NotIdentifierToken | LocRangeEndValid);
1724   }
1725 
inclusionDirective(SourceRange FileNameRange,const FileEntry * File)1726   void inclusionDirective(SourceRange FileNameRange, const FileEntry* File) {
1727     std::string includedFile(File->tryGetRealPathName());
1728     FileType type = relativizePath(includedFile);
1729     if (type == FileType::Unknown) {
1730       return;
1731     }
1732     std::vector<std::string> symbols = {
1733         std::string("FILE_") + mangleFile(includedFile, type)
1734     };
1735     visitIdentifier("use", "file", includedFile, FileNameRange, symbols,
1736                     Context(), NotIdentifierToken | LocRangeEndValid);
1737   }
1738 
macroDefined(const Token & Tok,const MacroDirective * Macro)1739   void macroDefined(const Token &Tok, const MacroDirective *Macro) {
1740     if (Macro->getMacroInfo()->isBuiltinMacro()) {
1741       return;
1742     }
1743     SourceLocation Loc = Tok.getLocation();
1744     normalizeLocation(&Loc);
1745     if (!isInterestingLocation(Loc)) {
1746       return;
1747     }
1748 
1749     IdentifierInfo *Ident = Tok.getIdentifierInfo();
1750     if (Ident) {
1751       std::string Mangled =
1752           std::string("M_") + mangleLocation(Loc, std::string(Ident->getName()));
1753       visitIdentifier("def", "macro", Ident->getName(), Loc, Mangled);
1754     }
1755   }
1756 
macroUsed(const Token & Tok,const MacroInfo * Macro)1757   void macroUsed(const Token &Tok, const MacroInfo *Macro) {
1758     if (!Macro) {
1759       return;
1760     }
1761     if (Macro->isBuiltinMacro()) {
1762       return;
1763     }
1764     SourceLocation Loc = Tok.getLocation();
1765     normalizeLocation(&Loc);
1766     if (!isInterestingLocation(Loc)) {
1767       return;
1768     }
1769 
1770     IdentifierInfo *Ident = Tok.getIdentifierInfo();
1771     if (Ident) {
1772       std::string Mangled =
1773           std::string("M_") +
1774           mangleLocation(Macro->getDefinitionLoc(), std::string(Ident->getName()));
1775       visitIdentifier("use", "macro", Ident->getName(), Loc, Mangled);
1776     }
1777   }
1778 };
1779 
FileChanged(SourceLocation Loc,FileChangeReason Reason,SrcMgr::CharacteristicKind FileType,FileID PrevFID=FileID ())1780 void PreprocessorHook::FileChanged(SourceLocation Loc, FileChangeReason Reason,
1781                                    SrcMgr::CharacteristicKind FileType,
1782                                    FileID PrevFID = FileID()) {
1783   switch (Reason) {
1784     case PPCallbacks::RenameFile:
1785     case PPCallbacks::SystemHeaderPragma:
1786       // Don't care about these, since we want the actual on-disk filenames
1787       break;
1788     case PPCallbacks::EnterFile:
1789       Indexer->enterSourceFile(Loc);
1790       break;
1791     case PPCallbacks::ExitFile:
1792       // Don't care about exiting files
1793       break;
1794   }
1795 }
1796 
InclusionDirective(SourceLocation HashLoc,const Token & IncludeTok,StringRef FileName,bool IsAngled,CharSourceRange FileNameRange,const FileEntry * File,StringRef SearchPath,StringRef RelativePath,const Module * Imported,SrcMgr::CharacteristicKind FileType)1797 void PreprocessorHook::InclusionDirective(SourceLocation HashLoc,
1798                                           const Token &IncludeTok,
1799                                           StringRef FileName,
1800                                           bool IsAngled,
1801                                           CharSourceRange FileNameRange,
1802                                           const FileEntry *File,
1803                                           StringRef SearchPath,
1804                                           StringRef RelativePath,
1805                                           const Module *Imported,
1806                                           SrcMgr::CharacteristicKind FileType) {
1807   Indexer->inclusionDirective(FileNameRange.getAsRange(), File);
1808 }
1809 
MacroDefined(const Token & Tok,const MacroDirective * Md)1810 void PreprocessorHook::MacroDefined(const Token &Tok,
1811                                     const MacroDirective *Md) {
1812   Indexer->macroDefined(Tok, Md);
1813 }
1814 
MacroExpands(const Token & Tok,const MacroDefinition & Md,SourceRange Range,const MacroArgs * Ma)1815 void PreprocessorHook::MacroExpands(const Token &Tok, const MacroDefinition &Md,
1816                                     SourceRange Range, const MacroArgs *Ma) {
1817   Indexer->macroUsed(Tok, Md.getMacroInfo());
1818 }
1819 
MacroUndefined(const Token & Tok,const MacroDefinition & Md,const MacroDirective * Undef)1820 void PreprocessorHook::MacroUndefined(const Token &Tok,
1821                                       const MacroDefinition &Md,
1822                                       const MacroDirective *Undef)
1823 {
1824   Indexer->macroUsed(Tok, Md.getMacroInfo());
1825 }
1826 
Defined(const Token & Tok,const MacroDefinition & Md,SourceRange Range)1827 void PreprocessorHook::Defined(const Token &Tok, const MacroDefinition &Md,
1828                                SourceRange Range) {
1829   Indexer->macroUsed(Tok, Md.getMacroInfo());
1830 }
1831 
Ifdef(SourceLocation Loc,const Token & Tok,const MacroDefinition & Md)1832 void PreprocessorHook::Ifdef(SourceLocation Loc, const Token &Tok,
1833                              const MacroDefinition &Md) {
1834   Indexer->macroUsed(Tok, Md.getMacroInfo());
1835 }
1836 
Ifndef(SourceLocation Loc,const Token & Tok,const MacroDefinition & Md)1837 void PreprocessorHook::Ifndef(SourceLocation Loc, const Token &Tok,
1838                               const MacroDefinition &Md) {
1839   Indexer->macroUsed(Tok, Md.getMacroInfo());
1840 }
1841 
1842 class IndexAction : public PluginASTAction {
1843 protected:
CreateASTConsumer(CompilerInstance & CI,llvm::StringRef F)1844   std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
1845                                                  llvm::StringRef F) {
1846     return make_unique<IndexConsumer>(CI);
1847   }
1848 
ParseArgs(const CompilerInstance & CI,const std::vector<std::string> & Args)1849   bool ParseArgs(const CompilerInstance &CI,
1850                  const std::vector<std::string> &Args) {
1851     if (Args.size() != 3) {
1852       DiagnosticsEngine &D = CI.getDiagnostics();
1853       unsigned DiagID = D.getCustomDiagID(
1854           DiagnosticsEngine::Error,
1855           "Need arguments for the source, output, and object directories");
1856       D.Report(DiagID);
1857       return false;
1858     }
1859 
1860     // Load our directories
1861     Srcdir = getAbsolutePath(Args[0]);
1862     if (Srcdir.empty()) {
1863       DiagnosticsEngine &D = CI.getDiagnostics();
1864       unsigned DiagID = D.getCustomDiagID(
1865           DiagnosticsEngine::Error, "Source directory '%0' does not exist");
1866       D.Report(DiagID) << Args[0];
1867       return false;
1868     }
1869 
1870     ensurePath(Args[1] + PATHSEP_STRING);
1871     Outdir = getAbsolutePath(Args[1]);
1872     Outdir += PATHSEP_STRING;
1873 
1874     Objdir = getAbsolutePath(Args[2]);
1875     if (Objdir.empty()) {
1876       DiagnosticsEngine &D = CI.getDiagnostics();
1877       unsigned DiagID = D.getCustomDiagID(DiagnosticsEngine::Error,
1878                                           "Objdir '%0' does not exist");
1879       D.Report(DiagID) << Args[2];
1880       return false;
1881     }
1882     Objdir += PATHSEP_STRING;
1883 
1884     printf("MOZSEARCH: %s %s %s\n", Srcdir.c_str(), Outdir.c_str(),
1885            Objdir.c_str());
1886 
1887     return true;
1888   }
1889 
printHelp(llvm::raw_ostream & Ros)1890   void printHelp(llvm::raw_ostream &Ros) {
1891     Ros << "Help for mozsearch plugin goes here\n";
1892   }
1893 };
1894 
// Registers IndexAction with the frontend plugin registry under the name
// "mozsearch-index"; the second string is the plugin's description.
static FrontendPluginRegistry::Add<IndexAction>
    Y("mozsearch-index", "create the mozsearch index database");
1897