1 //===--- PlistDiagnostics.cpp - Plist Diagnostics for Paths -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines the PlistDiagnostics object.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/Analysis/PathDiagnostic.h"
14 #include "clang/Basic/FileManager.h"
15 #include "clang/Basic/PlistSupport.h"
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Basic/Version.h"
18 #include "clang/CrossTU/CrossTranslationUnit.h"
19 #include "clang/Frontend/ASTUnit.h"
20 #include "clang/Lex/Preprocessor.h"
21 #include "clang/Lex/TokenConcatenation.h"
22 #include "clang/Rewrite/Core/HTMLRewrite.h"
23 #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
24 #include "clang/StaticAnalyzer/Core/IssueHash.h"
25 #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/Support/Casting.h"
30 #include <memory>
31 
32 using namespace clang;
33 using namespace ento;
34 using namespace markup;
35 
36 //===----------------------------------------------------------------------===//
37 // Declarations of helper classes and functions for emitting bug reports in
38 // plist format.
39 //===----------------------------------------------------------------------===//
40 
41 namespace {
42   class PlistDiagnostics : public PathDiagnosticConsumer {
43     const std::string OutputFile;
44     const Preprocessor &PP;
45     const cross_tu::CrossTranslationUnitContext &CTU;
46     AnalyzerOptions &AnOpts;
47     const bool SupportsCrossFileDiagnostics;
48   public:
49     PlistDiagnostics(AnalyzerOptions &AnalyzerOpts,
50                      const std::string &OutputFile, const Preprocessor &PP,
51                      const cross_tu::CrossTranslationUnitContext &CTU,
52                      bool supportsMultipleFiles);
53 
~PlistDiagnostics()54     ~PlistDiagnostics() override {}
55 
56     void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags,
57                               FilesMade *filesMade) override;
58 
getName() const59     StringRef getName() const override {
60       return "PlistDiagnostics";
61     }
62 
getGenerationScheme() const63     PathGenerationScheme getGenerationScheme() const override {
64       return Extensive;
65     }
supportsLogicalOpControlFlow() const66     bool supportsLogicalOpControlFlow() const override { return true; }
supportsCrossFileDiagnostics() const67     bool supportsCrossFileDiagnostics() const override {
68       return SupportsCrossFileDiagnostics;
69     }
70   };
71 } // end anonymous namespace
72 
73 namespace {
74 
75 /// A helper class for emitting a single report.
76 class PlistPrinter {
77   const FIDMap& FM;
78   AnalyzerOptions &AnOpts;
79   const Preprocessor &PP;
80   const cross_tu::CrossTranslationUnitContext &CTU;
81   llvm::SmallVector<const PathDiagnosticMacroPiece *, 0> MacroPieces;
82 
83 public:
PlistPrinter(const FIDMap & FM,AnalyzerOptions & AnOpts,const Preprocessor & PP,const cross_tu::CrossTranslationUnitContext & CTU)84   PlistPrinter(const FIDMap& FM, AnalyzerOptions &AnOpts,
85                const Preprocessor &PP,
86                const cross_tu::CrossTranslationUnitContext &CTU)
87     : FM(FM), AnOpts(AnOpts), PP(PP), CTU(CTU) {
88   }
89 
ReportDiag(raw_ostream & o,const PathDiagnosticPiece & P)90   void ReportDiag(raw_ostream &o, const PathDiagnosticPiece& P) {
91     ReportPiece(o, P, /*indent*/ 4, /*depth*/ 0, /*includeControlFlow*/ true);
92 
93     // Don't emit a warning about an unused private field.
94     (void)AnOpts;
95   }
96 
97   /// Print the expansions of the collected macro pieces.
98   ///
99   /// Each time ReportDiag is called on a PathDiagnosticMacroPiece (or, if one
100   /// is found through a call piece, etc), it's subpieces are reported, and the
101   /// piece itself is collected. Call this function after the entire bugpath
102   /// was reported.
103   void ReportMacroExpansions(raw_ostream &o, unsigned indent);
104 
105 private:
ReportPiece(raw_ostream & o,const PathDiagnosticPiece & P,unsigned indent,unsigned depth,bool includeControlFlow,bool isKeyEvent=false)106   void ReportPiece(raw_ostream &o, const PathDiagnosticPiece &P,
107                    unsigned indent, unsigned depth, bool includeControlFlow,
108                    bool isKeyEvent = false) {
109     switch (P.getKind()) {
110       case PathDiagnosticPiece::ControlFlow:
111         if (includeControlFlow)
112           ReportControlFlow(o, cast<PathDiagnosticControlFlowPiece>(P), indent);
113         break;
114       case PathDiagnosticPiece::Call:
115         ReportCall(o, cast<PathDiagnosticCallPiece>(P), indent,
116                    depth);
117         break;
118       case PathDiagnosticPiece::Event:
119         ReportEvent(o, cast<PathDiagnosticEventPiece>(P), indent, depth,
120                     isKeyEvent);
121         break;
122       case PathDiagnosticPiece::Macro:
123         ReportMacroSubPieces(o, cast<PathDiagnosticMacroPiece>(P), indent,
124                              depth);
125         break;
126       case PathDiagnosticPiece::Note:
127         ReportNote(o, cast<PathDiagnosticNotePiece>(P), indent);
128         break;
129       case PathDiagnosticPiece::PopUp:
130         ReportPopUp(o, cast<PathDiagnosticPopUpPiece>(P), indent);
131         break;
132     }
133   }
134 
135   void EmitRanges(raw_ostream &o, const ArrayRef<SourceRange> Ranges,
136                   unsigned indent);
137   void EmitMessage(raw_ostream &o, StringRef Message, unsigned indent);
138   void EmitFixits(raw_ostream &o, ArrayRef<FixItHint> fixits, unsigned indent);
139 
140   void ReportControlFlow(raw_ostream &o,
141                          const PathDiagnosticControlFlowPiece& P,
142                          unsigned indent);
143   void ReportEvent(raw_ostream &o, const PathDiagnosticEventPiece& P,
144                    unsigned indent, unsigned depth, bool isKeyEvent = false);
145   void ReportCall(raw_ostream &o, const PathDiagnosticCallPiece &P,
146                   unsigned indent, unsigned depth);
147   void ReportMacroSubPieces(raw_ostream &o, const PathDiagnosticMacroPiece& P,
148                             unsigned indent, unsigned depth);
149   void ReportNote(raw_ostream &o, const PathDiagnosticNotePiece& P,
150                   unsigned indent);
151 
152   void ReportPopUp(raw_ostream &o, const PathDiagnosticPopUpPiece &P,
153                    unsigned indent);
154 };
155 
156 } // end of anonymous namespace
157 
158 namespace {
159 
/// A macro's name paired with the text it expands to.
struct ExpansionInfo {
  std::string MacroName;
  std::string Expansion;

  /// Both strings are taken by value and moved into place.
  ExpansionInfo(std::string N, std::string E)
      : MacroName(std::move(N)), Expansion(std::move(E)) {}
};
166 
167 } // end of anonymous namespace
168 
169 static void printBugPath(llvm::raw_ostream &o, const FIDMap& FM,
170                          AnalyzerOptions &AnOpts, const Preprocessor &PP,
171                          const cross_tu::CrossTranslationUnitContext &CTU,
172                          const PathPieces &Path);
173 
174 /// Print coverage information to output stream {@code o}.
175 /// May modify the used list of files {@code Fids} by inserting new ones.
176 static void printCoverage(const PathDiagnostic *D,
177                           unsigned InputIndentLevel,
178                           SmallVectorImpl<FileID> &Fids,
179                           FIDMap &FM,
180                           llvm::raw_fd_ostream &o);
181 
182 static ExpansionInfo
183 getExpandedMacro(SourceLocation MacroLoc, const Preprocessor &PP,
184                  const cross_tu::CrossTranslationUnitContext &CTU);
185 
186 //===----------------------------------------------------------------------===//
187 // Methods of PlistPrinter.
188 //===----------------------------------------------------------------------===//
189 
EmitRanges(raw_ostream & o,const ArrayRef<SourceRange> Ranges,unsigned indent)190 void PlistPrinter::EmitRanges(raw_ostream &o,
191                               const ArrayRef<SourceRange> Ranges,
192                               unsigned indent) {
193 
194   if (Ranges.empty())
195     return;
196 
197   Indent(o, indent) << "<key>ranges</key>\n";
198   Indent(o, indent) << "<array>\n";
199   ++indent;
200 
201   const SourceManager &SM = PP.getSourceManager();
202   const LangOptions &LangOpts = PP.getLangOpts();
203 
204   for (auto &R : Ranges)
205     EmitRange(o, SM,
206               Lexer::getAsCharRange(SM.getExpansionRange(R), SM, LangOpts),
207               FM, indent + 1);
208   --indent;
209   Indent(o, indent) << "</array>\n";
210 }
211 
EmitMessage(raw_ostream & o,StringRef Message,unsigned indent)212 void PlistPrinter::EmitMessage(raw_ostream &o, StringRef Message,
213                                unsigned indent) {
214   // Output the text.
215   assert(!Message.empty());
216   Indent(o, indent) << "<key>extended_message</key>\n";
217   Indent(o, indent);
218   EmitString(o, Message) << '\n';
219 
220   // Output the short text.
221   // FIXME: Really use a short string.
222   Indent(o, indent) << "<key>message</key>\n";
223   Indent(o, indent);
224   EmitString(o, Message) << '\n';
225 }
226 
EmitFixits(raw_ostream & o,ArrayRef<FixItHint> fixits,unsigned indent)227 void PlistPrinter::EmitFixits(raw_ostream &o, ArrayRef<FixItHint> fixits,
228                               unsigned indent) {
229   if (fixits.size() == 0)
230     return;
231 
232   const SourceManager &SM = PP.getSourceManager();
233   const LangOptions &LangOpts = PP.getLangOpts();
234 
235   Indent(o, indent) << "<key>fixits</key>\n";
236   Indent(o, indent) << "<array>\n";
237   for (const auto &fixit : fixits) {
238     assert(!fixit.isNull());
239     // FIXME: Add support for InsertFromRange and BeforePreviousInsertion.
240     assert(!fixit.InsertFromRange.isValid() && "Not implemented yet!");
241     assert(!fixit.BeforePreviousInsertions && "Not implemented yet!");
242     Indent(o, indent) << " <dict>\n";
243     Indent(o, indent) << "  <key>remove_range</key>\n";
244     EmitRange(o, SM, Lexer::getAsCharRange(fixit.RemoveRange, SM, LangOpts),
245               FM, indent + 2);
246     Indent(o, indent) << "  <key>insert_string</key>";
247     EmitString(o, fixit.CodeToInsert);
248     o << "\n";
249     Indent(o, indent) << " </dict>\n";
250   }
251   Indent(o, indent) << "</array>\n";
252 }
253 
ReportControlFlow(raw_ostream & o,const PathDiagnosticControlFlowPiece & P,unsigned indent)254 void PlistPrinter::ReportControlFlow(raw_ostream &o,
255                                      const PathDiagnosticControlFlowPiece& P,
256                                      unsigned indent) {
257 
258   const SourceManager &SM = PP.getSourceManager();
259   const LangOptions &LangOpts = PP.getLangOpts();
260 
261   Indent(o, indent) << "<dict>\n";
262   ++indent;
263 
264   Indent(o, indent) << "<key>kind</key><string>control</string>\n";
265 
266   // Emit edges.
267   Indent(o, indent) << "<key>edges</key>\n";
268   ++indent;
269   Indent(o, indent) << "<array>\n";
270   ++indent;
271   for (PathDiagnosticControlFlowPiece::const_iterator I=P.begin(), E=P.end();
272        I!=E; ++I) {
273     Indent(o, indent) << "<dict>\n";
274     ++indent;
275 
276     // Make the ranges of the start and end point self-consistent with adjacent edges
277     // by forcing to use only the beginning of the range.  This simplifies the layout
278     // logic for clients.
279     Indent(o, indent) << "<key>start</key>\n";
280     SourceRange StartEdge(
281         SM.getExpansionLoc(I->getStart().asRange().getBegin()));
282     EmitRange(o, SM, Lexer::getAsCharRange(StartEdge, SM, LangOpts), FM,
283               indent + 1);
284 
285     Indent(o, indent) << "<key>end</key>\n";
286     SourceRange EndEdge(SM.getExpansionLoc(I->getEnd().asRange().getBegin()));
287     EmitRange(o, SM, Lexer::getAsCharRange(EndEdge, SM, LangOpts), FM,
288               indent + 1);
289 
290     --indent;
291     Indent(o, indent) << "</dict>\n";
292   }
293   --indent;
294   Indent(o, indent) << "</array>\n";
295   --indent;
296 
297   // Output any helper text.
298   const auto &s = P.getString();
299   if (!s.empty()) {
300     Indent(o, indent) << "<key>alternate</key>";
301     EmitString(o, s) << '\n';
302   }
303 
304   assert(P.getFixits().size() == 0 &&
305          "Fixits on constrol flow pieces are not implemented yet!");
306 
307   --indent;
308   Indent(o, indent) << "</dict>\n";
309 }
310 
ReportEvent(raw_ostream & o,const PathDiagnosticEventPiece & P,unsigned indent,unsigned depth,bool isKeyEvent)311 void PlistPrinter::ReportEvent(raw_ostream &o, const PathDiagnosticEventPiece& P,
312                                unsigned indent, unsigned depth,
313                                bool isKeyEvent) {
314 
315   const SourceManager &SM = PP.getSourceManager();
316 
317   Indent(o, indent) << "<dict>\n";
318   ++indent;
319 
320   Indent(o, indent) << "<key>kind</key><string>event</string>\n";
321 
322   if (isKeyEvent) {
323     Indent(o, indent) << "<key>key_event</key><true/>\n";
324   }
325 
326   // Output the location.
327   FullSourceLoc L = P.getLocation().asLocation();
328 
329   Indent(o, indent) << "<key>location</key>\n";
330   EmitLocation(o, SM, L, FM, indent);
331 
332   // Output the ranges (if any).
333   ArrayRef<SourceRange> Ranges = P.getRanges();
334   EmitRanges(o, Ranges, indent);
335 
336   // Output the call depth.
337   Indent(o, indent) << "<key>depth</key>";
338   EmitInteger(o, depth) << '\n';
339 
340   // Output the text.
341   EmitMessage(o, P.getString(), indent);
342 
343   // Output the fixits.
344   EmitFixits(o, P.getFixits(), indent);
345 
346   // Finish up.
347   --indent;
348   Indent(o, indent); o << "</dict>\n";
349 }
350 
ReportCall(raw_ostream & o,const PathDiagnosticCallPiece & P,unsigned indent,unsigned depth)351 void PlistPrinter::ReportCall(raw_ostream &o, const PathDiagnosticCallPiece &P,
352                               unsigned indent,
353                               unsigned depth) {
354 
355   if (auto callEnter = P.getCallEnterEvent())
356     ReportPiece(o, *callEnter, indent, depth, /*includeControlFlow*/ true,
357                 P.isLastInMainSourceFile());
358 
359 
360   ++depth;
361 
362   if (auto callEnterWithinCaller = P.getCallEnterWithinCallerEvent())
363     ReportPiece(o, *callEnterWithinCaller, indent, depth,
364                 /*includeControlFlow*/ true);
365 
366   for (PathPieces::const_iterator I = P.path.begin(), E = P.path.end();I!=E;++I)
367     ReportPiece(o, **I, indent, depth, /*includeControlFlow*/ true);
368 
369   --depth;
370 
371   if (auto callExit = P.getCallExitEvent())
372     ReportPiece(o, *callExit, indent, depth, /*includeControlFlow*/ true);
373 
374   assert(P.getFixits().size() == 0 &&
375          "Fixits on call pieces are not implemented yet!");
376 }
377 
ReportMacroSubPieces(raw_ostream & o,const PathDiagnosticMacroPiece & P,unsigned indent,unsigned depth)378 void PlistPrinter::ReportMacroSubPieces(raw_ostream &o,
379                                         const PathDiagnosticMacroPiece& P,
380                                         unsigned indent, unsigned depth) {
381   MacroPieces.push_back(&P);
382 
383   for (PathPieces::const_iterator I = P.subPieces.begin(),
384                                   E = P.subPieces.end();
385        I != E; ++I) {
386     ReportPiece(o, **I, indent, depth, /*includeControlFlow*/ false);
387   }
388 
389   assert(P.getFixits().size() == 0 &&
390          "Fixits on constrol flow pieces are not implemented yet!");
391 }
392 
ReportMacroExpansions(raw_ostream & o,unsigned indent)393 void PlistPrinter::ReportMacroExpansions(raw_ostream &o, unsigned indent) {
394 
395   for (const PathDiagnosticMacroPiece *P : MacroPieces) {
396     const SourceManager &SM = PP.getSourceManager();
397     ExpansionInfo EI = getExpandedMacro(P->getLocation().asLocation(), PP, CTU);
398 
399     Indent(o, indent) << "<dict>\n";
400     ++indent;
401 
402     // Output the location.
403     FullSourceLoc L = P->getLocation().asLocation();
404 
405     Indent(o, indent) << "<key>location</key>\n";
406     EmitLocation(o, SM, L, FM, indent);
407 
408     // Output the ranges (if any).
409     ArrayRef<SourceRange> Ranges = P->getRanges();
410     EmitRanges(o, Ranges, indent);
411 
412     // Output the macro name.
413     Indent(o, indent) << "<key>name</key>";
414     EmitString(o, EI.MacroName) << '\n';
415 
416     // Output what it expands into.
417     Indent(o, indent) << "<key>expansion</key>";
418     EmitString(o, EI.Expansion) << '\n';
419 
420     // Finish up.
421     --indent;
422     Indent(o, indent);
423     o << "</dict>\n";
424   }
425 }
426 
ReportNote(raw_ostream & o,const PathDiagnosticNotePiece & P,unsigned indent)427 void PlistPrinter::ReportNote(raw_ostream &o, const PathDiagnosticNotePiece& P,
428                               unsigned indent) {
429 
430   const SourceManager &SM = PP.getSourceManager();
431 
432   Indent(o, indent) << "<dict>\n";
433   ++indent;
434 
435   // Output the location.
436   FullSourceLoc L = P.getLocation().asLocation();
437 
438   Indent(o, indent) << "<key>location</key>\n";
439   EmitLocation(o, SM, L, FM, indent);
440 
441   // Output the ranges (if any).
442   ArrayRef<SourceRange> Ranges = P.getRanges();
443   EmitRanges(o, Ranges, indent);
444 
445   // Output the text.
446   EmitMessage(o, P.getString(), indent);
447 
448   // Output the fixits.
449   EmitFixits(o, P.getFixits(), indent);
450 
451   // Finish up.
452   --indent;
453   Indent(o, indent); o << "</dict>\n";
454 }
455 
ReportPopUp(raw_ostream & o,const PathDiagnosticPopUpPiece & P,unsigned indent)456 void PlistPrinter::ReportPopUp(raw_ostream &o,
457                                const PathDiagnosticPopUpPiece &P,
458                                unsigned indent) {
459   const SourceManager &SM = PP.getSourceManager();
460 
461   Indent(o, indent) << "<dict>\n";
462   ++indent;
463 
464   Indent(o, indent) << "<key>kind</key><string>pop-up</string>\n";
465 
466   // Output the location.
467   FullSourceLoc L = P.getLocation().asLocation();
468 
469   Indent(o, indent) << "<key>location</key>\n";
470   EmitLocation(o, SM, L, FM, indent);
471 
472   // Output the ranges (if any).
473   ArrayRef<SourceRange> Ranges = P.getRanges();
474   EmitRanges(o, Ranges, indent);
475 
476   // Output the text.
477   EmitMessage(o, P.getString(), indent);
478 
479   assert(P.getFixits().size() == 0 &&
480          "Fixits on pop-up pieces are not implemented yet!");
481 
482   // Finish up.
483   --indent;
484   Indent(o, indent) << "</dict>\n";
485 }
486 
487 //===----------------------------------------------------------------------===//
488 // Static function definitions.
489 //===----------------------------------------------------------------------===//
490 
491 /// Print coverage information to output stream {@code o}.
492 /// May modify the used list of files {@code Fids} by inserting new ones.
printCoverage(const PathDiagnostic * D,unsigned InputIndentLevel,SmallVectorImpl<FileID> & Fids,FIDMap & FM,llvm::raw_fd_ostream & o)493 static void printCoverage(const PathDiagnostic *D,
494                           unsigned InputIndentLevel,
495                           SmallVectorImpl<FileID> &Fids,
496                           FIDMap &FM,
497                           llvm::raw_fd_ostream &o) {
498   unsigned IndentLevel = InputIndentLevel;
499 
500   Indent(o, IndentLevel) << "<key>ExecutedLines</key>\n";
501   Indent(o, IndentLevel) << "<dict>\n";
502   IndentLevel++;
503 
504   // Mapping from file IDs to executed lines.
505   const FilesToLineNumsMap &ExecutedLines = D->getExecutedLines();
506   for (auto I = ExecutedLines.begin(), E = ExecutedLines.end(); I != E; ++I) {
507     unsigned FileKey = AddFID(FM, Fids, I->first);
508     Indent(o, IndentLevel) << "<key>" << FileKey << "</key>\n";
509     Indent(o, IndentLevel) << "<array>\n";
510     IndentLevel++;
511     for (unsigned LineNo : I->second) {
512       Indent(o, IndentLevel);
513       EmitInteger(o, LineNo) << "\n";
514     }
515     IndentLevel--;
516     Indent(o, IndentLevel) << "</array>\n";
517   }
518   IndentLevel--;
519   Indent(o, IndentLevel) << "</dict>\n";
520 
521   assert(IndentLevel == InputIndentLevel);
522 }
523 
printBugPath(llvm::raw_ostream & o,const FIDMap & FM,AnalyzerOptions & AnOpts,const Preprocessor & PP,const cross_tu::CrossTranslationUnitContext & CTU,const PathPieces & Path)524 static void printBugPath(llvm::raw_ostream &o, const FIDMap& FM,
525                          AnalyzerOptions &AnOpts, const Preprocessor &PP,
526                          const cross_tu::CrossTranslationUnitContext &CTU,
527                          const PathPieces &Path) {
528   PlistPrinter Printer(FM, AnOpts, PP, CTU);
529   assert(std::is_partitioned(Path.begin(), Path.end(),
530                              [](const PathDiagnosticPieceRef &E) {
531                                return E->getKind() == PathDiagnosticPiece::Note;
532                              }) &&
533          "PathDiagnostic is not partitioned so that notes precede the rest");
534 
535   PathPieces::const_iterator FirstNonNote = std::partition_point(
536       Path.begin(), Path.end(), [](const PathDiagnosticPieceRef &E) {
537         return E->getKind() == PathDiagnosticPiece::Note;
538       });
539 
540   PathPieces::const_iterator I = Path.begin();
541 
542   if (FirstNonNote != Path.begin()) {
543     o << "   <key>notes</key>\n"
544          "   <array>\n";
545 
546     for (; I != FirstNonNote; ++I)
547       Printer.ReportDiag(o, **I);
548 
549     o << "   </array>\n";
550   }
551 
552   o << "   <key>path</key>\n";
553 
554   o << "   <array>\n";
555 
556   for (PathPieces::const_iterator E = Path.end(); I != E; ++I)
557     Printer.ReportDiag(o, **I);
558 
559   o << "   </array>\n";
560 
561   if (!AnOpts.ShouldDisplayMacroExpansions)
562     return;
563 
564   o << "   <key>macro_expansions</key>\n"
565        "   <array>\n";
566   Printer.ReportMacroExpansions(o, /* indent */ 4);
567   o << "   </array>\n";
568 }
569 
570 //===----------------------------------------------------------------------===//
571 // Methods of PlistDiagnostics.
572 //===----------------------------------------------------------------------===//
573 
PlistDiagnostics(AnalyzerOptions & AnalyzerOpts,const std::string & output,const Preprocessor & PP,const cross_tu::CrossTranslationUnitContext & CTU,bool supportsMultipleFiles)574 PlistDiagnostics::PlistDiagnostics(
575     AnalyzerOptions &AnalyzerOpts, const std::string &output,
576     const Preprocessor &PP, const cross_tu::CrossTranslationUnitContext &CTU,
577     bool supportsMultipleFiles)
578     : OutputFile(output), PP(PP), CTU(CTU), AnOpts(AnalyzerOpts),
579       SupportsCrossFileDiagnostics(supportsMultipleFiles) {
580   // FIXME: Will be used by a later planned change.
581   (void)this->CTU;
582 }
583 
createPlistDiagnosticConsumer(AnalyzerOptions & AnalyzerOpts,PathDiagnosticConsumers & C,const std::string & OutputFile,const Preprocessor & PP,const cross_tu::CrossTranslationUnitContext & CTU)584 void ento::createPlistDiagnosticConsumer(
585     AnalyzerOptions &AnalyzerOpts, PathDiagnosticConsumers &C,
586     const std::string &OutputFile, const Preprocessor &PP,
587     const cross_tu::CrossTranslationUnitContext &CTU) {
588 
589   // TODO: Emit an error here.
590   if (OutputFile.empty())
591     return;
592 
593   C.push_back(new PlistDiagnostics(AnalyzerOpts, OutputFile, PP, CTU,
594                                    /*supportsMultipleFiles*/ false));
595   createTextMinimalPathDiagnosticConsumer(AnalyzerOpts, C, OutputFile, PP, CTU);
596 }
597 
createPlistMultiFileDiagnosticConsumer(AnalyzerOptions & AnalyzerOpts,PathDiagnosticConsumers & C,const std::string & OutputFile,const Preprocessor & PP,const cross_tu::CrossTranslationUnitContext & CTU)598 void ento::createPlistMultiFileDiagnosticConsumer(
599     AnalyzerOptions &AnalyzerOpts, PathDiagnosticConsumers &C,
600     const std::string &OutputFile, const Preprocessor &PP,
601     const cross_tu::CrossTranslationUnitContext &CTU) {
602 
603   // TODO: Emit an error here.
604   if (OutputFile.empty())
605     return;
606 
607   C.push_back(new PlistDiagnostics(AnalyzerOpts, OutputFile, PP, CTU,
608                                    /*supportsMultipleFiles*/ true));
609   createTextMinimalPathDiagnosticConsumer(AnalyzerOpts, C, OutputFile, PP, CTU);
610 }
611 
FlushDiagnosticsImpl(std::vector<const PathDiagnostic * > & Diags,FilesMade * filesMade)612 void PlistDiagnostics::FlushDiagnosticsImpl(
613                                     std::vector<const PathDiagnostic *> &Diags,
614                                     FilesMade *filesMade) {
615   // Build up a set of FIDs that we use by scanning the locations and
616   // ranges of the diagnostics.
617   FIDMap FM;
618   SmallVector<FileID, 10> Fids;
619   const SourceManager& SM = PP.getSourceManager();
620   const LangOptions &LangOpts = PP.getLangOpts();
621 
622   auto AddPieceFID = [&FM, &Fids, &SM](const PathDiagnosticPiece &Piece) {
623     AddFID(FM, Fids, SM, Piece.getLocation().asLocation());
624     ArrayRef<SourceRange> Ranges = Piece.getRanges();
625     for (const SourceRange &Range : Ranges) {
626       AddFID(FM, Fids, SM, Range.getBegin());
627       AddFID(FM, Fids, SM, Range.getEnd());
628     }
629   };
630 
631   for (const PathDiagnostic *D : Diags) {
632 
633     SmallVector<const PathPieces *, 5> WorkList;
634     WorkList.push_back(&D->path);
635 
636     while (!WorkList.empty()) {
637       const PathPieces &Path = *WorkList.pop_back_val();
638 
639       for (const auto &Iter : Path) {
640         const PathDiagnosticPiece &Piece = *Iter;
641         AddPieceFID(Piece);
642 
643         if (const PathDiagnosticCallPiece *Call =
644                 dyn_cast<PathDiagnosticCallPiece>(&Piece)) {
645           if (auto CallEnterWithin = Call->getCallEnterWithinCallerEvent())
646             AddPieceFID(*CallEnterWithin);
647 
648           if (auto CallEnterEvent = Call->getCallEnterEvent())
649             AddPieceFID(*CallEnterEvent);
650 
651           WorkList.push_back(&Call->path);
652         } else if (const PathDiagnosticMacroPiece *Macro =
653                        dyn_cast<PathDiagnosticMacroPiece>(&Piece)) {
654           WorkList.push_back(&Macro->subPieces);
655         }
656       }
657     }
658   }
659 
660   // Open the file.
661   std::error_code EC;
662   llvm::raw_fd_ostream o(OutputFile, EC, llvm::sys::fs::OF_Text);
663   if (EC) {
664     llvm::errs() << "warning: could not create file: " << EC.message() << '\n';
665     return;
666   }
667 
668   EmitPlistHeader(o);
669 
670   // Write the root object: a <dict> containing...
671   //  - "clang_version", the string representation of clang version
672   //  - "files", an <array> mapping from FIDs to file names
673   //  - "diagnostics", an <array> containing the path diagnostics
674   o << "<dict>\n" <<
675        " <key>clang_version</key>\n";
676   EmitString(o, getClangFullVersion()) << '\n';
677   o << " <key>diagnostics</key>\n"
678        " <array>\n";
679 
680   for (std::vector<const PathDiagnostic*>::iterator DI=Diags.begin(),
681        DE = Diags.end(); DI!=DE; ++DI) {
682 
683     o << "  <dict>\n";
684 
685     const PathDiagnostic *D = *DI;
686     printBugPath(o, FM, AnOpts, PP, CTU, D->path);
687 
688     // Output the bug type and bug category.
689     o << "   <key>description</key>";
690     EmitString(o, D->getShortDescription()) << '\n';
691     o << "   <key>category</key>";
692     EmitString(o, D->getCategory()) << '\n';
693     o << "   <key>type</key>";
694     EmitString(o, D->getBugType()) << '\n';
695     o << "   <key>check_name</key>";
696     EmitString(o, D->getCheckerName()) << '\n';
697 
698     o << "   <!-- This hash is experimental and going to change! -->\n";
699     o << "   <key>issue_hash_content_of_line_in_context</key>";
700     PathDiagnosticLocation UPDLoc = D->getUniqueingLoc();
701     FullSourceLoc L(SM.getExpansionLoc(UPDLoc.isValid()
702                                             ? UPDLoc.asLocation()
703                                             : D->getLocation().asLocation()),
704                     SM);
705     const Decl *DeclWithIssue = D->getDeclWithIssue();
706     EmitString(o, GetIssueHash(SM, L, D->getCheckerName(), D->getBugType(),
707                                DeclWithIssue, LangOpts))
708         << '\n';
709 
710     // Output information about the semantic context where
711     // the issue occurred.
712     if (const Decl *DeclWithIssue = D->getDeclWithIssue()) {
713       // FIXME: handle blocks, which have no name.
714       if (const NamedDecl *ND = dyn_cast<NamedDecl>(DeclWithIssue)) {
715         StringRef declKind;
716         switch (ND->getKind()) {
717           case Decl::CXXRecord:
718             declKind = "C++ class";
719             break;
720           case Decl::CXXMethod:
721             declKind = "C++ method";
722             break;
723           case Decl::ObjCMethod:
724             declKind = "Objective-C method";
725             break;
726           case Decl::Function:
727             declKind = "function";
728             break;
729           default:
730             break;
731         }
732         if (!declKind.empty()) {
733           const std::string &declName = ND->getDeclName().getAsString();
734           o << "  <key>issue_context_kind</key>";
735           EmitString(o, declKind) << '\n';
736           o << "  <key>issue_context</key>";
737           EmitString(o, declName) << '\n';
738         }
739 
740         // Output the bug hash for issue unique-ing. Currently, it's just an
741         // offset from the beginning of the function.
742         if (const Stmt *Body = DeclWithIssue->getBody()) {
743 
744           // If the bug uniqueing location exists, use it for the hash.
745           // For example, this ensures that two leaks reported on the same line
746           // will have different issue_hashes and that the hash will identify
747           // the leak location even after code is added between the allocation
748           // site and the end of scope (leak report location).
749           if (UPDLoc.isValid()) {
750             FullSourceLoc UFunL(
751                 SM.getExpansionLoc(
752                     D->getUniqueingDecl()->getBody()->getBeginLoc()),
753                 SM);
754             o << "  <key>issue_hash_function_offset</key><string>"
755               << L.getExpansionLineNumber() - UFunL.getExpansionLineNumber()
756               << "</string>\n";
757 
758           // Otherwise, use the location on which the bug is reported.
759           } else {
760             FullSourceLoc FunL(SM.getExpansionLoc(Body->getBeginLoc()), SM);
761             o << "  <key>issue_hash_function_offset</key><string>"
762               << L.getExpansionLineNumber() - FunL.getExpansionLineNumber()
763               << "</string>\n";
764           }
765 
766         }
767       }
768     }
769 
770     // Output the location of the bug.
771     o << "  <key>location</key>\n";
772     EmitLocation(o, SM, D->getLocation().asLocation(), FM, 2);
773 
774     // Output the diagnostic to the sub-diagnostic client, if any.
775     if (!filesMade->empty()) {
776       StringRef lastName;
777       PDFileEntry::ConsumerFiles *files = filesMade->getFiles(*D);
778       if (files) {
779         for (PDFileEntry::ConsumerFiles::const_iterator CI = files->begin(),
780                 CE = files->end(); CI != CE; ++CI) {
781           StringRef newName = CI->first;
782           if (newName != lastName) {
783             if (!lastName.empty()) {
784               o << "  </array>\n";
785             }
786             lastName = newName;
787             o <<  "  <key>" << lastName << "_files</key>\n";
788             o << "  <array>\n";
789           }
790           o << "   <string>" << CI->second << "</string>\n";
791         }
792         o << "  </array>\n";
793       }
794     }
795 
796     printCoverage(D, /*IndentLevel=*/2, Fids, FM, o);
797 
798     // Close up the entry.
799     o << "  </dict>\n";
800   }
801 
802   o << " </array>\n";
803 
804   o << " <key>files</key>\n"
805        " <array>\n";
806   for (FileID FID : Fids)
807     EmitString(o << "  ", SM.getFileEntryForID(FID)->getName()) << '\n';
808   o << " </array>\n";
809 
810   if (llvm::AreStatisticsEnabled() && AnOpts.ShouldSerializeStats) {
811     o << " <key>statistics</key>\n";
812     std::string stats;
813     llvm::raw_string_ostream os(stats);
814     llvm::PrintStatisticsJSON(os);
815     os.flush();
816     EmitString(o, html::EscapeText(stats)) << '\n';
817   }
818 
819   // Finish.
820   o << "</dict>\n</plist>\n";
821 }
822 
823 //===----------------------------------------------------------------------===//
824 // Declarations of helper functions and data structures for expanding macros.
825 //===----------------------------------------------------------------------===//
826 
827 namespace {
828 
829 using ArgTokensTy = llvm::SmallVector<Token, 2>;
830 
831 } // end of anonymous namespace
832 
833 LLVM_DUMP_METHOD static void dumpArgTokensToStream(llvm::raw_ostream &Out,
834                                                    const Preprocessor &PP,
835                                                    const ArgTokensTy &Toks);
836 
837 namespace {
838 /// Maps unexpanded macro parameters to expanded arguments. A macro argument may
839 /// need to expanded further when it is nested inside another macro.
840 class MacroParamMap : public std::map<const IdentifierInfo *, ArgTokensTy> {
841 public:
842   void expandFromPrevMacro(const MacroParamMap &Super);
843 
dump(const Preprocessor & PP) const844   LLVM_DUMP_METHOD void dump(const Preprocessor &PP) const {
845     dumpToStream(llvm::errs(), PP);
846   }
847 
848   LLVM_DUMP_METHOD void dumpToStream(llvm::raw_ostream &Out,
849                                      const Preprocessor &PP) const;
850 };
851 
852 struct MacroExpansionInfo {
853   std::string Name;
854   const MacroInfo *MI = nullptr;
855   MacroParamMap ParamMap;
856 
MacroExpansionInfo__anon9a8b21cc0811::MacroExpansionInfo857   MacroExpansionInfo(std::string N, const MacroInfo *MI, MacroParamMap M)
858       : Name(std::move(N)), MI(MI), ParamMap(std::move(M)) {}
859 };
860 
861 class TokenPrinter {
862   llvm::raw_ostream &OS;
863   const Preprocessor &PP;
864 
865   Token PrevTok, PrevPrevTok;
866   TokenConcatenation ConcatInfo;
867 
868 public:
TokenPrinter(llvm::raw_ostream & OS,const Preprocessor & PP)869   TokenPrinter(llvm::raw_ostream &OS, const Preprocessor &PP)
870     : OS(OS), PP(PP), ConcatInfo(PP) {
871     PrevTok.setKind(tok::unknown);
872     PrevPrevTok.setKind(tok::unknown);
873   }
874 
875   void printToken(const Token &Tok);
876 };
877 
878 /// Wrapper around a Lexer object that can lex tokens one-by-one. Its possible
879 /// to "inject" a range of tokens into the stream, in which case the next token
880 /// is retrieved from the next element of the range, until the end of the range
881 /// is reached.
882 class TokenStream {
883 public:
TokenStream(SourceLocation ExpanLoc,const SourceManager & SM,const LangOptions & LangOpts)884   TokenStream(SourceLocation ExpanLoc, const SourceManager &SM,
885               const LangOptions &LangOpts)
886       : ExpanLoc(ExpanLoc) {
887     FileID File;
888     unsigned Offset;
889     std::tie(File, Offset) = SM.getDecomposedLoc(ExpanLoc);
890     llvm::MemoryBufferRef MB = SM.getBufferOrFake(File);
891     const char *MacroNameTokenPos = MB.getBufferStart() + Offset;
892 
893     RawLexer = std::make_unique<Lexer>(SM.getLocForStartOfFile(File), LangOpts,
894                                        MB.getBufferStart(), MacroNameTokenPos,
895                                        MB.getBufferEnd());
896   }
897 
next(Token & Result)898   void next(Token &Result) {
899     if (CurrTokenIt == TokenRange.end()) {
900       RawLexer->LexFromRawLexer(Result);
901       return;
902     }
903     Result = *CurrTokenIt;
904     CurrTokenIt++;
905   }
906 
injectRange(const ArgTokensTy & Range)907   void injectRange(const ArgTokensTy &Range) {
908     TokenRange = Range;
909     CurrTokenIt = TokenRange.begin();
910   }
911 
912   std::unique_ptr<Lexer> RawLexer;
913   ArgTokensTy TokenRange;
914   ArgTokensTy::iterator CurrTokenIt = TokenRange.begin();
915   SourceLocation ExpanLoc;
916 };
917 
918 } // end of anonymous namespace
919 
920 /// The implementation method of getMacroExpansion: It prints the expansion of
921 /// a macro to \p Printer, and returns with the name of the macro.
922 ///
923 /// Since macros can be nested in one another, this function may call itself
924 /// recursively.
925 ///
/// Unfortunately, macro arguments have to be expanded manually. To understand
/// why, observe the following example:
928 ///
929 ///   #define PRINT(x) print(x)
930 ///   #define DO_SOMETHING(str) PRINT(str)
931 ///
932 ///   DO_SOMETHING("Cute panda cubs.");
933 ///
/// As we expand the last line, we'll immediately replace PRINT(str) with
/// print(x). The fact that both 'str' and 'x' refer to the same string is
/// information we have to forward, hence the argument \p PrevParamMap.
937 ///
938 /// To avoid infinite recursion we maintain the already processed tokens in
939 /// a set. This is carried as a parameter through the recursive calls. The set
940 /// is extended with the currently processed token and after processing it, the
941 /// token is removed. If the token is already in the set, then recursion stops:
942 ///
943 /// #define f(y) x
944 /// #define x f(x)
945 static std::string getMacroNameAndPrintExpansion(
946     TokenPrinter &Printer, SourceLocation MacroLoc, const Preprocessor &PP,
947     const MacroParamMap &PrevParamMap,
948     llvm::SmallPtrSet<IdentifierInfo *, 8> &AlreadyProcessedTokens);
949 
/// Retrieves the name of the macro and what its parameters expand into
/// at \p ExpanLoc.
952 ///
953 /// For example, for the following macro expansion:
954 ///
955 ///   #define SET_TO_NULL(x) x = 0
956 ///   #define NOT_SUSPICIOUS(a) \
957 ///     {                       \
958 ///       int b = 0;            \
959 ///     }                       \
960 ///     SET_TO_NULL(a)
961 ///
962 ///   int *ptr = new int(4);
963 ///   NOT_SUSPICIOUS(&ptr);
964 ///   *ptr = 5;
965 ///
966 /// When \p ExpanLoc references the last line, the macro name "NOT_SUSPICIOUS"
967 /// and the MacroArgMap map { (a, &ptr) } will be returned.
968 ///
/// When \p ExpanLoc references "SET_TO_NULL(a)" within the definition of
/// "NOT_SUSPICIOUS", the macro name "SET_TO_NULL" and the MacroArgMap map
/// { (x, a) } will be returned.
972 static MacroExpansionInfo
973 getMacroExpansionInfo(const MacroParamMap &PrevParamMap,
974                       SourceLocation ExpanLoc, const Preprocessor &PP);
975 
976 /// Retrieves the ')' token that matches '(' \p It points to.
977 static MacroInfo::tokens_iterator getMatchingRParen(
978     MacroInfo::tokens_iterator It,
979     MacroInfo::tokens_iterator End);
980 
/// Retrieves the macro info that \p II refers to at \p Loc. This is important
/// because macros can be redefined or undefined.
983 static const MacroInfo *getMacroInfoForLocation(const Preprocessor &PP,
984                                                 const SourceManager &SM,
985                                                 const IdentifierInfo *II,
986                                                 SourceLocation Loc);
987 
988 //===----------------------------------------------------------------------===//
989 // Definitions of helper functions and methods for expanding macros.
990 //===----------------------------------------------------------------------===//
991 
992 static ExpansionInfo
getExpandedMacro(SourceLocation MacroLoc,const Preprocessor & PP,const cross_tu::CrossTranslationUnitContext & CTU)993 getExpandedMacro(SourceLocation MacroLoc, const Preprocessor &PP,
994                  const cross_tu::CrossTranslationUnitContext &CTU) {
995 
996   const Preprocessor *PPToUse = &PP;
997   if (auto LocAndUnit = CTU.getImportedFromSourceLocation(MacroLoc)) {
998     MacroLoc = LocAndUnit->first;
999     PPToUse = &LocAndUnit->second->getPreprocessor();
1000   }
1001 
1002   llvm::SmallString<200> ExpansionBuf;
1003   llvm::raw_svector_ostream OS(ExpansionBuf);
1004   TokenPrinter Printer(OS, *PPToUse);
1005   llvm::SmallPtrSet<IdentifierInfo*, 8> AlreadyProcessedTokens;
1006 
1007   std::string MacroName = getMacroNameAndPrintExpansion(
1008       Printer, MacroLoc, *PPToUse, MacroParamMap{}, AlreadyProcessedTokens);
1009   return {MacroName, std::string(OS.str())};
1010 }
1011 
getMacroNameAndPrintExpansion(TokenPrinter & Printer,SourceLocation MacroLoc,const Preprocessor & PP,const MacroParamMap & PrevParamMap,llvm::SmallPtrSet<IdentifierInfo *,8> & AlreadyProcessedTokens)1012 static std::string getMacroNameAndPrintExpansion(
1013     TokenPrinter &Printer, SourceLocation MacroLoc, const Preprocessor &PP,
1014     const MacroParamMap &PrevParamMap,
1015     llvm::SmallPtrSet<IdentifierInfo *, 8> &AlreadyProcessedTokens) {
1016 
1017   const SourceManager &SM = PP.getSourceManager();
1018 
1019   MacroExpansionInfo MExpInfo =
1020       getMacroExpansionInfo(PrevParamMap, SM.getExpansionLoc(MacroLoc), PP);
1021   IdentifierInfo *MacroNameII = PP.getIdentifierInfo(MExpInfo.Name);
1022 
1023   // TODO: If the macro definition contains another symbol then this function is
1024   // called recursively. In case this symbol is the one being defined, it will
1025   // be an infinite recursion which is stopped by this "if" statement. However,
1026   // in this case we don't get the full expansion text in the Plist file. See
1027   // the test file where "value" is expanded to "garbage_" instead of
1028   // "garbage_value".
1029   if (!AlreadyProcessedTokens.insert(MacroNameII).second)
1030     return MExpInfo.Name;
1031 
1032   if (!MExpInfo.MI)
1033     return MExpInfo.Name;
1034 
1035   // Manually expand its arguments from the previous macro.
1036   MExpInfo.ParamMap.expandFromPrevMacro(PrevParamMap);
1037 
1038   // Iterate over the macro's tokens and stringify them.
1039   for (auto It = MExpInfo.MI->tokens_begin(), E = MExpInfo.MI->tokens_end();
1040        It != E; ++It) {
1041     Token T = *It;
1042 
1043     // If this token is not an identifier, we only need to print it.
1044     if (T.isNot(tok::identifier)) {
1045       Printer.printToken(T);
1046       continue;
1047     }
1048 
1049     const auto *II = T.getIdentifierInfo();
1050     assert(II &&
1051           "This token is an identifier but has no IdentifierInfo!");
1052 
1053     // If this token is a macro that should be expanded inside the current
1054     // macro.
1055     if (getMacroInfoForLocation(PP, SM, II, T.getLocation())) {
1056       getMacroNameAndPrintExpansion(Printer, T.getLocation(), PP,
1057                                     MExpInfo.ParamMap, AlreadyProcessedTokens);
1058 
1059       // If this is a function-like macro, skip its arguments, as
1060       // getExpandedMacro() already printed them. If this is the case, let's
1061       // first jump to the '(' token.
1062       auto N = std::next(It);
1063       if (N != E && N->is(tok::l_paren))
1064         It = getMatchingRParen(++It, E);
1065       continue;
1066     }
1067 
1068     // If this token is the current macro's argument, we should expand it.
1069     auto ParamToArgIt = MExpInfo.ParamMap.find(II);
1070     if (ParamToArgIt != MExpInfo.ParamMap.end()) {
1071       for (MacroInfo::tokens_iterator ArgIt = ParamToArgIt->second.begin(),
1072                                       ArgEnd = ParamToArgIt->second.end();
1073            ArgIt != ArgEnd; ++ArgIt) {
1074 
1075         // These tokens may still be macros, if that is the case, handle it the
1076         // same way we did above.
1077         const auto *ArgII = ArgIt->getIdentifierInfo();
1078         if (!ArgII) {
1079           Printer.printToken(*ArgIt);
1080           continue;
1081         }
1082 
1083         const auto *MI = PP.getMacroInfo(ArgII);
1084         if (!MI) {
1085           Printer.printToken(*ArgIt);
1086           continue;
1087         }
1088 
1089         getMacroNameAndPrintExpansion(Printer, ArgIt->getLocation(), PP,
1090                                       MExpInfo.ParamMap,
1091                                       AlreadyProcessedTokens);
1092         // Peek the next token if it is a tok::l_paren. This way we can decide
1093         // if this is the application or just a reference to a function maxro
1094         // symbol:
1095         //
1096         // #define apply(f) ...
1097         // #define func(x) ...
1098         // apply(func)
1099         // apply(func(42))
1100         auto N = std::next(ArgIt);
1101         if (N != ArgEnd && N->is(tok::l_paren))
1102           ArgIt = getMatchingRParen(++ArgIt, ArgEnd);
1103       }
1104       continue;
1105     }
1106 
1107     // If control reached here, then this token isn't a macro identifier, nor an
1108     // unexpanded macro argument that we need to handle, print it.
1109     Printer.printToken(T);
1110   }
1111 
1112   AlreadyProcessedTokens.erase(MacroNameII);
1113 
1114   return MExpInfo.Name;
1115 }
1116 
1117 static MacroExpansionInfo
getMacroExpansionInfo(const MacroParamMap & PrevParamMap,SourceLocation ExpanLoc,const Preprocessor & PP)1118 getMacroExpansionInfo(const MacroParamMap &PrevParamMap,
1119                       SourceLocation ExpanLoc, const Preprocessor &PP) {
1120 
1121   const SourceManager &SM = PP.getSourceManager();
1122   const LangOptions &LangOpts = PP.getLangOpts();
1123 
1124   // First, we create a Lexer to lex *at the expansion location* the tokens
1125   // referring to the macro's name and its arguments.
1126   TokenStream TStream(ExpanLoc, SM, LangOpts);
1127 
1128   // Acquire the macro's name.
1129   Token TheTok;
1130   TStream.next(TheTok);
1131 
1132   std::string MacroName = PP.getSpelling(TheTok);
1133 
1134   const auto *II = PP.getIdentifierInfo(MacroName);
1135   assert(II && "Failed to acquire the IdentifierInfo for the macro!");
1136 
1137   const MacroInfo *MI = getMacroInfoForLocation(PP, SM, II, ExpanLoc);
1138   // assert(MI && "The macro must've been defined at it's expansion location!");
1139   //
1140   // We should always be able to obtain the MacroInfo in a given TU, but if
1141   // we're running the analyzer with CTU, the Preprocessor won't contain the
1142   // directive history (or anything for that matter) from another TU.
1143   // TODO: assert when we're not running with CTU.
1144   if (!MI)
1145     return { MacroName, MI, {} };
1146 
1147   // Acquire the macro's arguments at the expansion point.
1148   //
1149   // The rough idea here is to lex from the first left parentheses to the last
1150   // right parentheses, and map the macro's parameter to what they will be
1151   // expanded to. A macro argument may contain several token (like '3 + 4'), so
1152   // we'll lex until we find a tok::comma or tok::r_paren, at which point we
1153   // start lexing the next argument or finish.
1154   ArrayRef<const IdentifierInfo *> MacroParams = MI->params();
1155   if (MacroParams.empty())
1156     return { MacroName, MI, {} };
1157 
1158   TStream.next(TheTok);
1159   // When this is a token which expands to another macro function then its
1160   // parentheses are not at its expansion locaiton. For example:
1161   //
1162   // #define foo(x) int bar() { return x; }
1163   // #define apply_zero(f) f(0)
1164   // apply_zero(foo)
1165   //               ^
1166   //               This is not a tok::l_paren, but foo is a function.
1167   if (TheTok.isNot(tok::l_paren))
1168     return { MacroName, MI, {} };
1169 
1170   MacroParamMap ParamMap;
1171 
1172   // When the argument is a function call, like
1173   //   CALL_FN(someFunctionName(param1, param2))
1174   // we will find tok::l_paren, tok::r_paren, and tok::comma that do not divide
1175   // actual macro arguments, or do not represent the macro argument's closing
1176   // parentheses, so we'll count how many parentheses aren't closed yet.
1177   // If ParanthesesDepth
1178   //   * = 0, then there are no more arguments to lex.
1179   //   * = 1, then if we find a tok::comma, we can start lexing the next arg.
1180   //   * > 1, then tok::comma is a part of the current arg.
1181   int ParenthesesDepth = 1;
1182 
1183   // If we encounter the variadic arg, we will lex until the closing
1184   // tok::r_paren, even if we lex a tok::comma and ParanthesesDepth == 1.
1185   const IdentifierInfo *VariadicParamII = PP.getIdentifierInfo("__VA_ARGS__");
1186   if (MI->isGNUVarargs()) {
1187     // If macro uses GNU-style variadic args, the param name is user-supplied,
1188     // an not "__VA_ARGS__".  E.g.:
1189     //   #define FOO(a, b, myvargs...)
1190     // In this case, just use the last parameter:
1191     VariadicParamII = *(MacroParams.rbegin());
1192   }
1193 
1194   for (const IdentifierInfo *CurrParamII : MacroParams) {
1195     MacroParamMap::mapped_type ArgTokens;
1196 
1197     // One could also simply not supply a single argument to __VA_ARGS__ -- this
1198     // results in a preprocessor warning, but is not an error:
1199     //   #define VARIADIC(ptr, ...) \
1200     //     someVariadicTemplateFunction(__VA_ARGS__)
1201     //
1202     //   int *ptr;
1203     //   VARIADIC(ptr); // Note that there are no commas, this isn't just an
1204     //                  // empty parameter -- there are no parameters for '...'.
1205     // In any other case, ParenthesesDepth mustn't be 0 here.
1206     if (ParenthesesDepth != 0) {
1207 
1208       // Lex the first token of the next macro parameter.
1209       TStream.next(TheTok);
1210 
1211       while (CurrParamII == VariadicParamII || ParenthesesDepth != 1 ||
1212              !TheTok.is(tok::comma)) {
1213         assert(TheTok.isNot(tok::eof) &&
1214                "EOF encountered while looking for expanded macro args!");
1215 
1216         if (TheTok.is(tok::l_paren))
1217           ++ParenthesesDepth;
1218 
1219         if (TheTok.is(tok::r_paren))
1220           --ParenthesesDepth;
1221 
1222         if (ParenthesesDepth == 0)
1223           break;
1224 
1225         if (TheTok.is(tok::raw_identifier)) {
1226           PP.LookUpIdentifierInfo(TheTok);
1227           // This token is a variadic parameter:
1228           //
1229           //   #define PARAMS_RESOLVE_TO_VA_ARGS(i, fmt) foo(i, fmt); \
1230           //     i = 0;
1231           //   #define DISPATCH(...) \
1232           //     PARAMS_RESOLVE_TO_VA_ARGS(__VA_ARGS__);
1233           //                            // ^~~~~~~~~~~ Variadic parameter here
1234           //
1235           //   void multipleParamsResolveToVA_ARGS(void) {
1236           //     int x = 1;
1237           //     DISPATCH(x, "LF1M healer"); // Multiple arguments are mapped to
1238           //                                 // a single __VA_ARGS__ parameter.
1239           //     (void)(10 / x);
1240           //   }
1241           //
1242           // We will stumble across this while trying to expand
1243           // PARAMS_RESOLVE_TO_VA_ARGS. By this point, we already noted during
1244           // the processing of DISPATCH what __VA_ARGS__ maps to, so we'll
1245           // retrieve the next series of tokens from that.
1246           if (TheTok.getIdentifierInfo() == VariadicParamII) {
1247             TStream.injectRange(PrevParamMap.at(VariadicParamII));
1248             TStream.next(TheTok);
1249             continue;
1250           }
1251         }
1252 
1253         ArgTokens.push_back(TheTok);
1254         TStream.next(TheTok);
1255       }
1256     } else {
1257       assert(CurrParamII == VariadicParamII &&
1258              "No more macro arguments are found, but the current parameter "
1259              "isn't the variadic arg!");
1260     }
1261 
1262     ParamMap.emplace(CurrParamII, std::move(ArgTokens));
1263   }
1264 
1265   assert(TheTok.is(tok::r_paren) &&
1266          "Expanded macro argument acquisition failed! After the end of the loop"
1267          " this token should be ')'!");
1268 
1269   return {MacroName, MI, ParamMap};
1270 }
1271 
getMatchingRParen(MacroInfo::tokens_iterator It,MacroInfo::tokens_iterator End)1272 static MacroInfo::tokens_iterator getMatchingRParen(
1273     MacroInfo::tokens_iterator It,
1274     MacroInfo::tokens_iterator End) {
1275 
1276   assert(It->is(tok::l_paren) && "This token should be '('!");
1277 
1278   // Skip until we find the closing ')'.
1279   int ParenthesesDepth = 1;
1280   while (ParenthesesDepth != 0) {
1281     ++It;
1282 
1283     assert(It->isNot(tok::eof) &&
1284            "Encountered EOF while attempting to skip macro arguments!");
1285     assert(It != End &&
1286            "End of the macro definition reached before finding ')'!");
1287 
1288     if (It->is(tok::l_paren))
1289       ++ParenthesesDepth;
1290 
1291     if (It->is(tok::r_paren))
1292       --ParenthesesDepth;
1293   }
1294   return It;
1295 }
1296 
getMacroInfoForLocation(const Preprocessor & PP,const SourceManager & SM,const IdentifierInfo * II,SourceLocation Loc)1297 static const MacroInfo *getMacroInfoForLocation(const Preprocessor &PP,
1298                                                 const SourceManager &SM,
1299                                                 const IdentifierInfo *II,
1300                                                 SourceLocation Loc) {
1301 
1302   const MacroDirective *MD = PP.getLocalMacroDirectiveHistory(II);
1303   if (!MD)
1304     return nullptr;
1305 
1306   return MD->findDirectiveAtLoc(Loc, SM).getMacroInfo();
1307 }
1308 
expandFromPrevMacro(const MacroParamMap & Super)1309 void MacroParamMap::expandFromPrevMacro(const MacroParamMap &Super) {
1310 
1311   for (value_type &Pair : *this) {
1312     ArgTokensTy &CurrArgTokens = Pair.second;
1313 
1314     // For each token in the expanded macro argument.
1315     auto It = CurrArgTokens.begin();
1316     while (It != CurrArgTokens.end()) {
1317       if (It->isNot(tok::identifier)) {
1318         ++It;
1319         continue;
1320       }
1321 
1322       const auto *II = It->getIdentifierInfo();
1323       assert(II);
1324 
1325       // Is this an argument that "Super" expands further?
1326       if (!Super.count(II)) {
1327         ++It;
1328         continue;
1329       }
1330 
1331       const ArgTokensTy &SuperArgTokens = Super.at(II);
1332 
1333       It = CurrArgTokens.insert(It, SuperArgTokens.begin(),
1334                                 SuperArgTokens.end());
1335       std::advance(It, SuperArgTokens.size());
1336       It = CurrArgTokens.erase(It);
1337     }
1338   }
1339 }
1340 
dumpToStream(llvm::raw_ostream & Out,const Preprocessor & PP) const1341 void MacroParamMap::dumpToStream(llvm::raw_ostream &Out,
1342                                  const Preprocessor &PP) const {
1343   for (const std::pair<const IdentifierInfo *, ArgTokensTy> Pair : *this) {
1344     Out << Pair.first->getName() << " -> ";
1345     dumpArgTokensToStream(Out, PP, Pair.second);
1346     Out << '\n';
1347   }
1348 }
1349 
dumpArgTokensToStream(llvm::raw_ostream & Out,const Preprocessor & PP,const ArgTokensTy & Toks)1350 static void dumpArgTokensToStream(llvm::raw_ostream &Out,
1351                                   const Preprocessor &PP,
1352                                   const ArgTokensTy &Toks) {
1353   TokenPrinter Printer(Out, PP);
1354   for (Token Tok : Toks)
1355     Printer.printToken(Tok);
1356 }
1357 
printToken(const Token & Tok)1358 void TokenPrinter::printToken(const Token &Tok) {
1359   // TODO: Handle GNU extensions where hash and hashhash occurs right before
1360   // __VA_ARGS__.
1361   // cppreference.com: "some compilers offer an extension that allows ## to
1362   // appear after a comma and before __VA_ARGS__, in which case the ## does
1363   // nothing when the variable arguments are present, but removes the comma when
1364   // the variable arguments are not present: this makes it possible to define
1365   // macros such as fprintf (stderr, format, ##__VA_ARGS__)"
1366   // FIXME: Handle named variadic macro parameters (also a GNU extension).
1367 
1368   // If this is the first token to be printed, don't print space.
1369   if (PrevTok.isNot(tok::unknown)) {
1370     // If the tokens were already space separated, or if they must be to avoid
1371     // them being implicitly pasted, add a space between them.
1372     if(Tok.hasLeadingSpace() || ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok,
1373                                                        Tok)) {
1374       // AvoidConcat doesn't check for ##, don't print a space around it.
1375       if (PrevTok.isNot(tok::hashhash) && Tok.isNot(tok::hashhash)) {
1376         OS << ' ';
1377       }
1378     }
1379   }
1380 
1381   if (!Tok.isOneOf(tok::hash, tok::hashhash)) {
1382     if (PrevTok.is(tok::hash))
1383       OS << '\"' << PP.getSpelling(Tok) << '\"';
1384     else
1385       OS << PP.getSpelling(Tok);
1386   }
1387 
1388   PrevPrevTok = PrevTok;
1389   PrevTok = Tok;
1390 }
1391