1 //===--- SarifDiagnostics.cpp - Sarif Diagnostics for Paths -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines the SarifDiagnostics object.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/Analysis/MacroExpansionContext.h"
14 #include "clang/Analysis/PathDiagnostic.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/Sarif.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Basic/Version.h"
19 #include "clang/Lex/Preprocessor.h"
20 #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/StringMap.h"
23 #include "llvm/Support/ConvertUTF.h"
24 #include "llvm/Support/JSON.h"
25 #include "llvm/Support/Path.h"
26 
27 using namespace llvm;
28 using namespace clang;
29 using namespace ento;
30 
31 namespace {
32 class SarifDiagnostics : public PathDiagnosticConsumer {
33   std::string OutputFile;
34   const LangOptions &LO;
35   SarifDocumentWriter SarifWriter;
36 
37 public:
SarifDiagnostics(const std::string & Output,const LangOptions & LO,const SourceManager & SM)38   SarifDiagnostics(const std::string &Output, const LangOptions &LO,
39                    const SourceManager &SM)
40       : OutputFile(Output), LO(LO), SarifWriter(SM) {}
41   ~SarifDiagnostics() override = default;
42 
43   void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags,
44                             FilesMade *FM) override;
45 
getName() const46   StringRef getName() const override { return "SarifDiagnostics"; }
getGenerationScheme() const47   PathGenerationScheme getGenerationScheme() const override { return Minimal; }
supportsLogicalOpControlFlow() const48   bool supportsLogicalOpControlFlow() const override { return true; }
supportsCrossFileDiagnostics() const49   bool supportsCrossFileDiagnostics() const override { return true; }
50 };
51 } // end anonymous namespace
52 
createSarifDiagnosticConsumer(PathDiagnosticConsumerOptions DiagOpts,PathDiagnosticConsumers & C,const std::string & Output,const Preprocessor & PP,const cross_tu::CrossTranslationUnitContext & CTU,const MacroExpansionContext & MacroExpansions)53 void ento::createSarifDiagnosticConsumer(
54     PathDiagnosticConsumerOptions DiagOpts, PathDiagnosticConsumers &C,
55     const std::string &Output, const Preprocessor &PP,
56     const cross_tu::CrossTranslationUnitContext &CTU,
57     const MacroExpansionContext &MacroExpansions) {
58 
59   // TODO: Emit an error here.
60   if (Output.empty())
61     return;
62 
63   C.push_back(
64       new SarifDiagnostics(Output, PP.getLangOpts(), PP.getSourceManager()));
65   createTextMinimalPathDiagnosticConsumer(std::move(DiagOpts), C, Output, PP,
66                                           CTU, MacroExpansions);
67 }
68 
getRuleDescription(StringRef CheckName)69 static StringRef getRuleDescription(StringRef CheckName) {
70   return llvm::StringSwitch<StringRef>(CheckName)
71 #define GET_CHECKERS
72 #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN)                 \
73   .Case(FULLNAME, HELPTEXT)
74 #include "clang/StaticAnalyzer/Checkers/Checkers.inc"
75 #undef CHECKER
76 #undef GET_CHECKERS
77       ;
78 }
79 
getRuleHelpURIStr(StringRef CheckName)80 static StringRef getRuleHelpURIStr(StringRef CheckName) {
81   return llvm::StringSwitch<StringRef>(CheckName)
82 #define GET_CHECKERS
83 #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN)                 \
84   .Case(FULLNAME, DOC_URI)
85 #include "clang/StaticAnalyzer/Checkers/Checkers.inc"
86 #undef CHECKER
87 #undef GET_CHECKERS
88       ;
89 }
90 
91 static ThreadFlowImportance
calculateImportance(const PathDiagnosticPiece & Piece)92 calculateImportance(const PathDiagnosticPiece &Piece) {
93   switch (Piece.getKind()) {
94   case PathDiagnosticPiece::Call:
95   case PathDiagnosticPiece::Macro:
96   case PathDiagnosticPiece::Note:
97   case PathDiagnosticPiece::PopUp:
98     // FIXME: What should be reported here?
99     break;
100   case PathDiagnosticPiece::Event:
101     return Piece.getTagStr() == "ConditionBRVisitor"
102                ? ThreadFlowImportance::Important
103                : ThreadFlowImportance::Essential;
104   case PathDiagnosticPiece::ControlFlow:
105     return ThreadFlowImportance::Unimportant;
106   }
107   return ThreadFlowImportance::Unimportant;
108 }
109 
110 /// Accepts a SourceRange corresponding to a pair of the first and last tokens
111 /// and converts to a Character granular CharSourceRange.
convertTokenRangeToCharRange(const SourceRange & R,const SourceManager & SM,const LangOptions & LO)112 static CharSourceRange convertTokenRangeToCharRange(const SourceRange &R,
113                                                     const SourceManager &SM,
114                                                     const LangOptions &LO) {
115   // Caret diagnostics have the first and last locations pointed at the same
116   // location, return these as-is.
117   if (R.getBegin() == R.getEnd())
118     return CharSourceRange::getCharRange(R);
119 
120   SourceLocation BeginCharLoc = R.getBegin();
121   // For token ranges, the raw end SLoc points at the first character of the
122   // last token in the range. This must be moved to one past the end of the
123   // last character using the lexer.
124   SourceLocation EndCharLoc =
125       Lexer::getLocForEndOfToken(R.getEnd(), /* Offset = */ 0, SM, LO);
126   return CharSourceRange::getCharRange(BeginCharLoc, EndCharLoc);
127 }
128 
createThreadFlows(const PathDiagnostic * Diag,const LangOptions & LO)129 static SmallVector<ThreadFlow, 8> createThreadFlows(const PathDiagnostic *Diag,
130                                                     const LangOptions &LO) {
131   SmallVector<ThreadFlow, 8> Flows;
132   const PathPieces &Pieces = Diag->path.flatten(false);
133   for (const auto &Piece : Pieces) {
134     auto Range = convertTokenRangeToCharRange(
135         Piece->getLocation().asRange(), Piece->getLocation().getManager(), LO);
136     auto Flow = ThreadFlow::create()
137                     .setImportance(calculateImportance(*Piece))
138                     .setRange(Range)
139                     .setMessage(Piece->getString());
140     Flows.push_back(Flow);
141   }
142   return Flows;
143 }
144 
145 static StringMap<uint32_t>
createRuleMapping(const std::vector<const PathDiagnostic * > & Diags,SarifDocumentWriter & SarifWriter)146 createRuleMapping(const std::vector<const PathDiagnostic *> &Diags,
147                   SarifDocumentWriter &SarifWriter) {
148   StringMap<uint32_t> RuleMapping;
149   llvm::StringSet<> Seen;
150 
151   for (const PathDiagnostic *D : Diags) {
152     StringRef CheckName = D->getCheckerName();
153     std::pair<llvm::StringSet<>::iterator, bool> P = Seen.insert(CheckName);
154     if (P.second) {
155       auto Rule = SarifRule::create()
156                       .setName(CheckName)
157                       .setRuleId(CheckName)
158                       .setDescription(getRuleDescription(CheckName))
159                       .setHelpURI(getRuleHelpURIStr(CheckName));
160       size_t RuleIdx = SarifWriter.createRule(Rule);
161       RuleMapping[CheckName] = RuleIdx;
162     }
163   }
164   return RuleMapping;
165 }
166 
createResult(const PathDiagnostic * Diag,const StringMap<uint32_t> & RuleMapping,const LangOptions & LO)167 static SarifResult createResult(const PathDiagnostic *Diag,
168                                 const StringMap<uint32_t> &RuleMapping,
169                                 const LangOptions &LO) {
170 
171   StringRef CheckName = Diag->getCheckerName();
172   uint32_t RuleIdx = RuleMapping.lookup(CheckName);
173   auto Range = convertTokenRangeToCharRange(
174       Diag->getLocation().asRange(), Diag->getLocation().getManager(), LO);
175 
176   SmallVector<ThreadFlow, 8> Flows = createThreadFlows(Diag, LO);
177   auto Result = SarifResult::create(RuleIdx)
178                     .setRuleId(CheckName)
179                     .setDiagnosticMessage(Diag->getVerboseDescription())
180                     .setDiagnosticLevel(SarifResultLevel::Warning)
181                     .setLocations({Range})
182                     .setThreadFlows(Flows);
183   return Result;
184 }
185 
FlushDiagnosticsImpl(std::vector<const PathDiagnostic * > & Diags,FilesMade *)186 void SarifDiagnostics::FlushDiagnosticsImpl(
187     std::vector<const PathDiagnostic *> &Diags, FilesMade *) {
188   // We currently overwrite the file if it already exists. However, it may be
189   // useful to add a feature someday that allows the user to append a run to an
190   // existing SARIF file. One danger from that approach is that the size of the
191   // file can become large very quickly, so decoding into JSON to append a run
192   // may be an expensive operation.
193   std::error_code EC;
194   llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::OF_TextWithCRLF);
195   if (EC) {
196     llvm::errs() << "warning: could not create file: " << EC.message() << '\n';
197     return;
198   }
199 
200   std::string ToolVersion = getClangFullVersion();
201   SarifWriter.createRun("clang", "clang static analyzer", ToolVersion);
202   StringMap<uint32_t> RuleMapping = createRuleMapping(Diags, SarifWriter);
203   for (const PathDiagnostic *D : Diags) {
204     SarifResult Result = createResult(D, RuleMapping, LO);
205     SarifWriter.appendResult(Result);
206   }
207   auto Document = SarifWriter.createDocument();
208   OS << llvm::formatv("{0:2}\n", json::Value(std::move(Document)));
209 }
210