1 //===--- SarifDiagnostics.cpp - Sarif Diagnostics for Paths -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines the SarifDiagnostics object.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/Analysis/PathDiagnostic.h"
14 #include "clang/Basic/Version.h"
15 #include "clang/Lex/Preprocessor.h"
16 #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
17 #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/StringMap.h"
20 #include "llvm/Support/ConvertUTF.h"
21 #include "llvm/Support/JSON.h"
22 #include "llvm/Support/Path.h"
23 
24 using namespace llvm;
25 using namespace clang;
26 using namespace ento;
27 
28 namespace {
29 class SarifDiagnostics : public PathDiagnosticConsumer {
30   std::string OutputFile;
31   const LangOptions &LO;
32 
33 public:
34   SarifDiagnostics(AnalyzerOptions &, const std::string &Output,
35                    const LangOptions &LO)
36       : OutputFile(Output), LO(LO) {}
37   ~SarifDiagnostics() override = default;
38 
39   void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags,
40                             FilesMade *FM) override;
41 
42   StringRef getName() const override { return "SarifDiagnostics"; }
43   PathGenerationScheme getGenerationScheme() const override { return Minimal; }
44   bool supportsLogicalOpControlFlow() const override { return true; }
45   bool supportsCrossFileDiagnostics() const override { return true; }
46 };
47 } // end anonymous namespace
48 
49 void ento::createSarifDiagnosticConsumer(
50     AnalyzerOptions &AnalyzerOpts, PathDiagnosticConsumers &C,
51     const std::string &Output, const Preprocessor &PP,
52     const cross_tu::CrossTranslationUnitContext &) {
53   C.push_back(new SarifDiagnostics(AnalyzerOpts, Output, PP.getLangOpts()));
54 }
55 
56 static StringRef getFileName(const FileEntry &FE) {
57   StringRef Filename = FE.tryGetRealPathName();
58   if (Filename.empty())
59     Filename = FE.getName();
60   return Filename;
61 }
62 
63 static std::string percentEncodeURICharacter(char C) {
64   // RFC 3986 claims alpha, numeric, and this handful of
65   // characters are not reserved for the path component and
66   // should be written out directly. Otherwise, percent
67   // encode the character and write that out instead of the
68   // reserved character.
69   if (llvm::isAlnum(C) ||
70       StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C))
71     return std::string(&C, 1);
72   return "%" + llvm::toHex(StringRef(&C, 1));
73 }
74 
75 static std::string fileNameToURI(StringRef Filename) {
76   llvm::SmallString<32> Ret = StringRef("file://");
77 
78   // Get the root name to see if it has a URI authority.
79   StringRef Root = sys::path::root_name(Filename);
80   if (Root.startswith("//")) {
81     // There is an authority, so add it to the URI.
82     Ret += Root.drop_front(2).str();
83   } else if (!Root.empty()) {
84     // There is no authority, so end the component and add the root to the URI.
85     Ret += Twine("/" + Root).str();
86   }
87 
88   auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
89   assert(Iter != End && "Expected there to be a non-root path component.");
90   // Add the rest of the path components, encoding any reserved characters;
91   // we skip past the first path component, as it was handled it above.
92   std::for_each(++Iter, End, [&Ret](StringRef Component) {
93     // For reasons unknown to me, we may get a backslash with Windows native
94     // paths for the initial backslash following the drive component, which
95     // we need to ignore as a URI path part.
96     if (Component == "\\")
97       return;
98 
99     // Add the separator between the previous path part and the one being
100     // currently processed.
101     Ret += "/";
102 
103     // URI encode the part.
104     for (char C : Component) {
105       Ret += percentEncodeURICharacter(C);
106     }
107   });
108 
109   return Ret.str().str();
110 }
111 
112 static json::Object createArtifactLocation(const FileEntry &FE) {
113   return json::Object{{"uri", fileNameToURI(getFileName(FE))}};
114 }
115 
116 static json::Object createArtifact(const FileEntry &FE) {
117   return json::Object{{"location", createArtifactLocation(FE)},
118                       {"roles", json::Array{"resultFile"}},
119                       {"length", FE.getSize()},
120                       {"mimeType", "text/plain"}};
121 }
122 
123 static json::Object createArtifactLocation(const FileEntry &FE,
124                                            json::Array &Artifacts) {
125   std::string FileURI = fileNameToURI(getFileName(FE));
126 
127   // See if the Artifacts array contains this URI already. If it does not,
128   // create a new artifact object to add to the array.
129   auto I = llvm::find_if(Artifacts, [&](const json::Value &File) {
130     if (const json::Object *Obj = File.getAsObject()) {
131       if (const json::Object *FileLoc = Obj->getObject("location")) {
132         Optional<StringRef> URI = FileLoc->getString("uri");
133         return URI && URI->equals(FileURI);
134       }
135     }
136     return false;
137   });
138 
139   // Calculate the index within the artifact array so it can be stored in
140   // the JSON object.
141   auto Index = static_cast<unsigned>(std::distance(Artifacts.begin(), I));
142   if (I == Artifacts.end())
143     Artifacts.push_back(createArtifact(FE));
144 
145   return json::Object{{"uri", FileURI}, {"index", Index}};
146 }
147 
148 static unsigned int adjustColumnPos(const SourceManager &SM, SourceLocation Loc,
149                                     unsigned int TokenLen = 0) {
150   assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
151 
152   std::pair<FileID, unsigned> LocInfo = SM.getDecomposedExpansionLoc(Loc);
153   assert(LocInfo.second > SM.getExpansionColumnNumber(Loc) &&
154          "position in file is before column number?");
155 
156   bool InvalidBuffer = false;
157   const MemoryBuffer *Buf = SM.getBuffer(LocInfo.first, &InvalidBuffer);
158   assert(!InvalidBuffer && "got an invalid buffer for the location's file");
159   assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
160          "token extends past end of buffer?");
161 
162   // Adjust the offset to be the start of the line, since we'll be counting
163   // Unicode characters from there until our column offset.
164   unsigned int Off = LocInfo.second - (SM.getExpansionColumnNumber(Loc) - 1);
165   unsigned int Ret = 1;
166   while (Off < (LocInfo.second + TokenLen)) {
167     Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
168     Ret++;
169   }
170 
171   return Ret;
172 }
173 
174 static json::Object createTextRegion(const LangOptions &LO, SourceRange R,
175                                      const SourceManager &SM) {
176   json::Object Region{
177       {"startLine", SM.getExpansionLineNumber(R.getBegin())},
178       {"startColumn", adjustColumnPos(SM, R.getBegin())},
179   };
180   if (R.getBegin() == R.getEnd()) {
181     Region["endColumn"] = adjustColumnPos(SM, R.getBegin());
182   } else {
183     Region["endLine"] = SM.getExpansionLineNumber(R.getEnd());
184     Region["endColumn"] = adjustColumnPos(
185         SM, R.getEnd(),
186         Lexer::MeasureTokenLength(R.getEnd(), SM, LO));
187   }
188   return Region;
189 }
190 
191 static json::Object createPhysicalLocation(const LangOptions &LO,
192                                            SourceRange R, const FileEntry &FE,
193                                            const SourceManager &SMgr,
194                                            json::Array &Artifacts) {
195   return json::Object{
196       {{"artifactLocation", createArtifactLocation(FE, Artifacts)},
197        {"region", createTextRegion(LO, R, SMgr)}}};
198 }
199 
200 enum class Importance { Important, Essential, Unimportant };
201 
202 static StringRef importanceToStr(Importance I) {
203   switch (I) {
204   case Importance::Important:
205     return "important";
206   case Importance::Essential:
207     return "essential";
208   case Importance::Unimportant:
209     return "unimportant";
210   }
211   llvm_unreachable("Fully covered switch is not so fully covered");
212 }
213 
214 static json::Object createThreadFlowLocation(json::Object &&Location,
215                                              Importance I) {
216   return json::Object{{"location", std::move(Location)},
217                       {"importance", importanceToStr(I)}};
218 }
219 
220 static json::Object createMessage(StringRef Text) {
221   return json::Object{{"text", Text.str()}};
222 }
223 
224 static json::Object createLocation(json::Object &&PhysicalLocation,
225                                    StringRef Message = "") {
226   json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}};
227   if (!Message.empty())
228     Ret.insert({"message", createMessage(Message)});
229   return Ret;
230 }
231 
232 static Importance calculateImportance(const PathDiagnosticPiece &Piece) {
233   switch (Piece.getKind()) {
234   case PathDiagnosticPiece::Call:
235   case PathDiagnosticPiece::Macro:
236   case PathDiagnosticPiece::Note:
237   case PathDiagnosticPiece::PopUp:
238     // FIXME: What should be reported here?
239     break;
240   case PathDiagnosticPiece::Event:
241     return Piece.getTagStr() == "ConditionBRVisitor" ? Importance::Important
242                                                      : Importance::Essential;
243   case PathDiagnosticPiece::ControlFlow:
244     return Importance::Unimportant;
245   }
246   return Importance::Unimportant;
247 }
248 
249 static json::Object createThreadFlow(const LangOptions &LO,
250                                      const PathPieces &Pieces,
251                                      json::Array &Artifacts) {
252   const SourceManager &SMgr = Pieces.front()->getLocation().getManager();
253   json::Array Locations;
254   for (const auto &Piece : Pieces) {
255     const PathDiagnosticLocation &P = Piece->getLocation();
256     Locations.push_back(createThreadFlowLocation(
257         createLocation(createPhysicalLocation(
258                            LO, P.asRange(),
259                            *P.asLocation().getExpansionLoc().getFileEntry(),
260                            SMgr, Artifacts),
261                        Piece->getString()),
262         calculateImportance(*Piece)));
263   }
264   return json::Object{{"locations", std::move(Locations)}};
265 }
266 
267 static json::Object createCodeFlow(const LangOptions &LO,
268                                    const PathPieces &Pieces,
269                                    json::Array &Artifacts) {
270   return json::Object{
271       {"threadFlows", json::Array{createThreadFlow(LO, Pieces, Artifacts)}}};
272 }
273 
274 static json::Object createResult(const LangOptions &LO,
275                                  const PathDiagnostic &Diag,
276                                  json::Array &Artifacts,
277                                  const StringMap<unsigned> &RuleMapping) {
278   const PathPieces &Path = Diag.path.flatten(false);
279   const SourceManager &SMgr = Path.front()->getLocation().getManager();
280 
281   auto Iter = RuleMapping.find(Diag.getCheckerName());
282   assert(Iter != RuleMapping.end() && "Rule ID is not in the array index map?");
283 
284   return json::Object{
285       {"message", createMessage(Diag.getVerboseDescription())},
286       {"codeFlows", json::Array{createCodeFlow(LO, Path, Artifacts)}},
287       {"locations",
288        json::Array{createLocation(createPhysicalLocation(
289            LO, Diag.getLocation().asRange(),
290            *Diag.getLocation().asLocation().getExpansionLoc().getFileEntry(),
291            SMgr, Artifacts))}},
292       {"ruleIndex", Iter->getValue()},
293       {"ruleId", Diag.getCheckerName()}};
294 }
295 
296 static StringRef getRuleDescription(StringRef CheckName) {
297   return llvm::StringSwitch<StringRef>(CheckName)
298 #define GET_CHECKERS
299 #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN)                 \
300   .Case(FULLNAME, HELPTEXT)
301 #include "clang/StaticAnalyzer/Checkers/Checkers.inc"
302 #undef CHECKER
303 #undef GET_CHECKERS
304       ;
305 }
306 
307 static StringRef getRuleHelpURIStr(StringRef CheckName) {
308   return llvm::StringSwitch<StringRef>(CheckName)
309 #define GET_CHECKERS
310 #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN)                 \
311   .Case(FULLNAME, DOC_URI)
312 #include "clang/StaticAnalyzer/Checkers/Checkers.inc"
313 #undef CHECKER
314 #undef GET_CHECKERS
315       ;
316 }
317 
318 static json::Object createRule(const PathDiagnostic &Diag) {
319   StringRef CheckName = Diag.getCheckerName();
320   json::Object Ret{
321       {"fullDescription", createMessage(getRuleDescription(CheckName))},
322       {"name", CheckName},
323       {"id", CheckName}};
324 
325   std::string RuleURI = getRuleHelpURIStr(CheckName);
326   if (!RuleURI.empty())
327     Ret["helpUri"] = RuleURI;
328 
329   return Ret;
330 }
331 
332 static json::Array createRules(std::vector<const PathDiagnostic *> &Diags,
333                                StringMap<unsigned> &RuleMapping) {
334   json::Array Rules;
335   llvm::StringSet<> Seen;
336 
337   llvm::for_each(Diags, [&](const PathDiagnostic *D) {
338     StringRef RuleID = D->getCheckerName();
339     std::pair<llvm::StringSet<>::iterator, bool> P = Seen.insert(RuleID);
340     if (P.second) {
341       RuleMapping[RuleID] = Rules.size(); // Maps RuleID to an Array Index.
342       Rules.push_back(createRule(*D));
343     }
344   });
345 
346   return Rules;
347 }
348 
349 static json::Object createTool(std::vector<const PathDiagnostic *> &Diags,
350                                StringMap<unsigned> &RuleMapping) {
351   return json::Object{
352       {"driver", json::Object{{"name", "clang"},
353                               {"fullName", "clang static analyzer"},
354                               {"language", "en-US"},
355                               {"version", getClangFullVersion()},
356                               {"rules", createRules(Diags, RuleMapping)}}}};
357 }
358 
359 static json::Object createRun(const LangOptions &LO,
360                               std::vector<const PathDiagnostic *> &Diags) {
361   json::Array Results, Artifacts;
362   StringMap<unsigned> RuleMapping;
363   json::Object Tool = createTool(Diags, RuleMapping);
364 
365   llvm::for_each(Diags, [&](const PathDiagnostic *D) {
366     Results.push_back(createResult(LO, *D, Artifacts, RuleMapping));
367   });
368 
369   return json::Object{{"tool", std::move(Tool)},
370                       {"results", std::move(Results)},
371                       {"artifacts", std::move(Artifacts)},
372                       {"columnKind", "unicodeCodePoints"}};
373 }
374 
375 void SarifDiagnostics::FlushDiagnosticsImpl(
376     std::vector<const PathDiagnostic *> &Diags, FilesMade *) {
377   // We currently overwrite the file if it already exists. However, it may be
378   // useful to add a feature someday that allows the user to append a run to an
379   // existing SARIF file. One danger from that approach is that the size of the
380   // file can become large very quickly, so decoding into JSON to append a run
381   // may be an expensive operation.
382   std::error_code EC;
383   llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::OF_Text);
384   if (EC) {
385     llvm::errs() << "warning: could not create file: " << EC.message() << '\n';
386     return;
387   }
388   json::Object Sarif{
389       {"$schema",
390        "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"},
391       {"version", "2.1.0"},
392       {"runs", json::Array{createRun(LO, Diags)}}};
393   OS << llvm::formatv("{0:2}\n", json::Value(std::move(Sarif)));
394 }
395