1 //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
/// This file contains the definition of the SarifDocumentWriter class, and
/// helper routines used with the associated builders such as:
12 /// - \ref SarifArtifact
13 /// - \ref SarifArtifactLocation
14 /// - \ref SarifRule
15 /// - \ref SarifResult
16 //===----------------------------------------------------------------------===//
17 #include "clang/Basic/Sarif.h"
18 #include "clang/Basic/SourceLocation.h"
19 #include "clang/Basic/SourceManager.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/ConvertUTF.h"
25 #include "llvm/Support/JSON.h"
26 #include "llvm/Support/Path.h"
27 
28 #include <optional>
29 #include <string>
30 #include <utility>
31 
32 using namespace clang;
33 using namespace llvm;
34 
35 using clang::detail::SarifArtifact;
36 using clang::detail::SarifArtifactLocation;
37 
getFileName(FileEntryRef FE)38 static StringRef getFileName(FileEntryRef FE) {
39   StringRef Filename = FE.getFileEntry().tryGetRealPathName();
40   if (Filename.empty())
41     Filename = FE.getName();
42   return Filename;
43 }
44 /// \name URI
45 /// @{
46 
47 /// \internal
48 /// \brief
49 /// Return the RFC3986 encoding of the input character.
50 ///
51 /// \param C Character to encode to RFC3986.
52 ///
53 /// \return The RFC3986 representation of \c C.
percentEncodeURICharacter(char C)54 static std::string percentEncodeURICharacter(char C) {
55   // RFC 3986 claims alpha, numeric, and this handful of
56   // characters are not reserved for the path component and
57   // should be written out directly. Otherwise, percent
58   // encode the character and write that out instead of the
59   // reserved character.
60   if (llvm::isAlnum(C) ||
61       StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C))
62     return std::string(&C, 1);
63   return "%" + llvm::toHex(StringRef(&C, 1));
64 }
65 
66 /// \internal
67 /// \brief Return a URI representing the given file name.
68 ///
69 /// \param Filename The filename to be represented as URI.
70 ///
71 /// \return RFC3986 URI representing the input file name.
fileNameToURI(StringRef Filename)72 static std::string fileNameToURI(StringRef Filename) {
73   SmallString<32> Ret = StringRef("file://");
74 
75   // Get the root name to see if it has a URI authority.
76   StringRef Root = sys::path::root_name(Filename);
77   if (Root.starts_with("//")) {
78     // There is an authority, so add it to the URI.
79     Ret += Root.drop_front(2).str();
80   } else if (!Root.empty()) {
81     // There is no authority, so end the component and add the root to the URI.
82     Ret += Twine("/" + Root).str();
83   }
84 
85   auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
86   assert(Iter != End && "Expected there to be a non-root path component.");
87   // Add the rest of the path components, encoding any reserved characters;
88   // we skip past the first path component, as it was handled it above.
89   for (StringRef Component : llvm::make_range(++Iter, End)) {
90     // For reasons unknown to me, we may get a backslash with Windows native
91     // paths for the initial backslash following the drive component, which
92     // we need to ignore as a URI path part.
93     if (Component == "\\")
94       continue;
95 
96     // Add the separator between the previous path part and the one being
97     // currently processed.
98     Ret += "/";
99 
100     // URI encode the part.
101     for (char C : Component) {
102       Ret += percentEncodeURICharacter(C);
103     }
104   }
105 
106   return std::string(Ret);
107 }
108 ///  @}
109 
110 /// \brief Calculate the column position expressed in the number of UTF-8 code
111 /// points from column start to the source location
112 ///
113 /// \param Loc The source location whose column needs to be calculated.
114 /// \param TokenLen Optional hint for when the token is multiple bytes long.
115 ///
116 /// \return The column number as a UTF-8 aware byte offset from column start to
117 /// the effective source location.
adjustColumnPos(FullSourceLoc Loc,unsigned int TokenLen=0)118 static unsigned int adjustColumnPos(FullSourceLoc Loc,
119                                     unsigned int TokenLen = 0) {
120   assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
121 
122   std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc();
123   std::optional<MemoryBufferRef> Buf =
124       Loc.getManager().getBufferOrNone(LocInfo.first);
125   assert(Buf && "got an invalid buffer for the location's file");
126   assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
127          "token extends past end of buffer?");
128 
129   // Adjust the offset to be the start of the line, since we'll be counting
130   // Unicode characters from there until our column offset.
131   unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1);
132   unsigned int Ret = 1;
133   while (Off < (LocInfo.second + TokenLen)) {
134     Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
135     Ret++;
136   }
137 
138   return Ret;
139 }
140 
141 /// \name SARIF Utilities
142 /// @{
143 
144 /// \internal
createMessage(StringRef Text)145 json::Object createMessage(StringRef Text) {
146   return json::Object{{"text", Text.str()}};
147 }
148 
149 /// \internal
150 /// \pre CharSourceRange must be a token range
createTextRegion(const SourceManager & SM,const CharSourceRange & R)151 static json::Object createTextRegion(const SourceManager &SM,
152                                      const CharSourceRange &R) {
153   FullSourceLoc BeginCharLoc{R.getBegin(), SM};
154   FullSourceLoc EndCharLoc{R.getEnd(), SM};
155   json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()},
156                       {"startColumn", adjustColumnPos(BeginCharLoc)}};
157 
158   if (BeginCharLoc == EndCharLoc) {
159     Region["endColumn"] = adjustColumnPos(BeginCharLoc);
160   } else {
161     Region["endLine"] = EndCharLoc.getExpansionLineNumber();
162     Region["endColumn"] = adjustColumnPos(EndCharLoc);
163   }
164   return Region;
165 }
166 
createLocation(json::Object && PhysicalLocation,StringRef Message="")167 static json::Object createLocation(json::Object &&PhysicalLocation,
168                                    StringRef Message = "") {
169   json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}};
170   if (!Message.empty())
171     Ret.insert({"message", createMessage(Message)});
172   return Ret;
173 }
174 
importanceToStr(ThreadFlowImportance I)175 static StringRef importanceToStr(ThreadFlowImportance I) {
176   switch (I) {
177   case ThreadFlowImportance::Important:
178     return "important";
179   case ThreadFlowImportance::Essential:
180     return "essential";
181   case ThreadFlowImportance::Unimportant:
182     return "unimportant";
183   }
184   llvm_unreachable("Fully covered switch is not so fully covered");
185 }
186 
resultLevelToStr(SarifResultLevel R)187 static StringRef resultLevelToStr(SarifResultLevel R) {
188   switch (R) {
189   case SarifResultLevel::None:
190     return "none";
191   case SarifResultLevel::Note:
192     return "note";
193   case SarifResultLevel::Warning:
194     return "warning";
195   case SarifResultLevel::Error:
196     return "error";
197   }
198   llvm_unreachable("Potentially un-handled SarifResultLevel. "
199                    "Is the switch not fully covered?");
200 }
201 
202 static json::Object
createThreadFlowLocation(json::Object && Location,const ThreadFlowImportance & Importance)203 createThreadFlowLocation(json::Object &&Location,
204                          const ThreadFlowImportance &Importance) {
205   return json::Object{{"location", std::move(Location)},
206                       {"importance", importanceToStr(Importance)}};
207 }
208 ///  @}
209 
210 json::Object
createPhysicalLocation(const CharSourceRange & R)211 SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) {
212   assert(R.isValid() &&
213          "Cannot create a physicalLocation from invalid SourceRange!");
214   assert(R.isCharRange() &&
215          "Cannot create a physicalLocation from a token range!");
216   FullSourceLoc Start{R.getBegin(), SourceMgr};
217   OptionalFileEntryRef FE = Start.getExpansionLoc().getFileEntryRef();
218   assert(FE && "Diagnostic does not exist within a valid file!");
219 
220   const std::string &FileURI = fileNameToURI(getFileName(*FE));
221   auto I = CurrentArtifacts.find(FileURI);
222 
223   if (I == CurrentArtifacts.end()) {
224     uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size());
225     const SarifArtifactLocation &Location =
226         SarifArtifactLocation::create(FileURI).setIndex(Idx);
227     const SarifArtifact &Artifact = SarifArtifact::create(Location)
228                                         .setRoles({"resultFile"})
229                                         .setLength(FE->getSize())
230                                         .setMimeType("text/plain");
231     auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact});
232     // If inserted, ensure the original iterator points to the newly inserted
233     // element, so it can be used downstream.
234     if (StatusIter.second)
235       I = StatusIter.first;
236   }
237   assert(I != CurrentArtifacts.end() && "Failed to insert new artifact");
238   const SarifArtifactLocation &Location = I->second.Location;
239   json::Object ArtifactLocationObject{{"uri", Location.URI}};
240   if (Location.Index.has_value())
241     ArtifactLocationObject["index"] = *Location.Index;
242   return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)},
243                        {"region", createTextRegion(SourceMgr, R)}}};
244 }
245 
getCurrentTool()246 json::Object &SarifDocumentWriter::getCurrentTool() {
247   assert(!Closed && "SARIF Document is closed. "
248                     "Need to call createRun() before using getcurrentTool!");
249 
250   // Since Closed = false here, expect there to be at least 1 Run, anything
251   // else is an invalid state.
252   assert(!Runs.empty() && "There are no runs associated with the document!");
253 
254   return *Runs.back().getAsObject()->get("tool")->getAsObject();
255 }
256 
reset()257 void SarifDocumentWriter::reset() {
258   CurrentRules.clear();
259   CurrentArtifacts.clear();
260 }
261 
endRun()262 void SarifDocumentWriter::endRun() {
263   // Exit early if trying to close a closed Document.
264   if (Closed) {
265     reset();
266     return;
267   }
268 
269   // Since Closed = false here, expect there to be at least 1 Run, anything
270   // else is an invalid state.
271   assert(!Runs.empty() && "There are no runs associated with the document!");
272 
273   // Flush all the rules.
274   json::Object &Tool = getCurrentTool();
275   json::Array Rules;
276   for (const SarifRule &R : CurrentRules) {
277     json::Object Config{
278         {"enabled", R.DefaultConfiguration.Enabled},
279         {"level", resultLevelToStr(R.DefaultConfiguration.Level)},
280         {"rank", R.DefaultConfiguration.Rank}};
281     json::Object Rule{
282         {"name", R.Name},
283         {"id", R.Id},
284         {"fullDescription", json::Object{{"text", R.Description}}},
285         {"defaultConfiguration", std::move(Config)}};
286     if (!R.HelpURI.empty())
287       Rule["helpUri"] = R.HelpURI;
288     Rules.emplace_back(std::move(Rule));
289   }
290   json::Object &Driver = *Tool.getObject("driver");
291   Driver["rules"] = std::move(Rules);
292 
293   // Flush all the artifacts.
294   json::Object &Run = getCurrentRun();
295   json::Array *Artifacts = Run.getArray("artifacts");
296   SmallVector<std::pair<StringRef, SarifArtifact>, 0> Vec;
297   for (const auto &[K, V] : CurrentArtifacts)
298     Vec.emplace_back(K, V);
299   llvm::sort(Vec, llvm::less_first());
300   for (const auto &[_, A] : Vec) {
301     json::Object Loc{{"uri", A.Location.URI}};
302     if (A.Location.Index.has_value()) {
303       Loc["index"] = static_cast<int64_t>(*A.Location.Index);
304     }
305     json::Object Artifact;
306     Artifact["location"] = std::move(Loc);
307     if (A.Length.has_value())
308       Artifact["length"] = static_cast<int64_t>(*A.Length);
309     if (!A.Roles.empty())
310       Artifact["roles"] = json::Array(A.Roles);
311     if (!A.MimeType.empty())
312       Artifact["mimeType"] = A.MimeType;
313     if (A.Offset.has_value())
314       Artifact["offset"] = *A.Offset;
315     Artifacts->push_back(json::Value(std::move(Artifact)));
316   }
317 
318   // Clear, reset temporaries before next run.
319   reset();
320 
321   // Mark the document as closed.
322   Closed = true;
323 }
324 
325 json::Array
createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows)326 SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) {
327   json::Object Ret{{"locations", json::Array{}}};
328   json::Array Locs;
329   for (const auto &ThreadFlow : ThreadFlows) {
330     json::Object PLoc = createPhysicalLocation(ThreadFlow.Range);
331     json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message);
332     Locs.emplace_back(
333         createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance));
334   }
335   Ret["locations"] = std::move(Locs);
336   return json::Array{std::move(Ret)};
337 }
338 
339 json::Object
createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows)340 SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) {
341   return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}};
342 }
343 
createRun(StringRef ShortToolName,StringRef LongToolName,StringRef ToolVersion)344 void SarifDocumentWriter::createRun(StringRef ShortToolName,
345                                     StringRef LongToolName,
346                                     StringRef ToolVersion) {
347   // Clear resources associated with a previous run.
348   endRun();
349 
350   // Signify a new run has begun.
351   Closed = false;
352 
353   json::Object Tool{
354       {"driver",
355        json::Object{{"name", ShortToolName},
356                     {"fullName", LongToolName},
357                     {"language", "en-US"},
358                     {"version", ToolVersion},
359                     {"informationUri",
360                      "https://clang.llvm.org/docs/UsersManual.html"}}}};
361   json::Object TheRun{{"tool", std::move(Tool)},
362                       {"results", {}},
363                       {"artifacts", {}},
364                       {"columnKind", "unicodeCodePoints"}};
365   Runs.emplace_back(std::move(TheRun));
366 }
367 
getCurrentRun()368 json::Object &SarifDocumentWriter::getCurrentRun() {
369   assert(!Closed &&
370          "SARIF Document is closed. "
371          "Can only getCurrentRun() if document is opened via createRun(), "
372          "create a run first");
373 
374   // Since Closed = false here, expect there to be at least 1 Run, anything
375   // else is an invalid state.
376   assert(!Runs.empty() && "There are no runs associated with the document!");
377   return *Runs.back().getAsObject();
378 }
379 
createRule(const SarifRule & Rule)380 size_t SarifDocumentWriter::createRule(const SarifRule &Rule) {
381   size_t Ret = CurrentRules.size();
382   CurrentRules.emplace_back(Rule);
383   return Ret;
384 }
385 
appendResult(const SarifResult & Result)386 void SarifDocumentWriter::appendResult(const SarifResult &Result) {
387   size_t RuleIdx = Result.RuleIdx;
388   assert(RuleIdx < CurrentRules.size() &&
389          "Trying to reference a rule that doesn't exist");
390   const SarifRule &Rule = CurrentRules[RuleIdx];
391   assert(Rule.DefaultConfiguration.Enabled &&
392          "Cannot add a result referencing a disabled Rule");
393   json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)},
394                    {"ruleIndex", static_cast<int64_t>(RuleIdx)},
395                    {"ruleId", Rule.Id}};
396   if (!Result.Locations.empty()) {
397     json::Array Locs;
398     for (auto &Range : Result.Locations) {
399       Locs.emplace_back(createLocation(createPhysicalLocation(Range)));
400     }
401     Ret["locations"] = std::move(Locs);
402   }
403   if (!Result.ThreadFlows.empty())
404     Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)};
405 
406   Ret["level"] = resultLevelToStr(
407       Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level));
408 
409   json::Object &Run = getCurrentRun();
410   json::Array *Results = Run.getArray("results");
411   Results->emplace_back(std::move(Ret));
412 }
413 
createDocument()414 json::Object SarifDocumentWriter::createDocument() {
415   // Flush all temporaries to their destinations if needed.
416   endRun();
417 
418   json::Object Doc{
419       {"$schema", SchemaURI},
420       {"version", SchemaVersion},
421   };
422   if (!Runs.empty())
423     Doc["runs"] = json::Array(Runs);
424   return Doc;
425 }
426