1 //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
/// This file contains the definition of the SarifDocumentWriter class, and
11 /// associated builders such as:
12 /// - \ref SarifArtifact
13 /// - \ref SarifArtifactLocation
14 /// - \ref SarifRule
15 /// - \ref SarifResult
16 //===----------------------------------------------------------------------===//
17 #include "clang/Basic/Sarif.h"
18 #include "clang/Basic/SourceLocation.h"
19 #include "clang/Basic/SourceManager.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/ADT/StringMap.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Support/ConvertUTF.h"
26 #include "llvm/Support/JSON.h"
27 #include "llvm/Support/Path.h"
28 
29 #include <optional>
30 #include <string>
31 #include <utility>
32 
33 using namespace clang;
34 using namespace llvm;
35 
36 using clang::detail::SarifArtifact;
37 using clang::detail::SarifArtifactLocation;
38 
39 static StringRef getFileName(const FileEntry &FE) {
40   StringRef Filename = FE.tryGetRealPathName();
41   if (Filename.empty())
42     Filename = FE.getName();
43   return Filename;
44 }
45 /// \name URI
46 /// @{
47 
48 /// \internal
49 /// \brief
50 /// Return the RFC3986 encoding of the input character.
51 ///
52 /// \param C Character to encode to RFC3986.
53 ///
54 /// \return The RFC3986 representation of \c C.
55 static std::string percentEncodeURICharacter(char C) {
56   // RFC 3986 claims alpha, numeric, and this handful of
57   // characters are not reserved for the path component and
58   // should be written out directly. Otherwise, percent
59   // encode the character and write that out instead of the
60   // reserved character.
61   if (llvm::isAlnum(C) ||
62       StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C))
63     return std::string(&C, 1);
64   return "%" + llvm::toHex(StringRef(&C, 1));
65 }
66 
67 /// \internal
68 /// \brief Return a URI representing the given file name.
69 ///
70 /// \param Filename The filename to be represented as URI.
71 ///
72 /// \return RFC3986 URI representing the input file name.
73 static std::string fileNameToURI(StringRef Filename) {
74   SmallString<32> Ret = StringRef("file://");
75 
76   // Get the root name to see if it has a URI authority.
77   StringRef Root = sys::path::root_name(Filename);
78   if (Root.startswith("//")) {
79     // There is an authority, so add it to the URI.
80     Ret += Root.drop_front(2).str();
81   } else if (!Root.empty()) {
82     // There is no authority, so end the component and add the root to the URI.
83     Ret += Twine("/" + Root).str();
84   }
85 
86   auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
87   assert(Iter != End && "Expected there to be a non-root path component.");
88   // Add the rest of the path components, encoding any reserved characters;
89   // we skip past the first path component, as it was handled it above.
90   std::for_each(++Iter, End, [&Ret](StringRef Component) {
91     // For reasons unknown to me, we may get a backslash with Windows native
92     // paths for the initial backslash following the drive component, which
93     // we need to ignore as a URI path part.
94     if (Component == "\\")
95       return;
96 
97     // Add the separator between the previous path part and the one being
98     // currently processed.
99     Ret += "/";
100 
101     // URI encode the part.
102     for (char C : Component) {
103       Ret += percentEncodeURICharacter(C);
104     }
105   });
106 
107   return std::string(Ret);
108 }
109 ///  @}
110 
111 /// \brief Calculate the column position expressed in the number of UTF-8 code
112 /// points from column start to the source location
113 ///
114 /// \param Loc The source location whose column needs to be calculated.
115 /// \param TokenLen Optional hint for when the token is multiple bytes long.
116 ///
117 /// \return The column number as a UTF-8 aware byte offset from column start to
118 /// the effective source location.
119 static unsigned int adjustColumnPos(FullSourceLoc Loc,
120                                     unsigned int TokenLen = 0) {
121   assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
122 
123   std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc();
124   std::optional<MemoryBufferRef> Buf =
125       Loc.getManager().getBufferOrNone(LocInfo.first);
126   assert(Buf && "got an invalid buffer for the location's file");
127   assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
128          "token extends past end of buffer?");
129 
130   // Adjust the offset to be the start of the line, since we'll be counting
131   // Unicode characters from there until our column offset.
132   unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1);
133   unsigned int Ret = 1;
134   while (Off < (LocInfo.second + TokenLen)) {
135     Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
136     Ret++;
137   }
138 
139   return Ret;
140 }
141 
142 /// \name SARIF Utilities
143 /// @{
144 
145 /// \internal
146 json::Object createMessage(StringRef Text) {
147   return json::Object{{"text", Text.str()}};
148 }
149 
150 /// \internal
151 /// \pre CharSourceRange must be a token range
152 static json::Object createTextRegion(const SourceManager &SM,
153                                      const CharSourceRange &R) {
154   FullSourceLoc BeginCharLoc{R.getBegin(), SM};
155   FullSourceLoc EndCharLoc{R.getEnd(), SM};
156   json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()},
157                       {"startColumn", adjustColumnPos(BeginCharLoc)}};
158 
159   if (BeginCharLoc == EndCharLoc) {
160     Region["endColumn"] = adjustColumnPos(BeginCharLoc);
161   } else {
162     Region["endLine"] = EndCharLoc.getExpansionLineNumber();
163     Region["endColumn"] = adjustColumnPos(EndCharLoc);
164   }
165   return Region;
166 }
167 
168 static json::Object createLocation(json::Object &&PhysicalLocation,
169                                    StringRef Message = "") {
170   json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}};
171   if (!Message.empty())
172     Ret.insert({"message", createMessage(Message)});
173   return Ret;
174 }
175 
176 static StringRef importanceToStr(ThreadFlowImportance I) {
177   switch (I) {
178   case ThreadFlowImportance::Important:
179     return "important";
180   case ThreadFlowImportance::Essential:
181     return "essential";
182   case ThreadFlowImportance::Unimportant:
183     return "unimportant";
184   }
185   llvm_unreachable("Fully covered switch is not so fully covered");
186 }
187 
188 static StringRef resultLevelToStr(SarifResultLevel R) {
189   switch (R) {
190   case SarifResultLevel::None:
191     return "none";
192   case SarifResultLevel::Note:
193     return "note";
194   case SarifResultLevel::Warning:
195     return "warning";
196   case SarifResultLevel::Error:
197     return "error";
198   }
199   llvm_unreachable("Potentially un-handled SarifResultLevel. "
200                    "Is the switch not fully covered?");
201 }
202 
203 static json::Object
204 createThreadFlowLocation(json::Object &&Location,
205                          const ThreadFlowImportance &Importance) {
206   return json::Object{{"location", std::move(Location)},
207                       {"importance", importanceToStr(Importance)}};
208 }
209 ///  @}
210 
211 json::Object
212 SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) {
213   assert(R.isValid() &&
214          "Cannot create a physicalLocation from invalid SourceRange!");
215   assert(R.isCharRange() &&
216          "Cannot create a physicalLocation from a token range!");
217   FullSourceLoc Start{R.getBegin(), SourceMgr};
218   const FileEntry *FE = Start.getExpansionLoc().getFileEntry();
219   assert(FE != nullptr && "Diagnostic does not exist within a valid file!");
220 
221   const std::string &FileURI = fileNameToURI(getFileName(*FE));
222   auto I = CurrentArtifacts.find(FileURI);
223 
224   if (I == CurrentArtifacts.end()) {
225     uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size());
226     const SarifArtifactLocation &Location =
227         SarifArtifactLocation::create(FileURI).setIndex(Idx);
228     const SarifArtifact &Artifact = SarifArtifact::create(Location)
229                                         .setRoles({"resultFile"})
230                                         .setLength(FE->getSize())
231                                         .setMimeType("text/plain");
232     auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact});
233     // If inserted, ensure the original iterator points to the newly inserted
234     // element, so it can be used downstream.
235     if (StatusIter.second)
236       I = StatusIter.first;
237   }
238   assert(I != CurrentArtifacts.end() && "Failed to insert new artifact");
239   const SarifArtifactLocation &Location = I->second.Location;
240   json::Object ArtifactLocationObject{{"uri", Location.URI}};
241   if (Location.Index.has_value())
242     ArtifactLocationObject["index"] = *Location.Index;
243   return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)},
244                        {"region", createTextRegion(SourceMgr, R)}}};
245 }
246 
247 json::Object &SarifDocumentWriter::getCurrentTool() {
248   assert(!Closed && "SARIF Document is closed. "
249                     "Need to call createRun() before using getcurrentTool!");
250 
251   // Since Closed = false here, expect there to be at least 1 Run, anything
252   // else is an invalid state.
253   assert(!Runs.empty() && "There are no runs associated with the document!");
254 
255   return *Runs.back().getAsObject()->get("tool")->getAsObject();
256 }
257 
258 void SarifDocumentWriter::reset() {
259   CurrentRules.clear();
260   CurrentArtifacts.clear();
261 }
262 
263 void SarifDocumentWriter::endRun() {
264   // Exit early if trying to close a closed Document.
265   if (Closed) {
266     reset();
267     return;
268   }
269 
270   // Since Closed = false here, expect there to be at least 1 Run, anything
271   // else is an invalid state.
272   assert(!Runs.empty() && "There are no runs associated with the document!");
273 
274   // Flush all the rules.
275   json::Object &Tool = getCurrentTool();
276   json::Array Rules;
277   for (const SarifRule &R : CurrentRules) {
278     json::Object Config{
279         {"enabled", R.DefaultConfiguration.Enabled},
280         {"level", resultLevelToStr(R.DefaultConfiguration.Level)},
281         {"rank", R.DefaultConfiguration.Rank}};
282     json::Object Rule{
283         {"name", R.Name},
284         {"id", R.Id},
285         {"fullDescription", json::Object{{"text", R.Description}}},
286         {"defaultConfiguration", std::move(Config)}};
287     if (!R.HelpURI.empty())
288       Rule["helpUri"] = R.HelpURI;
289     Rules.emplace_back(std::move(Rule));
290   }
291   json::Object &Driver = *Tool.getObject("driver");
292   Driver["rules"] = std::move(Rules);
293 
294   // Flush all the artifacts.
295   json::Object &Run = getCurrentRun();
296   json::Array *Artifacts = Run.getArray("artifacts");
297   SmallVector<std::pair<StringRef, SarifArtifact>, 0> Vec;
298   for (const auto &[K, V] : CurrentArtifacts)
299     Vec.emplace_back(K, V);
300   llvm::sort(Vec, llvm::less_first());
301   for (const auto &[_, A] : Vec) {
302     json::Object Loc{{"uri", A.Location.URI}};
303     if (A.Location.Index.has_value()) {
304       Loc["index"] = static_cast<int64_t>(*A.Location.Index);
305     }
306     json::Object Artifact;
307     Artifact["location"] = std::move(Loc);
308     if (A.Length.has_value())
309       Artifact["length"] = static_cast<int64_t>(*A.Length);
310     if (!A.Roles.empty())
311       Artifact["roles"] = json::Array(A.Roles);
312     if (!A.MimeType.empty())
313       Artifact["mimeType"] = A.MimeType;
314     if (A.Offset.has_value())
315       Artifact["offset"] = *A.Offset;
316     Artifacts->push_back(json::Value(std::move(Artifact)));
317   }
318 
319   // Clear, reset temporaries before next run.
320   reset();
321 
322   // Mark the document as closed.
323   Closed = true;
324 }
325 
326 json::Array
327 SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) {
328   json::Object Ret{{"locations", json::Array{}}};
329   json::Array Locs;
330   for (const auto &ThreadFlow : ThreadFlows) {
331     json::Object PLoc = createPhysicalLocation(ThreadFlow.Range);
332     json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message);
333     Locs.emplace_back(
334         createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance));
335   }
336   Ret["locations"] = std::move(Locs);
337   return json::Array{std::move(Ret)};
338 }
339 
340 json::Object
341 SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) {
342   return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}};
343 }
344 
345 void SarifDocumentWriter::createRun(StringRef ShortToolName,
346                                     StringRef LongToolName,
347                                     StringRef ToolVersion) {
348   // Clear resources associated with a previous run.
349   endRun();
350 
351   // Signify a new run has begun.
352   Closed = false;
353 
354   json::Object Tool{
355       {"driver",
356        json::Object{{"name", ShortToolName},
357                     {"fullName", LongToolName},
358                     {"language", "en-US"},
359                     {"version", ToolVersion},
360                     {"informationUri",
361                      "https://clang.llvm.org/docs/UsersManual.html"}}}};
362   json::Object TheRun{{"tool", std::move(Tool)},
363                       {"results", {}},
364                       {"artifacts", {}},
365                       {"columnKind", "unicodeCodePoints"}};
366   Runs.emplace_back(std::move(TheRun));
367 }
368 
369 json::Object &SarifDocumentWriter::getCurrentRun() {
370   assert(!Closed &&
371          "SARIF Document is closed. "
372          "Can only getCurrentRun() if document is opened via createRun(), "
373          "create a run first");
374 
375   // Since Closed = false here, expect there to be at least 1 Run, anything
376   // else is an invalid state.
377   assert(!Runs.empty() && "There are no runs associated with the document!");
378   return *Runs.back().getAsObject();
379 }
380 
381 size_t SarifDocumentWriter::createRule(const SarifRule &Rule) {
382   size_t Ret = CurrentRules.size();
383   CurrentRules.emplace_back(Rule);
384   return Ret;
385 }
386 
387 void SarifDocumentWriter::appendResult(const SarifResult &Result) {
388   size_t RuleIdx = Result.RuleIdx;
389   assert(RuleIdx < CurrentRules.size() &&
390          "Trying to reference a rule that doesn't exist");
391   const SarifRule &Rule = CurrentRules[RuleIdx];
392   assert(Rule.DefaultConfiguration.Enabled &&
393          "Cannot add a result referencing a disabled Rule");
394   json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)},
395                    {"ruleIndex", static_cast<int64_t>(RuleIdx)},
396                    {"ruleId", Rule.Id}};
397   if (!Result.Locations.empty()) {
398     json::Array Locs;
399     for (auto &Range : Result.Locations) {
400       Locs.emplace_back(createLocation(createPhysicalLocation(Range)));
401     }
402     Ret["locations"] = std::move(Locs);
403   }
404   if (!Result.ThreadFlows.empty())
405     Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)};
406 
407   Ret["level"] = resultLevelToStr(
408       Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level));
409 
410   json::Object &Run = getCurrentRun();
411   json::Array *Results = Run.getArray("results");
412   Results->emplace_back(std::move(Ret));
413 }
414 
415 json::Object SarifDocumentWriter::createDocument() {
416   // Flush all temporaries to their destinations if needed.
417   endRun();
418 
419   json::Object Doc{
420       {"$schema", SchemaURI},
421       {"version", SchemaVersion},
422   };
423   if (!Runs.empty())
424     Doc["runs"] = json::Array(Runs);
425   return Doc;
426 }
427