1 //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
/// This file contains the definition of the SarifDocumentWriter class, and
11 /// associated builders such as:
12 /// - \ref SarifArtifact
13 /// - \ref SarifArtifactLocation
14 /// - \ref SarifRule
15 /// - \ref SarifResult
16 //===----------------------------------------------------------------------===//
17 #include "clang/Basic/Sarif.h"
18 #include "clang/Basic/SourceLocation.h"
19 #include "clang/Basic/SourceManager.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/StringMap.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/ConvertUTF.h"
25 #include "llvm/Support/JSON.h"
26 #include "llvm/Support/Path.h"
27 
28 #include <string>
29 #include <utility>
30 
31 using namespace clang;
32 using namespace llvm;
33 
34 using clang::detail::SarifArtifact;
35 using clang::detail::SarifArtifactLocation;
36 
37 static StringRef getFileName(const FileEntry &FE) {
38   StringRef Filename = FE.tryGetRealPathName();
39   if (Filename.empty())
40     Filename = FE.getName();
41   return Filename;
42 }
43 /// \name URI
44 /// @{
45 
46 /// \internal
47 /// \brief
48 /// Return the RFC3986 encoding of the input character.
49 ///
50 /// \param C Character to encode to RFC3986.
51 ///
52 /// \return The RFC3986 representation of \c C.
53 static std::string percentEncodeURICharacter(char C) {
54   // RFC 3986 claims alpha, numeric, and this handful of
55   // characters are not reserved for the path component and
56   // should be written out directly. Otherwise, percent
57   // encode the character and write that out instead of the
58   // reserved character.
59   if (llvm::isAlnum(C) ||
60       StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C))
61     return std::string(&C, 1);
62   return "%" + llvm::toHex(StringRef(&C, 1));
63 }
64 
65 /// \internal
66 /// \brief Return a URI representing the given file name.
67 ///
68 /// \param Filename The filename to be represented as URI.
69 ///
70 /// \return RFC3986 URI representing the input file name.
71 static std::string fileNameToURI(StringRef Filename) {
72   SmallString<32> Ret = StringRef("file://");
73 
74   // Get the root name to see if it has a URI authority.
75   StringRef Root = sys::path::root_name(Filename);
76   if (Root.startswith("//")) {
77     // There is an authority, so add it to the URI.
78     Ret += Root.drop_front(2).str();
79   } else if (!Root.empty()) {
80     // There is no authority, so end the component and add the root to the URI.
81     Ret += Twine("/" + Root).str();
82   }
83 
84   auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
85   assert(Iter != End && "Expected there to be a non-root path component.");
86   // Add the rest of the path components, encoding any reserved characters;
87   // we skip past the first path component, as it was handled it above.
88   std::for_each(++Iter, End, [&Ret](StringRef Component) {
89     // For reasons unknown to me, we may get a backslash with Windows native
90     // paths for the initial backslash following the drive component, which
91     // we need to ignore as a URI path part.
92     if (Component == "\\")
93       return;
94 
95     // Add the separator between the previous path part and the one being
96     // currently processed.
97     Ret += "/";
98 
99     // URI encode the part.
100     for (char C : Component) {
101       Ret += percentEncodeURICharacter(C);
102     }
103   });
104 
105   return std::string(Ret);
106 }
107 ///  @}
108 
109 /// \brief Calculate the column position expressed in the number of UTF-8 code
110 /// points from column start to the source location
111 ///
112 /// \param Loc The source location whose column needs to be calculated.
113 /// \param TokenLen Optional hint for when the token is multiple bytes long.
114 ///
115 /// \return The column number as a UTF-8 aware byte offset from column start to
116 /// the effective source location.
117 static unsigned int adjustColumnPos(FullSourceLoc Loc,
118                                     unsigned int TokenLen = 0) {
119   assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
120 
121   std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedLoc();
122   Optional<MemoryBufferRef> Buf =
123       Loc.getManager().getBufferOrNone(LocInfo.first);
124   assert(Buf && "got an invalid buffer for the location's file");
125   assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
126          "token extends past end of buffer?");
127 
128   // Adjust the offset to be the start of the line, since we'll be counting
129   // Unicode characters from there until our column offset.
130   unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1);
131   unsigned int Ret = 1;
132   while (Off < (LocInfo.second + TokenLen)) {
133     Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
134     Ret++;
135   }
136 
137   return Ret;
138 }
139 
140 /// \name SARIF Utilities
141 /// @{
142 
143 /// \internal
144 json::Object createMessage(StringRef Text) {
145   return json::Object{{"text", Text.str()}};
146 }
147 
148 /// \internal
149 /// \pre CharSourceRange must be a token range
150 static json::Object createTextRegion(const SourceManager &SM,
151                                      const CharSourceRange &R) {
152   FullSourceLoc FirstTokenLoc{R.getBegin(), SM};
153   FullSourceLoc LastTokenLoc{R.getEnd(), SM};
154   json::Object Region{{"startLine", FirstTokenLoc.getExpansionLineNumber()},
155                       {"startColumn", adjustColumnPos(FirstTokenLoc)},
156                       {"endColumn", adjustColumnPos(LastTokenLoc)}};
157   if (FirstTokenLoc != LastTokenLoc) {
158     Region["endLine"] = LastTokenLoc.getExpansionLineNumber();
159   }
160   return Region;
161 }
162 
163 static json::Object createLocation(json::Object &&PhysicalLocation,
164                                    StringRef Message = "") {
165   json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}};
166   if (!Message.empty())
167     Ret.insert({"message", createMessage(Message)});
168   return Ret;
169 }
170 
171 static StringRef importanceToStr(ThreadFlowImportance I) {
172   switch (I) {
173   case ThreadFlowImportance::Important:
174     return "important";
175   case ThreadFlowImportance::Essential:
176     return "essential";
177   case ThreadFlowImportance::Unimportant:
178     return "unimportant";
179   }
180   llvm_unreachable("Fully covered switch is not so fully covered");
181 }
182 
183 static json::Object
184 createThreadFlowLocation(json::Object &&Location,
185                          const ThreadFlowImportance &Importance) {
186   return json::Object{{"location", std::move(Location)},
187                       {"importance", importanceToStr(Importance)}};
188 }
189 ///  @}
190 
191 json::Object
192 SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) {
193   assert(R.isValid() &&
194          "Cannot create a physicalLocation from invalid SourceRange!");
195   assert(R.isCharRange() &&
196          "Cannot create a physicalLocation from a token range!");
197   FullSourceLoc Start{R.getBegin(), SourceMgr};
198   const FileEntry *FE = Start.getExpansionLoc().getFileEntry();
199   assert(FE != nullptr && "Diagnostic does not exist within a valid file!");
200 
201   const std::string &FileURI = fileNameToURI(getFileName(*FE));
202   auto I = CurrentArtifacts.find(FileURI);
203 
204   if (I == CurrentArtifacts.end()) {
205     uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size());
206     const SarifArtifactLocation &Location =
207         SarifArtifactLocation::create(FileURI).setIndex(Idx);
208     const SarifArtifact &Artifact = SarifArtifact::create(Location)
209                                         .setRoles({"resultFile"})
210                                         .setLength(FE->getSize())
211                                         .setMimeType("text/plain");
212     auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact});
213     // If inserted, ensure the original iterator points to the newly inserted
214     // element, so it can be used downstream.
215     if (StatusIter.second)
216       I = StatusIter.first;
217   }
218   assert(I != CurrentArtifacts.end() && "Failed to insert new artifact");
219   const SarifArtifactLocation &Location = I->second.Location;
220   uint32_t Idx = Location.Index.value();
221   return json::Object{{{"artifactLocation", json::Object{{{"index", Idx}}}},
222                        {"region", createTextRegion(SourceMgr, R)}}};
223 }
224 
225 json::Object &SarifDocumentWriter::getCurrentTool() {
226   assert(!Closed && "SARIF Document is closed. "
227                     "Need to call createRun() before using getcurrentTool!");
228 
229   // Since Closed = false here, expect there to be at least 1 Run, anything
230   // else is an invalid state.
231   assert(!Runs.empty() && "There are no runs associated with the document!");
232 
233   return *Runs.back().getAsObject()->get("tool")->getAsObject();
234 }
235 
236 void SarifDocumentWriter::reset() {
237   CurrentRules.clear();
238   CurrentArtifacts.clear();
239 }
240 
241 void SarifDocumentWriter::endRun() {
242   // Exit early if trying to close a closed Document.
243   if (Closed) {
244     reset();
245     return;
246   }
247 
248   // Since Closed = false here, expect there to be at least 1 Run, anything
249   // else is an invalid state.
250   assert(!Runs.empty() && "There are no runs associated with the document!");
251 
252   // Flush all the rules.
253   json::Object &Tool = getCurrentTool();
254   json::Array Rules;
255   for (const SarifRule &R : CurrentRules) {
256     json::Object Rule{
257         {"name", R.Name},
258         {"id", R.Id},
259         {"fullDescription", json::Object{{"text", R.Description}}}};
260     if (!R.HelpURI.empty())
261       Rule["helpUri"] = R.HelpURI;
262     Rules.emplace_back(std::move(Rule));
263   }
264   json::Object &Driver = *Tool.getObject("driver");
265   Driver["rules"] = std::move(Rules);
266 
267   // Flush all the artifacts.
268   json::Object &Run = getCurrentRun();
269   json::Array *Artifacts = Run.getArray("artifacts");
270   for (const auto &Pair : CurrentArtifacts) {
271     const SarifArtifact &A = Pair.getValue();
272     json::Object Loc{{"uri", A.Location.URI}};
273     if (A.Location.Index.has_value()) {
274       Loc["index"] = static_cast<int64_t>(A.Location.Index.value());
275     }
276     json::Object Artifact;
277     Artifact["location"] = std::move(Loc);
278     if (A.Length.has_value())
279       Artifact["length"] = static_cast<int64_t>(A.Length.value());
280     if (!A.Roles.empty())
281       Artifact["roles"] = json::Array(A.Roles);
282     if (!A.MimeType.empty())
283       Artifact["mimeType"] = A.MimeType;
284     if (A.Offset.has_value())
285       Artifact["offset"] = A.Offset;
286     Artifacts->push_back(json::Value(std::move(Artifact)));
287   }
288 
289   // Clear, reset temporaries before next run.
290   reset();
291 
292   // Mark the document as closed.
293   Closed = true;
294 }
295 
296 json::Array
297 SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) {
298   json::Object Ret{{"locations", json::Array{}}};
299   json::Array Locs;
300   for (const auto &ThreadFlow : ThreadFlows) {
301     json::Object PLoc = createPhysicalLocation(ThreadFlow.Range);
302     json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message);
303     Locs.emplace_back(
304         createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance));
305   }
306   Ret["locations"] = std::move(Locs);
307   return json::Array{std::move(Ret)};
308 }
309 
310 json::Object
311 SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) {
312   return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}};
313 }
314 
315 void SarifDocumentWriter::createRun(StringRef ShortToolName,
316                                     StringRef LongToolName,
317                                     StringRef ToolVersion) {
318   // Clear resources associated with a previous run.
319   endRun();
320 
321   // Signify a new run has begun.
322   Closed = false;
323 
324   json::Object Tool{
325       {"driver",
326        json::Object{{"name", ShortToolName},
327                     {"fullName", LongToolName},
328                     {"language", "en-US"},
329                     {"version", ToolVersion},
330                     {"informationUri",
331                      "https://clang.llvm.org/docs/UsersManual.html"}}}};
332   json::Object TheRun{{"tool", std::move(Tool)},
333                       {"results", {}},
334                       {"artifacts", {}},
335                       {"columnKind", "unicodeCodePoints"}};
336   Runs.emplace_back(std::move(TheRun));
337 }
338 
339 json::Object &SarifDocumentWriter::getCurrentRun() {
340   assert(!Closed &&
341          "SARIF Document is closed. "
342          "Can only getCurrentRun() if document is opened via createRun(), "
343          "create a run first");
344 
345   // Since Closed = false here, expect there to be at least 1 Run, anything
346   // else is an invalid state.
347   assert(!Runs.empty() && "There are no runs associated with the document!");
348   return *Runs.back().getAsObject();
349 }
350 
351 size_t SarifDocumentWriter::createRule(const SarifRule &Rule) {
352   size_t Ret = CurrentRules.size();
353   CurrentRules.emplace_back(Rule);
354   return Ret;
355 }
356 
357 void SarifDocumentWriter::appendResult(const SarifResult &Result) {
358   size_t RuleIdx = Result.RuleIdx;
359   assert(RuleIdx < CurrentRules.size() &&
360          "Trying to reference a rule that doesn't exist");
361   json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)},
362                    {"ruleIndex", static_cast<int64_t>(RuleIdx)},
363                    {"ruleId", CurrentRules[RuleIdx].Id}};
364   if (!Result.Locations.empty()) {
365     json::Array Locs;
366     for (auto &Range : Result.Locations) {
367       Locs.emplace_back(createLocation(createPhysicalLocation(Range)));
368     }
369     Ret["locations"] = std::move(Locs);
370   }
371   if (!Result.ThreadFlows.empty())
372     Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)};
373   json::Object &Run = getCurrentRun();
374   json::Array *Results = Run.getArray("results");
375   Results->emplace_back(std::move(Ret));
376 }
377 
378 json::Object SarifDocumentWriter::createDocument() {
379   // Flush all temporaries to their destinations if needed.
380   endRun();
381 
382   json::Object Doc{
383       {"$schema", SchemaURI},
384       {"version", SchemaVersion},
385   };
386   if (!Runs.empty())
387     Doc["runs"] = json::Array(Runs);
388   return Doc;
389 }
390