1 //== clang/Basic/Sarif.h - SARIF Diagnostics Object Model -------*- C++ -*--==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// Defines clang::SarifDocumentWriter, clang::SarifRule, clang::SarifResult.
10 ///
11 /// The document built can be accessed as a JSON Object.
12 /// Several value semantic types are also introduced which represent properties
13 /// of the SARIF standard, such as 'artifact', 'result', 'rule'.
14 ///
/// A SARIF (Static Analysis Results Interchange Format) document is a JSON
/// document that describes in detail the results of running static analysis
/// tools on a project. Each (non-trivial) document consists of at least one
/// "run", each of which is composed of details such as:
19 /// * Tool: The tool that was run
20 /// * Rules: The rules applied during the tool run, represented by
21 ///   \c reportingDescriptor objects in SARIF
22 /// * Results: The matches for the rules applied against the project(s) being
23 ///   evaluated, represented by \c result objects in SARIF
24 ///
25 /// Reference:
26 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html">The SARIF standard</a>
27 /// 2. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317836">SARIF<pre>reportingDescriptor</pre></a>
28 /// 3. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317638">SARIF<pre>result</pre></a>
29 //===----------------------------------------------------------------------===//
30 
31 #ifndef LLVM_CLANG_BASIC_SARIF_H
32 #define LLVM_CLANG_BASIC_SARIF_H
33 
34 #include "clang/Basic/SourceLocation.h"
35 #include "clang/Basic/Version.h"
36 #include "llvm/ADT/ArrayRef.h"
37 #include "llvm/ADT/Optional.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/StringMap.h"
40 #include "llvm/ADT/StringRef.h"
41 #include "llvm/Support/JSON.h"
42 #include <cassert>
43 #include <cstddef>
44 #include <cstdint>
45 #include <initializer_list>
46 #include <string>
47 
48 namespace clang {
49 
50 class SarifDocumentWriter;
51 class SourceManager;
52 
53 namespace detail {
54 
55 /// \internal
56 /// An artifact location is SARIF's way of describing the complete location
57 /// of an artifact encountered during analysis. The \c artifactLocation object
58 /// typically consists of a URI, and/or an index to reference the artifact it
59 /// locates.
60 ///
61 /// This builder makes an additional assumption: that every artifact encountered
62 /// by \c clang will be a physical, top-level artifact. Which is why the static
63 /// creation method \ref SarifArtifactLocation::create takes a mandatory URI
64 /// parameter. The official standard states that either a \c URI or \c Index
65 /// must be available in the object, \c clang picks the \c URI as a reasonable
66 /// default, because it intends to deal in physical artifacts for now.
67 ///
68 /// Reference:
69 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317427">artifactLocation object</a>
70 /// 2. \ref SarifArtifact
71 class SarifArtifactLocation {
72 private:
73   friend class clang::SarifDocumentWriter;
74 
75   llvm::Optional<uint32_t> Index;
76   std::string URI;
77 
78   SarifArtifactLocation() = delete;
79   explicit SarifArtifactLocation(const std::string &URI) : URI(URI) {}
80 
81 public:
82   static SarifArtifactLocation create(llvm::StringRef URI) {
83     return SarifArtifactLocation{URI.str()};
84   }
85 
86   SarifArtifactLocation setIndex(uint32_t Idx) {
87     Index = Idx;
88     return *this;
89   }
90 };
91 
92 /// \internal
93 /// An artifact in SARIF is any object (a sequence of bytes) addressable by
94 /// a URI (RFC 3986). The most common type of artifact for clang's use-case
95 /// would be source files. SARIF's artifact object is described in detail in
96 /// section 3.24.
///
98 /// Since every clang artifact MUST have a location (there being no nested
99 /// artifacts), the creation method \ref SarifArtifact::create requires a
100 /// \ref SarifArtifactLocation object.
101 ///
102 /// Reference:
103 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317611">artifact object</a>
104 class SarifArtifact {
105 private:
106   friend class clang::SarifDocumentWriter;
107 
108   llvm::Optional<uint32_t> Offset;
109   llvm::Optional<size_t> Length;
110   std::string MimeType;
111   SarifArtifactLocation Location;
112   llvm::SmallVector<std::string, 4> Roles;
113 
114   SarifArtifact() = delete;
115 
116   explicit SarifArtifact(const SarifArtifactLocation &Loc) : Location(Loc) {}
117 
118 public:
119   static SarifArtifact create(const SarifArtifactLocation &Loc) {
120     return SarifArtifact{Loc};
121   }
122 
123   SarifArtifact setOffset(uint32_t ArtifactOffset) {
124     Offset = ArtifactOffset;
125     return *this;
126   }
127 
128   SarifArtifact setLength(size_t NumBytes) {
129     Length = NumBytes;
130     return *this;
131   }
132 
133   SarifArtifact setRoles(std::initializer_list<llvm::StringRef> ArtifactRoles) {
134     Roles.assign(ArtifactRoles.begin(), ArtifactRoles.end());
135     return *this;
136   }
137 
138   SarifArtifact setMimeType(llvm::StringRef ArtifactMimeType) {
139     MimeType = ArtifactMimeType.str();
140     return *this;
141   }
142 };
143 
144 } // namespace detail
145 
/// Importance levels that can be attached to a \ref ThreadFlow through
/// \c ThreadFlow::setImportance.
///
/// NOTE(review): these presumably correspond to the values of the SARIF
/// \c importance property of a thread flow location; confirm against the
/// serialization code.
enum class ThreadFlowImportance {
  Important,
  Essential,
  Unimportant,
};
147 
148 /// A thread flow is a sequence of code locations that specify a possible path
149 /// through a single thread of execution.
150 /// A thread flow in SARIF is related to a code flow which describes
151 /// the progress of one or more programs through one or more thread flows.
152 ///
153 /// Reference:
154 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317744">threadFlow object</a>
155 /// 2. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317740">codeFlow object</a>
156 class ThreadFlow {
157   friend class SarifDocumentWriter;
158 
159   CharSourceRange Range;
160   ThreadFlowImportance Importance;
161   std::string Message;
162 
163   ThreadFlow() = default;
164 
165 public:
166   static ThreadFlow create() { return {}; }
167 
168   ThreadFlow setRange(const CharSourceRange &ItemRange) {
169     assert(ItemRange.isCharRange() &&
170            "ThreadFlows require a character granular source range!");
171     Range = ItemRange;
172     return *this;
173   }
174 
175   ThreadFlow setImportance(const ThreadFlowImportance &ItemImportance) {
176     Importance = ItemImportance;
177     return *this;
178   }
179 
180   ThreadFlow setMessage(llvm::StringRef ItemMessage) {
181     Message = ItemMessage.str();
182     return *this;
183   }
184 };
185 
186 /// A SARIF rule (\c reportingDescriptor object) contains information that
187 /// describes a reporting item generated by a tool. A reporting item is
188 /// either a result of analysis or notification of a condition encountered by
189 /// the tool. Rules are arbitrary but are identifiable by a hierarchical
190 /// rule-id.
191 ///
192 /// This builder provides an interface to create SARIF \c reportingDescriptor
193 /// objects via the \ref SarifRule::create static method.
194 ///
195 /// Reference:
196 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317836">reportingDescriptor object</a>
197 class SarifRule {
198   friend class clang::SarifDocumentWriter;
199 
200   std::string Name;
201   std::string Id;
202   std::string Description;
203   std::string HelpURI;
204 
205   SarifRule() = default;
206 
207 public:
208   static SarifRule create() { return {}; }
209 
210   SarifRule setName(llvm::StringRef RuleName) {
211     Name = RuleName.str();
212     return *this;
213   }
214 
215   SarifRule setRuleId(llvm::StringRef RuleId) {
216     Id = RuleId.str();
217     return *this;
218   }
219 
220   SarifRule setDescription(llvm::StringRef RuleDesc) {
221     Description = RuleDesc.str();
222     return *this;
223   }
224 
225   SarifRule setHelpURI(llvm::StringRef RuleHelpURI) {
226     HelpURI = RuleHelpURI.str();
227     return *this;
228   }
229 };
230 
231 /// A SARIF result (also called a "reporting item") is a unit of output
232 /// produced when one of the tool's \c reportingDescriptor encounters a match
233 /// on the file being analysed by the tool.
234 ///
235 /// This builder provides a \ref SarifResult::create static method that can be
236 /// used to create an empty shell onto which attributes can be added using the
237 /// \c setX(...) methods.
238 ///
239 /// For example:
240 /// \code{.cpp}
241 /// SarifResult result = SarifResult::create(...)
242 ///                         .setRuleId(...)
243 ///                         .setDiagnosticMessage(...);
244 /// \endcode
245 ///
246 /// Reference:
247 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317638">SARIF<pre>result</pre></a>
248 class SarifResult {
249   friend class clang::SarifDocumentWriter;
250 
251   // NOTE:
252   // This type cannot fit all possible indexes representable by JSON, but is
253   // chosen because it is the largest unsigned type that can be safely
254   // converted to an \c int64_t.
255   uint32_t RuleIdx;
256   std::string RuleId;
257   std::string DiagnosticMessage;
258   llvm::SmallVector<CharSourceRange, 8> Locations;
259   llvm::SmallVector<ThreadFlow, 8> ThreadFlows;
260 
261   SarifResult() = delete;
262   explicit SarifResult(uint32_t RuleIdx) : RuleIdx(RuleIdx) {}
263 
264 public:
265   static SarifResult create(uint32_t RuleIdx) { return SarifResult{RuleIdx}; }
266 
267   SarifResult setIndex(uint32_t Idx) {
268     RuleIdx = Idx;
269     return *this;
270   }
271 
272   SarifResult setRuleId(llvm::StringRef Id) {
273     RuleId = Id.str();
274     return *this;
275   }
276 
277   SarifResult setDiagnosticMessage(llvm::StringRef Message) {
278     DiagnosticMessage = Message.str();
279     return *this;
280   }
281 
282   SarifResult setLocations(llvm::ArrayRef<CharSourceRange> DiagLocs) {
283 #ifndef NDEBUG
284     for (const auto &Loc : DiagLocs) {
285       assert(Loc.isCharRange() &&
286              "SARIF Results require character granular source ranges!");
287     }
288 #endif
289     Locations.assign(DiagLocs.begin(), DiagLocs.end());
290     return *this;
291   }
292   SarifResult setThreadFlows(llvm::ArrayRef<ThreadFlow> ThreadFlowResults) {
293     ThreadFlows.assign(ThreadFlowResults.begin(), ThreadFlowResults.end());
294     return *this;
295   }
296 };
297 
298 /// This class handles creating a valid SARIF document given various input
299 /// attributes. However, it requires an ordering among certain method calls:
300 ///
/// 1. Because every SARIF document must contain at least 1 \c run, callers
///    must ensure that \ref SarifDocumentWriter::createRun is called before
///    any other methods.
304 /// 2. If SarifDocumentWriter::endRun is called, callers MUST call
305 ///    SarifDocumentWriter::createRun, before invoking any of the result
306 ///    aggregation methods such as SarifDocumentWriter::appendResult etc.
class SarifDocumentWriter {
private:
  /// URI of the SARIF 2.1.0 JSON schema.
  const llvm::StringRef SchemaURI{
      "https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/"
      "sarif-schema-2.1.0.json"};
  /// SARIF version string matching \ref SchemaURI.
  const llvm::StringRef SchemaVersion{"2.1.0"};

  /// \internal
  /// Return a mutable reference to the current tool. Asserts that a run
  /// exists.
  llvm::json::Object &getCurrentTool();

  /// \internal
  /// Checks if there is a run associated with this document.
  ///
  /// \return true if a run is associated with this document.
  bool hasRun() const;

  /// \internal
  /// Reset portions of the internal state so that the document is ready to
  /// receive data for a new run.
  void reset();

  /// \internal
  /// Return a mutable reference to the current run, after asserting it exists.
  ///
  /// \note It is undefined behavior to call this if a run does not exist in
  /// the SARIF document.
  llvm::json::Object &getCurrentRun();

  /// Create a code flow object for the given threadflows.
  /// See \ref ThreadFlow.
  ///
  /// \note It is undefined behavior to call this if a run does not exist in
  /// the SARIF document.
  llvm::json::Object
  createCodeFlow(const llvm::ArrayRef<ThreadFlow> ThreadFlows);

  /// Add the given threadflows to the ones this SARIF document knows about.
  ///
  /// NOTE(review): the function returns a JSON array, presumably the JSON
  /// form of \p ThreadFlows; confirm against the implementation.
  llvm::json::Array
  createThreadFlows(const llvm::ArrayRef<ThreadFlow> ThreadFlows);

  /// Add the given \ref CharSourceRange to the SARIF document as a physical
  /// location, with its corresponding artifact.
  llvm::json::Object createPhysicalLocation(const CharSourceRange &R);

public:
  /// A writer cannot exist without a source manager.
  SarifDocumentWriter() = delete;

  /// Create a new empty SARIF document with the given source manager.
  ///
  /// Only a reference to \p SourceMgr is stored, so the source manager must
  /// outlive this writer.
  SarifDocumentWriter(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}

  /// Release resources held by this SARIF document.
  ~SarifDocumentWriter() = default;

  /// Create a new run with which any upcoming analysis will be associated.
  /// Each run requires specifying the tool that is generating reporting items.
  ///
  /// \param ShortToolName short name of the tool generating the run.
  /// \param LongToolName long name of the tool generating the run.
  /// \param ToolVersion version of the tool; defaults to
  /// \c CLANG_VERSION_STRING.
  void createRun(const llvm::StringRef ShortToolName,
                 const llvm::StringRef LongToolName,
                 const llvm::StringRef ToolVersion = CLANG_VERSION_STRING);

  /// If there is a current run, end it.
  ///
  /// This method collects various book-keeping required to clear and close
  /// resources associated with the current run, but may also allocate some
  /// for the next run.
  ///
  /// Calling \ref endRun before associating a run through \ref createRun leads
  /// to undefined behaviour.
  void endRun();

  /// Associate the given rule with the current run.
  ///
  /// Returns an integer rule index for the created rule that is unique within
  /// the current run, which can then be used to create a \ref SarifResult
  /// to add to the current run. Note that a rule must exist before being
  /// referenced by a result.
  ///
  /// \note The index is returned as \c size_t, whereas
  /// \c SarifResult::create takes a \c uint32_t.
  ///
  /// \pre
  /// There must be a run associated with the document, failing to do so will
  /// cause undefined behaviour.
  size_t createRule(const SarifRule &Rule);

  /// Append a new result to the currently in-flight run.
  ///
  /// \pre
  /// There must be a run associated with the document, failing to do so will
  /// cause undefined behaviour.
  /// \pre
  /// \c RuleIdx used to create the result must correspond to a rule known by
  /// the SARIF document. It must be the value returned by a previous call
  /// to \ref createRule.
  void appendResult(const SarifResult &SarifResult);

  /// Return the SARIF document in its current state.
  /// Calling this will trigger a copy of the internal state including all
  /// reported diagnostics, resulting in an expensive call.
  llvm::json::Object createDocument();

private:
  /// Source Manager to use for the current SARIF document.
  /// Held by reference; it is not owned by the writer.
  const SourceManager &SourceMgr;

  /// Flag to track the state of this document:
  /// A closed document is one on which a new run must be created.
  /// This could be a document that is freshly created, or has recently
  /// finished writing to a previous run.
  bool Closed = true;

  /// A sequence of SARIF runs.
  /// Each run object describes a single run of an analysis tool and contains
  /// the output of that run.
  ///
  /// Reference: <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317484">run object</a>
  llvm::json::Array Runs;

  /// The list of rules associated with the most recent active run. These are
  /// defined using the diagnostics passed to the SarifDocument. Each rule
  /// need not be unique through the result set. E.g. there may be several
  /// 'syntax' errors throughout code under analysis, each of which has its
  /// own specific diagnostic message (and consequently, RuleId). Rules are
  /// also known as "reportingDescriptor" objects in SARIF.
  ///
  /// Reference: <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317556">rules property</a>
  llvm::SmallVector<SarifRule, 32> CurrentRules;

  /// The list of artifacts that have been encountered on the most recent active
  /// run. An artifact is defined in SARIF as a sequence of bytes addressable
  /// by a URI. A common example for clang's case would be files named by
  /// filesystem paths.
  llvm::StringMap<detail::SarifArtifact> CurrentArtifacts;
};
438 } // namespace clang
439 
440 #endif // LLVM_CLANG_BASIC_SARIF_H
441