1 //== clang/Basic/Sarif.h - SARIF Diagnostics Object Model -------*- C++ -*--==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// Defines clang::SarifDocumentWriter, clang::SarifRule, clang::SarifResult. 10 /// 11 /// The document built can be accessed as a JSON Object. 12 /// Several value semantic types are also introduced which represent properties 13 /// of the SARIF standard, such as 'artifact', 'result', 'rule'. 14 /// 15 /// A SARIF (Static Analysis Results Interchange Format) document is JSON 16 /// document that describes in detail the results of running static analysis 17 /// tools on a project. Each (non-trivial) document consists of at least one 18 /// "run", which are themselves composed of details such as: 19 /// * Tool: The tool that was run 20 /// * Rules: The rules applied during the tool run, represented by 21 /// \c reportingDescriptor objects in SARIF 22 /// * Results: The matches for the rules applied against the project(s) being 23 /// evaluated, represented by \c result objects in SARIF 24 /// 25 /// Reference: 26 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html">The SARIF standard</a> 27 /// 2. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317836">SARIF<pre>reportingDescriptor</pre></a> 28 /// 3. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317638">SARIF<pre>result</pre></a> 29 //===----------------------------------------------------------------------===// 30 31 #ifndef LLVM_CLANG_BASIC_SARIF_H 32 #define LLVM_CLANG_BASIC_SARIF_H 33 34 #include "clang/Basic/SourceLocation.h" 35 #include "clang/Basic/Version.h" 36 #include "llvm/ADT/ArrayRef.h" 37 #include "llvm/ADT/Optional.h" 38 #include "llvm/ADT/SmallVector.h" 39 #include "llvm/ADT/StringMap.h" 40 #include "llvm/ADT/StringRef.h" 41 #include "llvm/Support/JSON.h" 42 #include <cassert> 43 #include <cstddef> 44 #include <cstdint> 45 #include <initializer_list> 46 #include <string> 47 48 namespace clang { 49 50 class SarifDocumentWriter; 51 class SourceManager; 52 53 namespace detail { 54 55 /// \internal 56 /// An artifact location is SARIF's way of describing the complete location 57 /// of an artifact encountered during analysis. The \c artifactLocation object 58 /// typically consists of a URI, and/or an index to reference the artifact it 59 /// locates. 60 /// 61 /// This builder makes an additional assumption: that every artifact encountered 62 /// by \c clang will be a physical, top-level artifact. Which is why the static 63 /// creation method \ref SarifArtifactLocation::create takes a mandatory URI 64 /// parameter. The official standard states that either a \c URI or \c Index 65 /// must be available in the object, \c clang picks the \c URI as a reasonable 66 /// default, because it intends to deal in physical artifacts for now. 67 /// 68 /// Reference: 69 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317427">artifactLocation object</a> 70 /// 2. \ref SarifArtifact 71 class SarifArtifactLocation { 72 private: 73 friend class clang::SarifDocumentWriter; 74 75 llvm::Optional<uint32_t> Index; 76 std::string URI; 77 78 SarifArtifactLocation() = delete; 79 explicit SarifArtifactLocation(const std::string &URI) : URI(URI) {} 80 81 public: 82 static SarifArtifactLocation create(llvm::StringRef URI) { 83 return SarifArtifactLocation{URI.str()}; 84 } 85 86 SarifArtifactLocation setIndex(uint32_t Idx) { 87 Index = Idx; 88 return *this; 89 } 90 }; 91 92 /// \internal 93 /// An artifact in SARIF is any object (a sequence of bytes) addressable by 94 /// a URI (RFC 3986). The most common type of artifact for clang's use-case 95 /// would be source files. SARIF's artifact object is described in detail in 96 /// section 3.24. 97 // 98 /// Since every clang artifact MUST have a location (there being no nested 99 /// artifacts), the creation method \ref SarifArtifact::create requires a 100 /// \ref SarifArtifactLocation object. 101 /// 102 /// Reference: 103 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317611">artifact object</a> 104 class SarifArtifact { 105 private: 106 friend class clang::SarifDocumentWriter; 107 108 llvm::Optional<uint32_t> Offset; 109 llvm::Optional<size_t> Length; 110 std::string MimeType; 111 SarifArtifactLocation Location; 112 llvm::SmallVector<std::string, 4> Roles; 113 114 SarifArtifact() = delete; 115 116 explicit SarifArtifact(const SarifArtifactLocation &Loc) : Location(Loc) {} 117 118 public: 119 static SarifArtifact create(const SarifArtifactLocation &Loc) { 120 return SarifArtifact{Loc}; 121 } 122 123 SarifArtifact setOffset(uint32_t ArtifactOffset) { 124 Offset = ArtifactOffset; 125 return *this; 126 } 127 128 SarifArtifact setLength(size_t NumBytes) { 129 Length = NumBytes; 130 return *this; 131 } 132 133 SarifArtifact setRoles(std::initializer_list<llvm::StringRef> ArtifactRoles) { 134 Roles.assign(ArtifactRoles.begin(), ArtifactRoles.end()); 135 return *this; 136 } 137 138 SarifArtifact setMimeType(llvm::StringRef ArtifactMimeType) { 139 MimeType = ArtifactMimeType.str(); 140 return *this; 141 } 142 }; 143 144 } // namespace detail 145 146 enum class ThreadFlowImportance { Important, Essential, Unimportant }; 147 148 /// A thread flow is a sequence of code locations that specify a possible path 149 /// through a single thread of execution. 150 /// A thread flow in SARIF is related to a code flow which describes 151 /// the progress of one or more programs through one or more thread flows. 152 /// 153 /// Reference: 154 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317744">threadFlow object</a> 155 /// 2. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317740">codeFlow object</a> 156 class ThreadFlow { 157 friend class SarifDocumentWriter; 158 159 CharSourceRange Range; 160 ThreadFlowImportance Importance; 161 std::string Message; 162 163 ThreadFlow() = default; 164 165 public: 166 static ThreadFlow create() { return {}; } 167 168 ThreadFlow setRange(const CharSourceRange &ItemRange) { 169 assert(ItemRange.isCharRange() && 170 "ThreadFlows require a character granular source range!"); 171 Range = ItemRange; 172 return *this; 173 } 174 175 ThreadFlow setImportance(const ThreadFlowImportance &ItemImportance) { 176 Importance = ItemImportance; 177 return *this; 178 } 179 180 ThreadFlow setMessage(llvm::StringRef ItemMessage) { 181 Message = ItemMessage.str(); 182 return *this; 183 } 184 }; 185 186 /// A SARIF rule (\c reportingDescriptor object) contains information that 187 /// describes a reporting item generated by a tool. A reporting item is 188 /// either a result of analysis or notification of a condition encountered by 189 /// the tool. Rules are arbitrary but are identifiable by a hierarchical 190 /// rule-id. 191 /// 192 /// This builder provides an interface to create SARIF \c reportingDescriptor 193 /// objects via the \ref SarifRule::create static method. 194 /// 195 /// Reference: 196 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317836">reportingDescriptor object</a> 197 class SarifRule { 198 friend class clang::SarifDocumentWriter; 199 200 std::string Name; 201 std::string Id; 202 std::string Description; 203 std::string HelpURI; 204 205 SarifRule() = default; 206 207 public: 208 static SarifRule create() { return {}; } 209 210 SarifRule setName(llvm::StringRef RuleName) { 211 Name = RuleName.str(); 212 return *this; 213 } 214 215 SarifRule setRuleId(llvm::StringRef RuleId) { 216 Id = RuleId.str(); 217 return *this; 218 } 219 220 SarifRule setDescription(llvm::StringRef RuleDesc) { 221 Description = RuleDesc.str(); 222 return *this; 223 } 224 225 SarifRule setHelpURI(llvm::StringRef RuleHelpURI) { 226 HelpURI = RuleHelpURI.str(); 227 return *this; 228 } 229 }; 230 231 /// A SARIF result (also called a "reporting item") is a unit of output 232 /// produced when one of the tool's \c reportingDescriptor encounters a match 233 /// on the file being analysed by the tool. 234 /// 235 /// This builder provides a \ref SarifResult::create static method that can be 236 /// used to create an empty shell onto which attributes can be added using the 237 /// \c setX(...) methods. 238 /// 239 /// For example: 240 /// \code{.cpp} 241 /// SarifResult result = SarifResult::create(...) 242 /// .setRuleId(...) 243 /// .setDiagnosticMessage(...); 244 /// \endcode 245 /// 246 /// Reference: 247 /// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317638">SARIF<pre>result</pre></a> 248 class SarifResult { 249 friend class clang::SarifDocumentWriter; 250 251 // NOTE: 252 // This type cannot fit all possible indexes representable by JSON, but is 253 // chosen because it is the largest unsigned type that can be safely 254 // converted to an \c int64_t. 255 uint32_t RuleIdx; 256 std::string RuleId; 257 std::string DiagnosticMessage; 258 llvm::SmallVector<CharSourceRange, 8> Locations; 259 llvm::SmallVector<ThreadFlow, 8> ThreadFlows; 260 261 SarifResult() = delete; 262 explicit SarifResult(uint32_t RuleIdx) : RuleIdx(RuleIdx) {} 263 264 public: 265 static SarifResult create(uint32_t RuleIdx) { return SarifResult{RuleIdx}; } 266 267 SarifResult setIndex(uint32_t Idx) { 268 RuleIdx = Idx; 269 return *this; 270 } 271 272 SarifResult setRuleId(llvm::StringRef Id) { 273 RuleId = Id.str(); 274 return *this; 275 } 276 277 SarifResult setDiagnosticMessage(llvm::StringRef Message) { 278 DiagnosticMessage = Message.str(); 279 return *this; 280 } 281 282 SarifResult setLocations(llvm::ArrayRef<CharSourceRange> DiagLocs) { 283 #ifndef NDEBUG 284 for (const auto &Loc : DiagLocs) { 285 assert(Loc.isCharRange() && 286 "SARIF Results require character granular source ranges!"); 287 } 288 #endif 289 Locations.assign(DiagLocs.begin(), DiagLocs.end()); 290 return *this; 291 } 292 SarifResult setThreadFlows(llvm::ArrayRef<ThreadFlow> ThreadFlowResults) { 293 ThreadFlows.assign(ThreadFlowResults.begin(), ThreadFlowResults.end()); 294 return *this; 295 } 296 }; 297 298 /// This class handles creating a valid SARIF document given various input 299 /// attributes. However, it requires an ordering among certain method calls: 300 /// 301 /// 1. Because every SARIF document must contain at least 1 \c run, callers 302 /// must ensure that \ref SarifDocumentWriter::createRun is is called before 303 /// any other methods. 304 /// 2. If SarifDocumentWriter::endRun is called, callers MUST call 305 /// SarifDocumentWriter::createRun, before invoking any of the result 306 /// aggregation methods such as SarifDocumentWriter::appendResult etc. 307 class SarifDocumentWriter { 308 private: 309 const llvm::StringRef SchemaURI{ 310 "https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/" 311 "sarif-schema-2.1.0.json"}; 312 const llvm::StringRef SchemaVersion{"2.1.0"}; 313 314 /// \internal 315 /// Return a pointer to the current tool. Asserts that a run exists. 316 llvm::json::Object &getCurrentTool(); 317 318 /// \internal 319 /// Checks if there is a run associated with this document. 320 /// 321 /// \return true on success 322 bool hasRun() const; 323 324 /// \internal 325 /// Reset portions of the internal state so that the document is ready to 326 /// receive data for a new run. 327 void reset(); 328 329 /// \internal 330 /// Return a mutable reference to the current run, after asserting it exists. 331 /// 332 /// \note It is undefined behavior to call this if a run does not exist in 333 /// the SARIF document. 334 llvm::json::Object &getCurrentRun(); 335 336 /// Create a code flow object for the given threadflows. 337 /// See \ref ThreadFlow. 338 /// 339 /// \note It is undefined behavior to call this if a run does not exist in 340 /// the SARIF document. 341 llvm::json::Object 342 createCodeFlow(const llvm::ArrayRef<ThreadFlow> ThreadFlows); 343 344 /// Add the given threadflows to the ones this SARIF document knows about. 345 llvm::json::Array 346 createThreadFlows(const llvm::ArrayRef<ThreadFlow> ThreadFlows); 347 348 /// Add the given \ref CharSourceRange to the SARIF document as a physical 349 /// location, with its corresponding artifact. 350 llvm::json::Object createPhysicalLocation(const CharSourceRange &R); 351 352 public: 353 SarifDocumentWriter() = delete; 354 355 /// Create a new empty SARIF document with the given source manager. 356 SarifDocumentWriter(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} 357 358 /// Release resources held by this SARIF document. 359 ~SarifDocumentWriter() = default; 360 361 /// Create a new run with which any upcoming analysis will be associated. 362 /// Each run requires specifying the tool that is generating reporting items. 363 void createRun(const llvm::StringRef ShortToolName, 364 const llvm::StringRef LongToolName, 365 const llvm::StringRef ToolVersion = CLANG_VERSION_STRING); 366 367 /// If there is a current run, end it. 368 /// 369 /// This method collects various book-keeping required to clear and close 370 /// resources associated with the current run, but may also allocate some 371 /// for the next run. 372 /// 373 /// Calling \ref endRun before associating a run through \ref createRun leads 374 /// to undefined behaviour. 375 void endRun(); 376 377 /// Associate the given rule with the current run. 378 /// 379 /// Returns an integer rule index for the created rule that is unique within 380 /// the current run, which can then be used to create a \ref SarifResult 381 /// to add to the current run. Note that a rule must exist before being 382 /// referenced by a result. 383 /// 384 /// \pre 385 /// There must be a run associated with the document, failing to do so will 386 /// cause undefined behaviour. 387 size_t createRule(const SarifRule &Rule); 388 389 /// Append a new result to the currently in-flight run. 390 /// 391 /// \pre 392 /// There must be a run associated with the document, failing to do so will 393 /// cause undefined behaviour. 394 /// \pre 395 /// \c RuleIdx used to create the result must correspond to a rule known by 396 /// the SARIF document. It must be the value returned by a previous call 397 /// to \ref createRule. 398 void appendResult(const SarifResult &SarifResult); 399 400 /// Return the SARIF document in its current state. 401 /// Calling this will trigger a copy of the internal state including all 402 /// reported diagnostics, resulting in an expensive call. 403 llvm::json::Object createDocument(); 404 405 private: 406 /// Source Manager to use for the current SARIF document. 407 const SourceManager &SourceMgr; 408 409 /// Flag to track the state of this document: 410 /// A closed document is one on which a new runs must be created. 411 /// This could be a document that is freshly created, or has recently 412 /// finished writing to a previous run. 413 bool Closed = true; 414 415 /// A sequence of SARIF runs. 416 /// Each run object describes a single run of an analysis tool and contains 417 /// the output of that run. 418 /// 419 /// Reference: <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317484">run object</a> 420 llvm::json::Array Runs; 421 422 /// The list of rules associated with the most recent active run. These are 423 /// defined using the diagnostics passed to the SarifDocument. Each rule 424 /// need not be unique through the result set. E.g. there may be several 425 /// 'syntax' errors throughout code under analysis, each of which has its 426 /// own specific diagnostic message (and consequently, RuleId). Rules are 427 /// also known as "reportingDescriptor" objects in SARIF. 428 /// 429 /// Reference: <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317556">rules property</a> 430 llvm::SmallVector<SarifRule, 32> CurrentRules; 431 432 /// The list of artifacts that have been encountered on the most recent active 433 /// run. An artifact is defined in SARIF as a sequence of bytes addressable 434 /// by a URI. A common example for clang's case would be files named by 435 /// filesystem paths. 436 llvm::StringMap<detail::SarifArtifact> CurrentArtifacts; 437 }; 438 } // namespace clang 439 440 #endif // LLVM_CLANG_BASIC_SARIF_H 441