1 //===- SVals.h - Abstract Values for Static Analysis ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines SVal, Loc, and NonLoc, classes that represent 10 // abstract r-values for use with path-sensitive value tracking. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H 15 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H 16 17 #include "clang/AST/Expr.h" 18 #include "clang/AST/Type.h" 19 #include "clang/Basic/LLVM.h" 20 #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" 21 #include "llvm/ADT/FoldingSet.h" 22 #include "llvm/ADT/ImmutableList.h" 23 #include "llvm/ADT/PointerUnion.h" 24 #include "llvm/ADT/iterator_range.h" 25 #include "llvm/Support/Casting.h" 26 #include <cassert> 27 #include <cstdint> 28 #include <optional> 29 #include <utility> 30 31 //==------------------------------------------------------------------------==// 32 // Base SVal types. 33 //==------------------------------------------------------------------------==// 34 35 namespace clang { 36 37 class CXXBaseSpecifier; 38 class FunctionDecl; 39 class LabelDecl; 40 41 namespace ento { 42 43 class CompoundValData; 44 class LazyCompoundValData; 45 class MemRegion; 46 class PointerToMemberData; 47 class SValBuilder; 48 class TypedValueRegion; 49 50 namespace nonloc { 51 52 /// Sub-kinds for NonLoc values. 53 enum Kind { 54 #define NONLOC_SVAL(Id, Parent) Id ## Kind, 55 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def" 56 }; 57 58 } // namespace nonloc 59 60 namespace loc { 61 62 /// Sub-kinds for Loc values. 63 enum Kind { 64 #define LOC_SVAL(Id, Parent) Id ## Kind, 65 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def" 66 }; 67 68 } // namespace loc 69 70 /// SVal - This represents a symbolic expression, which can be either 71 /// an L-value or an R-value. 72 /// 73 class SVal { 74 public: 75 enum BaseKind { 76 // The enumerators must be representable using 2 bits. 77 #define BASIC_SVAL(Id, Parent) Id ## Kind, 78 #define ABSTRACT_SVAL_WITH_KIND(Id, Parent) Id ## Kind, 79 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def" 80 }; 81 enum { BaseBits = 2, BaseMask = 0b11 }; 82 83 protected: 84 const void *Data = nullptr; 85 86 /// The lowest 2 bits are a BaseKind (0 -- 3). 87 /// The higher bits are an unsigned "kind" value. 88 unsigned Kind = 0; 89 90 explicit SVal(const void *d, bool isLoc, unsigned ValKind) 91 : Data(d), Kind((isLoc ? LocKind : NonLocKind) | (ValKind << BaseBits)) {} 92 93 explicit SVal(BaseKind k, const void *D = nullptr) : Data(D), Kind(k) {} 94 95 public: 96 explicit SVal() = default; 97 98 /// Convert to the specified SVal type, asserting that this SVal is of 99 /// the desired type. 100 template <typename T> T castAs() const { return llvm::cast<T>(*this); } 101 102 /// Convert to the specified SVal type, returning std::nullopt if this SVal is 103 /// not of the desired type. 104 template <typename T> std::optional<T> getAs() const { 105 return llvm::dyn_cast<T>(*this); 106 } 107 108 unsigned getRawKind() const { return Kind; } 109 BaseKind getBaseKind() const { return (BaseKind) (Kind & BaseMask); } 110 unsigned getSubKind() const { return Kind >> BaseBits; } 111 112 // This method is required for using SVal in a FoldingSetNode. It 113 // extracts a unique signature for this SVal object. 114 void Profile(llvm::FoldingSetNodeID &ID) const { 115 ID.AddInteger((unsigned) getRawKind()); 116 ID.AddPointer(Data); 117 } 118 119 bool operator==(SVal R) const { 120 return getRawKind() == R.getRawKind() && Data == R.Data; 121 } 122 123 bool operator!=(SVal R) const { return !(*this == R); } 124 125 bool isUnknown() const { 126 return getRawKind() == UnknownValKind; 127 } 128 129 bool isUndef() const { 130 return getRawKind() == UndefinedValKind; 131 } 132 133 bool isUnknownOrUndef() const { 134 return getRawKind() <= UnknownValKind; 135 } 136 137 bool isValid() const { 138 return getRawKind() > UnknownValKind; 139 } 140 141 bool isConstant() const; 142 143 bool isConstant(int I) const; 144 145 bool isZeroConstant() const; 146 147 /// getAsFunctionDecl - If this SVal is a MemRegionVal and wraps a 148 /// CodeTextRegion wrapping a FunctionDecl, return that FunctionDecl. 149 /// Otherwise return 0. 150 const FunctionDecl *getAsFunctionDecl() const; 151 152 /// If this SVal is a location and wraps a symbol, return that 153 /// SymbolRef. Otherwise return 0. 154 /// 155 /// Casts are ignored during lookup. 156 /// \param IncludeBaseRegions The boolean that controls whether the search 157 /// should continue to the base regions if the region is not symbolic. 158 SymbolRef getAsLocSymbol(bool IncludeBaseRegions = false) const; 159 160 /// Get the symbol in the SVal or its base region. 161 SymbolRef getLocSymbolInBase() const; 162 163 /// If this SVal wraps a symbol return that SymbolRef. 164 /// Otherwise, return 0. 165 /// 166 /// Casts are ignored during lookup. 167 /// \param IncludeBaseRegions The boolean that controls whether the search 168 /// should continue to the base regions if the region is not symbolic. 169 SymbolRef getAsSymbol(bool IncludeBaseRegions = false) const; 170 171 /// If this SVal is loc::ConcreteInt or nonloc::ConcreteInt, 172 /// return a pointer to APSInt which is held in it. 173 /// Otherwise, return nullptr. 174 const llvm::APSInt *getAsInteger() const; 175 176 const MemRegion *getAsRegion() const; 177 178 /// printJson - Pretty-prints in JSON format. 179 void printJson(raw_ostream &Out, bool AddQuotes) const; 180 181 void dumpToStream(raw_ostream &OS) const; 182 void dump() const; 183 184 llvm::iterator_range<SymExpr::symbol_iterator> symbols() const { 185 if (const SymExpr *SE = getAsSymbol(/*IncludeBaseRegions=*/true)) 186 return SE->symbols(); 187 SymExpr::symbol_iterator end{}; 188 return llvm::make_range(end, end); 189 } 190 191 /// Try to get a reasonable type for the given value. 192 /// 193 /// \returns The best approximation of the value type or Null. 194 /// In theory, all symbolic values should be typed, but this function 195 /// is still a WIP and might have a few blind spots. 196 /// 197 /// \note This function should not be used when the user has access to the 198 /// bound expression AST node as well, since AST always has exact types. 199 /// 200 /// \note Loc values are interpreted as pointer rvalues for the purposes of 201 /// this method. 202 QualType getType(const ASTContext &) const; 203 }; 204 205 inline raw_ostream &operator<<(raw_ostream &os, clang::ento::SVal V) { 206 V.dumpToStream(os); 207 return os; 208 } 209 210 class UndefinedVal : public SVal { 211 public: 212 UndefinedVal() : SVal(UndefinedValKind) {} 213 static bool classof(SVal V) { return V.getBaseKind() == UndefinedValKind; } 214 }; 215 216 class DefinedOrUnknownSVal : public SVal { 217 public: 218 // We want calling these methods to be a compiler error since they are 219 // tautologically false. 220 bool isUndef() const = delete; 221 bool isValid() const = delete; 222 223 static bool classof(SVal V) { return !V.isUndef(); } 224 225 protected: 226 explicit DefinedOrUnknownSVal(const void *d, bool isLoc, unsigned ValKind) 227 : SVal(d, isLoc, ValKind) {} 228 explicit DefinedOrUnknownSVal(BaseKind k, void *D = nullptr) : SVal(k, D) {} 229 }; 230 231 class UnknownVal : public DefinedOrUnknownSVal { 232 public: 233 explicit UnknownVal() : DefinedOrUnknownSVal(UnknownValKind) {} 234 235 static bool classof(SVal V) { return V.getBaseKind() == UnknownValKind; } 236 }; 237 238 class DefinedSVal : public DefinedOrUnknownSVal { 239 public: 240 // We want calling these methods to be a compiler error since they are 241 // tautologically true/false. 242 bool isUnknown() const = delete; 243 bool isUnknownOrUndef() const = delete; 244 bool isValid() const = delete; 245 246 static bool classof(SVal V) { return !V.isUnknownOrUndef(); } 247 248 protected: 249 explicit DefinedSVal(const void *d, bool isLoc, unsigned ValKind) 250 : DefinedOrUnknownSVal(d, isLoc, ValKind) {} 251 }; 252 253 /// Represents an SVal that is guaranteed to not be UnknownVal. 254 class KnownSVal : public SVal { 255 public: 256 KnownSVal(const DefinedSVal &V) : SVal(V) {} 257 KnownSVal(const UndefinedVal &V) : SVal(V) {} 258 static bool classof(SVal V) { return !V.isUnknown(); } 259 }; 260 261 class NonLoc : public DefinedSVal { 262 protected: 263 explicit NonLoc(unsigned SubKind, const void *d) 264 : DefinedSVal(d, false, SubKind) {} 265 266 public: 267 void dumpToStream(raw_ostream &Out) const; 268 269 static bool isCompoundType(QualType T) { 270 return T->isArrayType() || T->isRecordType() || 271 T->isAnyComplexType() || T->isVectorType(); 272 } 273 274 static bool classof(SVal V) { return V.getBaseKind() == NonLocKind; } 275 }; 276 277 class Loc : public DefinedSVal { 278 protected: 279 explicit Loc(unsigned SubKind, const void *D) 280 : DefinedSVal(const_cast<void *>(D), true, SubKind) {} 281 282 public: 283 void dumpToStream(raw_ostream &Out) const; 284 285 static bool isLocType(QualType T) { 286 return T->isAnyPointerType() || T->isBlockPointerType() || 287 T->isReferenceType() || T->isNullPtrType(); 288 } 289 290 static bool classof(SVal V) { return V.getBaseKind() == LocKind; } 291 }; 292 293 //==------------------------------------------------------------------------==// 294 // Subclasses of NonLoc. 295 //==------------------------------------------------------------------------==// 296 297 namespace nonloc { 298 299 /// Represents symbolic expression that isn't a location. 300 class SymbolVal : public NonLoc { 301 public: 302 SymbolVal() = delete; 303 SymbolVal(SymbolRef sym) : NonLoc(SymbolValKind, sym) { 304 assert(sym); 305 assert(!Loc::isLocType(sym->getType())); 306 } 307 308 LLVM_ATTRIBUTE_RETURNS_NONNULL 309 SymbolRef getSymbol() const { 310 return (const SymExpr *) Data; 311 } 312 313 bool isExpression() const { 314 return !isa<SymbolData>(getSymbol()); 315 } 316 317 static bool classof(SVal V) { 318 return V.getBaseKind() == NonLocKind && V.getSubKind() == SymbolValKind; 319 } 320 321 static bool classof(NonLoc V) { return V.getSubKind() == SymbolValKind; } 322 }; 323 324 /// Value representing integer constant. 325 class ConcreteInt : public NonLoc { 326 public: 327 explicit ConcreteInt(const llvm::APSInt& V) : NonLoc(ConcreteIntKind, &V) {} 328 329 const llvm::APSInt& getValue() const { 330 return *static_cast<const llvm::APSInt *>(Data); 331 } 332 333 static bool classof(SVal V) { 334 return V.getBaseKind() == NonLocKind && V.getSubKind() == ConcreteIntKind; 335 } 336 337 static bool classof(NonLoc V) { return V.getSubKind() == ConcreteIntKind; } 338 }; 339 340 class LocAsInteger : public NonLoc { 341 friend class ento::SValBuilder; 342 343 explicit LocAsInteger(const std::pair<SVal, uintptr_t> &data) 344 : NonLoc(LocAsIntegerKind, &data) { 345 // We do not need to represent loc::ConcreteInt as LocAsInteger, 346 // as it'd collapse into a nonloc::ConcreteInt instead. 347 assert(data.first.getBaseKind() == LocKind && 348 (data.first.getSubKind() == loc::MemRegionValKind || 349 data.first.getSubKind() == loc::GotoLabelKind)); 350 } 351 352 public: 353 Loc getLoc() const { 354 const std::pair<SVal, uintptr_t> *D = 355 static_cast<const std::pair<SVal, uintptr_t> *>(Data); 356 return D->first.castAs<Loc>(); 357 } 358 359 unsigned getNumBits() const { 360 const std::pair<SVal, uintptr_t> *D = 361 static_cast<const std::pair<SVal, uintptr_t> *>(Data); 362 return D->second; 363 } 364 365 static bool classof(SVal V) { 366 return V.getBaseKind() == NonLocKind && V.getSubKind() == LocAsIntegerKind; 367 } 368 369 static bool classof(NonLoc V) { return V.getSubKind() == LocAsIntegerKind; } 370 }; 371 372 class CompoundVal : public NonLoc { 373 friend class ento::SValBuilder; 374 375 explicit CompoundVal(const CompoundValData *D) : NonLoc(CompoundValKind, D) { 376 assert(D); 377 } 378 379 public: 380 LLVM_ATTRIBUTE_RETURNS_NONNULL 381 const CompoundValData* getValue() const { 382 return static_cast<const CompoundValData *>(Data); 383 } 384 385 using iterator = llvm::ImmutableList<SVal>::iterator; 386 387 iterator begin() const; 388 iterator end() const; 389 390 static bool classof(SVal V) { 391 return V.getBaseKind() == NonLocKind && V.getSubKind() == CompoundValKind; 392 } 393 394 static bool classof(NonLoc V) { return V.getSubKind() == CompoundValKind; } 395 }; 396 397 class LazyCompoundVal : public NonLoc { 398 friend class ento::SValBuilder; 399 400 explicit LazyCompoundVal(const LazyCompoundValData *D) 401 : NonLoc(LazyCompoundValKind, D) { 402 assert(D); 403 } 404 405 public: 406 LLVM_ATTRIBUTE_RETURNS_NONNULL 407 const LazyCompoundValData *getCVData() const { 408 return static_cast<const LazyCompoundValData *>(Data); 409 } 410 411 /// It might return null. 412 const void *getStore() const; 413 414 LLVM_ATTRIBUTE_RETURNS_NONNULL 415 const TypedValueRegion *getRegion() const; 416 417 static bool classof(SVal V) { 418 return V.getBaseKind() == NonLocKind && 419 V.getSubKind() == LazyCompoundValKind; 420 } 421 422 static bool classof(NonLoc V) { 423 return V.getSubKind() == LazyCompoundValKind; 424 } 425 }; 426 427 /// Value representing pointer-to-member. 428 /// 429 /// This value is qualified as NonLoc because neither loading nor storing 430 /// operations are applied to it. Instead, the analyzer uses the L-value coming 431 /// from pointer-to-member applied to an object. 432 /// This SVal is represented by a NamedDecl which can be a member function 433 /// pointer or a member data pointer and an optional list of CXXBaseSpecifiers. 434 /// This list is required to accumulate the pointer-to-member cast history to 435 /// figure out the correct subobject field. In particular, implicit casts grow 436 /// this list and explicit casts like static_cast shrink this list. 437 class PointerToMember : public NonLoc { 438 friend class ento::SValBuilder; 439 440 public: 441 using PTMDataType = 442 llvm::PointerUnion<const NamedDecl *, const PointerToMemberData *>; 443 444 const PTMDataType getPTMData() const { 445 return PTMDataType::getFromOpaqueValue(const_cast<void *>(Data)); 446 } 447 448 bool isNullMemberPointer() const; 449 450 const NamedDecl *getDecl() const; 451 452 template<typename AdjustedDecl> 453 const AdjustedDecl *getDeclAs() const { 454 return dyn_cast_or_null<AdjustedDecl>(getDecl()); 455 } 456 457 using iterator = llvm::ImmutableList<const CXXBaseSpecifier *>::iterator; 458 459 iterator begin() const; 460 iterator end() const; 461 462 static bool classof(SVal V) { 463 return V.getBaseKind() == NonLocKind && 464 V.getSubKind() == PointerToMemberKind; 465 } 466 467 static bool classof(NonLoc V) { 468 return V.getSubKind() == PointerToMemberKind; 469 } 470 471 private: 472 explicit PointerToMember(const PTMDataType D) 473 : NonLoc(PointerToMemberKind, D.getOpaqueValue()) {} 474 }; 475 476 } // namespace nonloc 477 478 //==------------------------------------------------------------------------==// 479 // Subclasses of Loc. 480 //==------------------------------------------------------------------------==// 481 482 namespace loc { 483 484 class GotoLabel : public Loc { 485 public: 486 explicit GotoLabel(const LabelDecl *Label) : Loc(GotoLabelKind, Label) { 487 assert(Label); 488 } 489 490 const LabelDecl *getLabel() const { 491 return static_cast<const LabelDecl *>(Data); 492 } 493 494 static bool classof(SVal V) { 495 return V.getBaseKind() == LocKind && V.getSubKind() == GotoLabelKind; 496 } 497 498 static bool classof(Loc V) { return V.getSubKind() == GotoLabelKind; } 499 }; 500 501 class MemRegionVal : public Loc { 502 public: 503 explicit MemRegionVal(const MemRegion* r) : Loc(MemRegionValKind, r) { 504 assert(r); 505 } 506 507 /// Get the underlining region. 508 const MemRegion *getRegion() const { 509 return static_cast<const MemRegion *>(Data); 510 } 511 512 /// Get the underlining region and strip casts. 513 const MemRegion* stripCasts(bool StripBaseCasts = true) const; 514 515 template <typename REGION> 516 const REGION* getRegionAs() const { 517 return dyn_cast<REGION>(getRegion()); 518 } 519 520 bool operator==(const MemRegionVal &R) const { 521 return getRegion() == R.getRegion(); 522 } 523 524 bool operator!=(const MemRegionVal &R) const { 525 return getRegion() != R.getRegion(); 526 } 527 528 static bool classof(SVal V) { 529 return V.getBaseKind() == LocKind && V.getSubKind() == MemRegionValKind; 530 } 531 532 static bool classof(Loc V) { return V.getSubKind() == MemRegionValKind; } 533 }; 534 535 class ConcreteInt : public Loc { 536 public: 537 explicit ConcreteInt(const llvm::APSInt& V) : Loc(ConcreteIntKind, &V) {} 538 539 const llvm::APSInt &getValue() const { 540 return *static_cast<const llvm::APSInt *>(Data); 541 } 542 543 static bool classof(SVal V) { 544 return V.getBaseKind() == LocKind && V.getSubKind() == ConcreteIntKind; 545 } 546 547 static bool classof(Loc V) { return V.getSubKind() == ConcreteIntKind; } 548 }; 549 550 } // namespace loc 551 } // namespace ento 552 } // namespace clang 553 554 namespace llvm { 555 template <typename To, typename From> 556 struct CastInfo< 557 To, From, 558 std::enable_if_t<std::is_base_of<::clang::ento::SVal, From>::value>> 559 : public CastIsPossible<To, ::clang::ento::SVal> { 560 using Self = CastInfo< 561 To, From, 562 std::enable_if_t<std::is_base_of<::clang::ento::SVal, From>::value>>; 563 static bool isPossible(const From &V) { 564 return To::classof(*static_cast<const ::clang::ento::SVal *>(&V)); 565 } 566 static std::optional<To> castFailed() { return std::optional<To>{}; } 567 static To doCast(const From &f) { 568 return *static_cast<const To *>(cast<::clang::ento::SVal>(&f)); 569 } 570 static std::optional<To> doCastIfPossible(const From &f) { 571 if (!Self::isPossible(f)) 572 return Self::castFailed(); 573 return doCast(f); 574 } 575 }; 576 } // namespace llvm 577 578 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H 579