//===- SVals.h - Abstract Values for Static Analysis ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file defines SVal, Loc, and NonLoc, classes that represent
//  abstract r-values for use with path-sensitive value tracking.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H

#include "clang/AST/Expr.h"
#include "clang/AST/Type.h"
#include "clang/Basic/LLVM.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/ImmutableList.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/Support/Casting.h"
#include <cassert>
#include <cstdint>
#include <type_traits>
#include <utility>

//==------------------------------------------------------------------------==//
//  Base SVal types.
//==------------------------------------------------------------------------==//

namespace clang {

class CXXBaseSpecifier;
class FunctionDecl;
class LabelDecl;

namespace ento {

class CompoundValData;
class LazyCompoundValData;
class MemRegion;
class PointerToMemberData;
class SValBuilder;
class TypedValueRegion;

namespace nonloc {

/// Sub-kinds for NonLoc values.
enum Kind {
#define NONLOC_SVAL(Id, Parent) Id ## Kind,
#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
};

} // namespace nonloc

namespace loc {

/// Sub-kinds for Loc values.
63 enum Kind { 64 #define LOC_SVAL(Id, Parent) Id ## Kind, 65 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def" 66 }; 67 68 } // namespace loc 69 70 /// SVal - This represents a symbolic expression, which can be either 71 /// an L-value or an R-value. 72 /// 73 class SVal { 74 public: 75 enum BaseKind { 76 // The enumerators must be representable using 2 bits. 77 #define BASIC_SVAL(Id, Parent) Id ## Kind, 78 #define ABSTRACT_SVAL_WITH_KIND(Id, Parent) Id ## Kind, 79 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def" 80 }; 81 enum { BaseBits = 2, BaseMask = 0b11 }; 82 83 protected: 84 const void *Data = nullptr; 85 86 /// The lowest 2 bits are a BaseKind (0 -- 3). 87 /// The higher bits are an unsigned "kind" value. 88 unsigned Kind = 0; 89 90 explicit SVal(const void *d, bool isLoc, unsigned ValKind) 91 : Data(d), Kind((isLoc ? LocKind : NonLocKind) | (ValKind << BaseBits)) {} 92 93 explicit SVal(BaseKind k, const void *D = nullptr) : Data(D), Kind(k) {} 94 95 public: 96 explicit SVal() = default; 97 98 /// Convert to the specified SVal type, asserting that this SVal is of 99 /// the desired type. 100 template <typename T> T castAs() const { return llvm::cast<T>(*this); } 101 102 /// Convert to the specified SVal type, returning None if this SVal is 103 /// not of the desired type. 104 template <typename T> Optional<T> getAs() const { 105 return llvm::dyn_cast<T>(*this); 106 } 107 108 unsigned getRawKind() const { return Kind; } 109 BaseKind getBaseKind() const { return (BaseKind) (Kind & BaseMask); } 110 unsigned getSubKind() const { return Kind >> BaseBits; } 111 112 // This method is required for using SVal in a FoldingSetNode. It 113 // extracts a unique signature for this SVal object. 
114 void Profile(llvm::FoldingSetNodeID &ID) const { 115 ID.AddInteger((unsigned) getRawKind()); 116 ID.AddPointer(Data); 117 } 118 119 bool operator==(SVal R) const { 120 return getRawKind() == R.getRawKind() && Data == R.Data; 121 } 122 123 bool operator!=(SVal R) const { return !(*this == R); } 124 125 bool isUnknown() const { 126 return getRawKind() == UnknownValKind; 127 } 128 129 bool isUndef() const { 130 return getRawKind() == UndefinedValKind; 131 } 132 133 bool isUnknownOrUndef() const { 134 return getRawKind() <= UnknownValKind; 135 } 136 137 bool isValid() const { 138 return getRawKind() > UnknownValKind; 139 } 140 141 bool isConstant() const; 142 143 bool isConstant(int I) const; 144 145 bool isZeroConstant() const; 146 147 /// getAsFunctionDecl - If this SVal is a MemRegionVal and wraps a 148 /// CodeTextRegion wrapping a FunctionDecl, return that FunctionDecl. 149 /// Otherwise return 0. 150 const FunctionDecl *getAsFunctionDecl() const; 151 152 /// If this SVal is a location and wraps a symbol, return that 153 /// SymbolRef. Otherwise return 0. 154 /// 155 /// Casts are ignored during lookup. 156 /// \param IncludeBaseRegions The boolean that controls whether the search 157 /// should continue to the base regions if the region is not symbolic. 158 SymbolRef getAsLocSymbol(bool IncludeBaseRegions = false) const; 159 160 /// Get the symbol in the SVal or its base region. 161 SymbolRef getLocSymbolInBase() const; 162 163 /// If this SVal wraps a symbol return that SymbolRef. 164 /// Otherwise, return 0. 165 /// 166 /// Casts are ignored during lookup. 167 /// \param IncludeBaseRegions The boolean that controls whether the search 168 /// should continue to the base regions if the region is not symbolic. 169 SymbolRef getAsSymbol(bool IncludeBaseRegions = false) const; 170 171 /// If this SVal is loc::ConcreteInt or nonloc::ConcreteInt, 172 /// return a pointer to APSInt which is held in it. 173 /// Otherwise, return nullptr. 
174 const llvm::APSInt *getAsInteger() const; 175 176 const MemRegion *getAsRegion() const; 177 178 /// printJson - Pretty-prints in JSON format. 179 void printJson(raw_ostream &Out, bool AddQuotes) const; 180 181 void dumpToStream(raw_ostream &OS) const; 182 void dump() const; 183 184 SymExpr::symbol_iterator symbol_begin() const { 185 const SymExpr *SE = getAsSymbol(/*IncludeBaseRegions=*/true); 186 if (SE) 187 return SE->symbol_begin(); 188 else 189 return SymExpr::symbol_iterator(); 190 } 191 192 SymExpr::symbol_iterator symbol_end() const { 193 return SymExpr::symbol_end(); 194 } 195 196 /// Try to get a reasonable type for the given value. 197 /// 198 /// \returns The best approximation of the value type or Null. 199 /// In theory, all symbolic values should be typed, but this function 200 /// is still a WIP and might have a few blind spots. 201 /// 202 /// \note This function should not be used when the user has access to the 203 /// bound expression AST node as well, since AST always has exact types. 204 /// 205 /// \note Loc values are interpreted as pointer rvalues for the purposes of 206 /// this method. 207 QualType getType(const ASTContext &) const; 208 }; 209 210 inline raw_ostream &operator<<(raw_ostream &os, clang::ento::SVal V) { 211 V.dumpToStream(os); 212 return os; 213 } 214 215 class UndefinedVal : public SVal { 216 public: 217 UndefinedVal() : SVal(UndefinedValKind) {} 218 static bool classof(SVal V) { return V.getBaseKind() == UndefinedValKind; } 219 }; 220 221 class DefinedOrUnknownSVal : public SVal { 222 public: 223 // We want calling these methods to be a compiler error since they are 224 // tautologically false. 
225 bool isUndef() const = delete; 226 bool isValid() const = delete; 227 228 static bool classof(SVal V) { return !V.isUndef(); } 229 230 protected: 231 explicit DefinedOrUnknownSVal(const void *d, bool isLoc, unsigned ValKind) 232 : SVal(d, isLoc, ValKind) {} 233 explicit DefinedOrUnknownSVal(BaseKind k, void *D = nullptr) : SVal(k, D) {} 234 }; 235 236 class UnknownVal : public DefinedOrUnknownSVal { 237 public: 238 explicit UnknownVal() : DefinedOrUnknownSVal(UnknownValKind) {} 239 240 static bool classof(SVal V) { return V.getBaseKind() == UnknownValKind; } 241 }; 242 243 class DefinedSVal : public DefinedOrUnknownSVal { 244 public: 245 // We want calling these methods to be a compiler error since they are 246 // tautologically true/false. 247 bool isUnknown() const = delete; 248 bool isUnknownOrUndef() const = delete; 249 bool isValid() const = delete; 250 251 static bool classof(SVal V) { return !V.isUnknownOrUndef(); } 252 253 protected: 254 explicit DefinedSVal(const void *d, bool isLoc, unsigned ValKind) 255 : DefinedOrUnknownSVal(d, isLoc, ValKind) {} 256 }; 257 258 /// Represents an SVal that is guaranteed to not be UnknownVal. 
259 class KnownSVal : public SVal { 260 public: 261 KnownSVal(const DefinedSVal &V) : SVal(V) {} 262 KnownSVal(const UndefinedVal &V) : SVal(V) {} 263 static bool classof(SVal V) { return !V.isUnknown(); } 264 }; 265 266 class NonLoc : public DefinedSVal { 267 protected: 268 explicit NonLoc(unsigned SubKind, const void *d) 269 : DefinedSVal(d, false, SubKind) {} 270 271 public: 272 void dumpToStream(raw_ostream &Out) const; 273 274 static bool isCompoundType(QualType T) { 275 return T->isArrayType() || T->isRecordType() || 276 T->isAnyComplexType() || T->isVectorType(); 277 } 278 279 static bool classof(SVal V) { return V.getBaseKind() == NonLocKind; } 280 }; 281 282 class Loc : public DefinedSVal { 283 protected: 284 explicit Loc(unsigned SubKind, const void *D) 285 : DefinedSVal(const_cast<void *>(D), true, SubKind) {} 286 287 public: 288 void dumpToStream(raw_ostream &Out) const; 289 290 static bool isLocType(QualType T) { 291 return T->isAnyPointerType() || T->isBlockPointerType() || 292 T->isReferenceType() || T->isNullPtrType(); 293 } 294 295 static bool classof(SVal V) { return V.getBaseKind() == LocKind; } 296 }; 297 298 //==------------------------------------------------------------------------==// 299 // Subclasses of NonLoc. 300 //==------------------------------------------------------------------------==// 301 302 namespace nonloc { 303 304 /// Represents symbolic expression that isn't a location. 
305 class SymbolVal : public NonLoc { 306 public: 307 SymbolVal() = delete; 308 SymbolVal(SymbolRef sym) : NonLoc(SymbolValKind, sym) { 309 assert(sym); 310 assert(!Loc::isLocType(sym->getType())); 311 } 312 313 LLVM_ATTRIBUTE_RETURNS_NONNULL 314 SymbolRef getSymbol() const { 315 return (const SymExpr *) Data; 316 } 317 318 bool isExpression() const { 319 return !isa<SymbolData>(getSymbol()); 320 } 321 322 static bool classof(SVal V) { 323 return V.getBaseKind() == NonLocKind && V.getSubKind() == SymbolValKind; 324 } 325 326 static bool classof(NonLoc V) { return V.getSubKind() == SymbolValKind; } 327 }; 328 329 /// Value representing integer constant. 330 class ConcreteInt : public NonLoc { 331 public: 332 explicit ConcreteInt(const llvm::APSInt& V) : NonLoc(ConcreteIntKind, &V) {} 333 334 const llvm::APSInt& getValue() const { 335 return *static_cast<const llvm::APSInt *>(Data); 336 } 337 338 static bool classof(SVal V) { 339 return V.getBaseKind() == NonLocKind && V.getSubKind() == ConcreteIntKind; 340 } 341 342 static bool classof(NonLoc V) { return V.getSubKind() == ConcreteIntKind; } 343 }; 344 345 class LocAsInteger : public NonLoc { 346 friend class ento::SValBuilder; 347 348 explicit LocAsInteger(const std::pair<SVal, uintptr_t> &data) 349 : NonLoc(LocAsIntegerKind, &data) { 350 // We do not need to represent loc::ConcreteInt as LocAsInteger, 351 // as it'd collapse into a nonloc::ConcreteInt instead. 
352 assert(data.first.getBaseKind() == LocKind && 353 (data.first.getSubKind() == loc::MemRegionValKind || 354 data.first.getSubKind() == loc::GotoLabelKind)); 355 } 356 357 public: 358 Loc getLoc() const { 359 const std::pair<SVal, uintptr_t> *D = 360 static_cast<const std::pair<SVal, uintptr_t> *>(Data); 361 return D->first.castAs<Loc>(); 362 } 363 364 unsigned getNumBits() const { 365 const std::pair<SVal, uintptr_t> *D = 366 static_cast<const std::pair<SVal, uintptr_t> *>(Data); 367 return D->second; 368 } 369 370 static bool classof(SVal V) { 371 return V.getBaseKind() == NonLocKind && V.getSubKind() == LocAsIntegerKind; 372 } 373 374 static bool classof(NonLoc V) { return V.getSubKind() == LocAsIntegerKind; } 375 }; 376 377 class CompoundVal : public NonLoc { 378 friend class ento::SValBuilder; 379 380 explicit CompoundVal(const CompoundValData *D) : NonLoc(CompoundValKind, D) { 381 assert(D); 382 } 383 384 public: 385 LLVM_ATTRIBUTE_RETURNS_NONNULL 386 const CompoundValData* getValue() const { 387 return static_cast<const CompoundValData *>(Data); 388 } 389 390 using iterator = llvm::ImmutableList<SVal>::iterator; 391 392 iterator begin() const; 393 iterator end() const; 394 395 static bool classof(SVal V) { 396 return V.getBaseKind() == NonLocKind && V.getSubKind() == CompoundValKind; 397 } 398 399 static bool classof(NonLoc V) { return V.getSubKind() == CompoundValKind; } 400 }; 401 402 class LazyCompoundVal : public NonLoc { 403 friend class ento::SValBuilder; 404 405 explicit LazyCompoundVal(const LazyCompoundValData *D) 406 : NonLoc(LazyCompoundValKind, D) { 407 assert(D); 408 } 409 410 public: 411 LLVM_ATTRIBUTE_RETURNS_NONNULL 412 const LazyCompoundValData *getCVData() const { 413 return static_cast<const LazyCompoundValData *>(Data); 414 } 415 416 /// It might return null. 
417 const void *getStore() const; 418 419 LLVM_ATTRIBUTE_RETURNS_NONNULL 420 const TypedValueRegion *getRegion() const; 421 422 static bool classof(SVal V) { 423 return V.getBaseKind() == NonLocKind && 424 V.getSubKind() == LazyCompoundValKind; 425 } 426 427 static bool classof(NonLoc V) { 428 return V.getSubKind() == LazyCompoundValKind; 429 } 430 }; 431 432 /// Value representing pointer-to-member. 433 /// 434 /// This value is qualified as NonLoc because neither loading nor storing 435 /// operations are applied to it. Instead, the analyzer uses the L-value coming 436 /// from pointer-to-member applied to an object. 437 /// This SVal is represented by a NamedDecl which can be a member function 438 /// pointer or a member data pointer and an optional list of CXXBaseSpecifiers. 439 /// This list is required to accumulate the pointer-to-member cast history to 440 /// figure out the correct subobject field. In particular, implicit casts grow 441 /// this list and explicit casts like static_cast shrink this list. 
442 class PointerToMember : public NonLoc { 443 friend class ento::SValBuilder; 444 445 public: 446 using PTMDataType = 447 llvm::PointerUnion<const NamedDecl *, const PointerToMemberData *>; 448 449 const PTMDataType getPTMData() const { 450 return PTMDataType::getFromOpaqueValue(const_cast<void *>(Data)); 451 } 452 453 bool isNullMemberPointer() const; 454 455 const NamedDecl *getDecl() const; 456 457 template<typename AdjustedDecl> 458 const AdjustedDecl *getDeclAs() const { 459 return dyn_cast_or_null<AdjustedDecl>(getDecl()); 460 } 461 462 using iterator = llvm::ImmutableList<const CXXBaseSpecifier *>::iterator; 463 464 iterator begin() const; 465 iterator end() const; 466 467 static bool classof(SVal V) { 468 return V.getBaseKind() == NonLocKind && 469 V.getSubKind() == PointerToMemberKind; 470 } 471 472 static bool classof(NonLoc V) { 473 return V.getSubKind() == PointerToMemberKind; 474 } 475 476 private: 477 explicit PointerToMember(const PTMDataType D) 478 : NonLoc(PointerToMemberKind, D.getOpaqueValue()) {} 479 }; 480 481 } // namespace nonloc 482 483 //==------------------------------------------------------------------------==// 484 // Subclasses of Loc. 485 //==------------------------------------------------------------------------==// 486 487 namespace loc { 488 489 class GotoLabel : public Loc { 490 public: 491 explicit GotoLabel(const LabelDecl *Label) : Loc(GotoLabelKind, Label) { 492 assert(Label); 493 } 494 495 const LabelDecl *getLabel() const { 496 return static_cast<const LabelDecl *>(Data); 497 } 498 499 static bool classof(SVal V) { 500 return V.getBaseKind() == LocKind && V.getSubKind() == GotoLabelKind; 501 } 502 503 static bool classof(Loc V) { return V.getSubKind() == GotoLabelKind; } 504 }; 505 506 class MemRegionVal : public Loc { 507 public: 508 explicit MemRegionVal(const MemRegion* r) : Loc(MemRegionValKind, r) { 509 assert(r); 510 } 511 512 /// Get the underlining region. 
513 const MemRegion *getRegion() const { 514 return static_cast<const MemRegion *>(Data); 515 } 516 517 /// Get the underlining region and strip casts. 518 const MemRegion* stripCasts(bool StripBaseCasts = true) const; 519 520 template <typename REGION> 521 const REGION* getRegionAs() const { 522 return dyn_cast<REGION>(getRegion()); 523 } 524 525 bool operator==(const MemRegionVal &R) const { 526 return getRegion() == R.getRegion(); 527 } 528 529 bool operator!=(const MemRegionVal &R) const { 530 return getRegion() != R.getRegion(); 531 } 532 533 static bool classof(SVal V) { 534 return V.getBaseKind() == LocKind && V.getSubKind() == MemRegionValKind; 535 } 536 537 static bool classof(Loc V) { return V.getSubKind() == MemRegionValKind; } 538 }; 539 540 class ConcreteInt : public Loc { 541 public: 542 explicit ConcreteInt(const llvm::APSInt& V) : Loc(ConcreteIntKind, &V) {} 543 544 const llvm::APSInt &getValue() const { 545 return *static_cast<const llvm::APSInt *>(Data); 546 } 547 548 static bool classof(SVal V) { 549 return V.getBaseKind() == LocKind && V.getSubKind() == ConcreteIntKind; 550 } 551 552 static bool classof(Loc V) { return V.getSubKind() == ConcreteIntKind; } 553 }; 554 555 } // namespace loc 556 } // namespace ento 557 } // namespace clang 558 559 namespace llvm { 560 template <typename To, typename From> 561 struct CastInfo< 562 To, From, 563 std::enable_if_t<std::is_base_of<::clang::ento::SVal, From>::value>> 564 : public CastIsPossible<To, ::clang::ento::SVal> { 565 using Self = CastInfo< 566 To, From, 567 std::enable_if_t<std::is_base_of<::clang::ento::SVal, From>::value>>; 568 static bool isPossible(const From &V) { 569 return To::classof(*static_cast<const ::clang::ento::SVal *>(&V)); 570 } 571 static Optional<To> castFailed() { return Optional<To>{}; } 572 static To doCast(const From &f) { 573 return *static_cast<const To *>(cast<::clang::ento::SVal>(&f)); 574 } 575 static Optional<To> doCastIfPossible(const From &f) { 576 if 
(!Self::isPossible(f)) 577 return Self::castFailed(); 578 return doCast(f); 579 } 580 }; 581 } // namespace llvm 582 583 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H 584