//===- SymbolManager.h - Management of Symbolic Values ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines SymbolManager, a class that manages symbolic values
// created for use by ExprEngine and related classes.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H

#include "clang/AST/Expr.h"
#include "clang/AST/Type.h"
#include "clang/Analysis/AnalysisDeclContext.h"
#include "clang/Basic/LLVM.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/Support/Allocator.h"
#include <cassert>

namespace clang {

class ASTContext;
class Stmt;

namespace ento {

class BasicValueFactory;
class StoreManager;

/// A symbol representing the value stored at a MemRegion.
41 class SymbolRegionValue : public SymbolData { 42 const TypedValueRegion *R; 43 44 public: 45 SymbolRegionValue(SymbolID sym, const TypedValueRegion *r) 46 : SymbolData(SymbolRegionValueKind, sym), R(r) { 47 assert(r); 48 assert(isValidTypeForSymbol(r->getValueType())); 49 } 50 51 const TypedValueRegion* getRegion() const { return R; } 52 53 static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) { 54 profile.AddInteger((unsigned) SymbolRegionValueKind); 55 profile.AddPointer(R); 56 } 57 58 void Profile(llvm::FoldingSetNodeID& profile) override { 59 Profile(profile, R); 60 } 61 62 StringRef getKindStr() const override; 63 64 void dumpToStream(raw_ostream &os) const override; 65 const MemRegion *getOriginRegion() const override { return getRegion(); } 66 67 QualType getType() const override; 68 69 // Implement isa<T> support. 70 static bool classof(const SymExpr *SE) { 71 return SE->getKind() == SymbolRegionValueKind; 72 } 73 }; 74 75 /// A symbol representing the result of an expression in the case when we do 76 /// not know anything about what the expression is. 77 class SymbolConjured : public SymbolData { 78 const Stmt *S; 79 QualType T; 80 unsigned Count; 81 const LocationContext *LCtx; 82 const void *SymbolTag; 83 84 public: 85 SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx, 86 QualType t, unsigned count, const void *symbolTag) 87 : SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count), 88 LCtx(lctx), SymbolTag(symbolTag) { 89 // FIXME: 's' might be a nullptr if we're conducting invalidation 90 // that was caused by a destructor call on a temporary object, 91 // which has no statement associated with it. 92 // Due to this, we might be creating the same invalidation symbol for 93 // two different invalidation passes (for two different temporaries). 
94 assert(lctx); 95 assert(isValidTypeForSymbol(t)); 96 } 97 98 const Stmt *getStmt() const { return S; } 99 unsigned getCount() const { return Count; } 100 const void *getTag() const { return SymbolTag; } 101 102 QualType getType() const override; 103 104 StringRef getKindStr() const override; 105 106 void dumpToStream(raw_ostream &os) const override; 107 108 static void Profile(llvm::FoldingSetNodeID& profile, const Stmt *S, 109 QualType T, unsigned Count, const LocationContext *LCtx, 110 const void *SymbolTag) { 111 profile.AddInteger((unsigned) SymbolConjuredKind); 112 profile.AddPointer(S); 113 profile.AddPointer(LCtx); 114 profile.Add(T); 115 profile.AddInteger(Count); 116 profile.AddPointer(SymbolTag); 117 } 118 119 void Profile(llvm::FoldingSetNodeID& profile) override { 120 Profile(profile, S, T, Count, LCtx, SymbolTag); 121 } 122 123 // Implement isa<T> support. 124 static bool classof(const SymExpr *SE) { 125 return SE->getKind() == SymbolConjuredKind; 126 } 127 }; 128 129 /// A symbol representing the value of a MemRegion whose parent region has 130 /// symbolic value. 
131 class SymbolDerived : public SymbolData { 132 SymbolRef parentSymbol; 133 const TypedValueRegion *R; 134 135 public: 136 SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r) 137 : SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) { 138 assert(parent); 139 assert(r); 140 assert(isValidTypeForSymbol(r->getValueType())); 141 } 142 143 SymbolRef getParentSymbol() const { return parentSymbol; } 144 const TypedValueRegion *getRegion() const { return R; } 145 146 QualType getType() const override; 147 148 StringRef getKindStr() const override; 149 150 void dumpToStream(raw_ostream &os) const override; 151 const MemRegion *getOriginRegion() const override { return getRegion(); } 152 153 static void Profile(llvm::FoldingSetNodeID& profile, SymbolRef parent, 154 const TypedValueRegion *r) { 155 profile.AddInteger((unsigned) SymbolDerivedKind); 156 profile.AddPointer(r); 157 profile.AddPointer(parent); 158 } 159 160 void Profile(llvm::FoldingSetNodeID& profile) override { 161 Profile(profile, parentSymbol, R); 162 } 163 164 // Implement isa<T> support. 165 static bool classof(const SymExpr *SE) { 166 return SE->getKind() == SymbolDerivedKind; 167 } 168 }; 169 170 /// SymbolExtent - Represents the extent (size in bytes) of a bounded region. 171 /// Clients should not ask the SymbolManager for a region's extent. Always use 172 /// SubRegion::getExtent instead -- the value returned may not be a symbol. 
173 class SymbolExtent : public SymbolData { 174 const SubRegion *R; 175 176 public: 177 SymbolExtent(SymbolID sym, const SubRegion *r) 178 : SymbolData(SymbolExtentKind, sym), R(r) { 179 assert(r); 180 } 181 182 const SubRegion *getRegion() const { return R; } 183 184 QualType getType() const override; 185 186 StringRef getKindStr() const override; 187 188 void dumpToStream(raw_ostream &os) const override; 189 190 static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) { 191 profile.AddInteger((unsigned) SymbolExtentKind); 192 profile.AddPointer(R); 193 } 194 195 void Profile(llvm::FoldingSetNodeID& profile) override { 196 Profile(profile, R); 197 } 198 199 // Implement isa<T> support. 200 static bool classof(const SymExpr *SE) { 201 return SE->getKind() == SymbolExtentKind; 202 } 203 }; 204 205 /// SymbolMetadata - Represents path-dependent metadata about a specific region. 206 /// Metadata symbols remain live as long as they are marked as in use before 207 /// dead-symbol sweeping AND their associated regions are still alive. 208 /// Intended for use by checkers. 
209 class SymbolMetadata : public SymbolData { 210 const MemRegion* R; 211 const Stmt *S; 212 QualType T; 213 const LocationContext *LCtx; 214 unsigned Count; 215 const void *Tag; 216 217 public: 218 SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t, 219 const LocationContext *LCtx, unsigned count, const void *tag) 220 : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx), 221 Count(count), Tag(tag) { 222 assert(r); 223 assert(s); 224 assert(isValidTypeForSymbol(t)); 225 assert(LCtx); 226 assert(tag); 227 } 228 229 const MemRegion *getRegion() const { return R; } 230 const Stmt *getStmt() const { return S; } 231 const LocationContext *getLocationContext() const { return LCtx; } 232 unsigned getCount() const { return Count; } 233 const void *getTag() const { return Tag; } 234 235 QualType getType() const override; 236 237 StringRef getKindStr() const override; 238 239 void dumpToStream(raw_ostream &os) const override; 240 241 static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R, 242 const Stmt *S, QualType T, const LocationContext *LCtx, 243 unsigned Count, const void *Tag) { 244 profile.AddInteger((unsigned) SymbolMetadataKind); 245 profile.AddPointer(R); 246 profile.AddPointer(S); 247 profile.Add(T); 248 profile.AddPointer(LCtx); 249 profile.AddInteger(Count); 250 profile.AddPointer(Tag); 251 } 252 253 void Profile(llvm::FoldingSetNodeID& profile) override { 254 Profile(profile, R, S, T, LCtx, Count, Tag); 255 } 256 257 // Implement isa<T> support. 258 static bool classof(const SymExpr *SE) { 259 return SE->getKind() == SymbolMetadataKind; 260 } 261 }; 262 263 /// Represents a cast expression. 264 class SymbolCast : public SymExpr { 265 const SymExpr *Operand; 266 267 /// Type of the operand. 268 QualType FromTy; 269 270 /// The type of the result. 
271 QualType ToTy; 272 273 public: 274 SymbolCast(const SymExpr *In, QualType From, QualType To) 275 : SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) { 276 assert(In); 277 assert(isValidTypeForSymbol(From)); 278 // FIXME: GenericTaintChecker creates symbols of void type. 279 // Otherwise, 'To' should also be a valid type. 280 } 281 282 unsigned computeComplexity() const override { 283 if (Complexity == 0) 284 Complexity = 1 + Operand->computeComplexity(); 285 return Complexity; 286 } 287 288 QualType getType() const override { return ToTy; } 289 290 const SymExpr *getOperand() const { return Operand; } 291 292 void dumpToStream(raw_ostream &os) const override; 293 294 static void Profile(llvm::FoldingSetNodeID& ID, 295 const SymExpr *In, QualType From, QualType To) { 296 ID.AddInteger((unsigned) SymbolCastKind); 297 ID.AddPointer(In); 298 ID.Add(From); 299 ID.Add(To); 300 } 301 302 void Profile(llvm::FoldingSetNodeID& ID) override { 303 Profile(ID, Operand, FromTy, ToTy); 304 } 305 306 // Implement isa<T> support. 307 static bool classof(const SymExpr *SE) { 308 return SE->getKind() == SymbolCastKind; 309 } 310 }; 311 312 /// Represents a symbolic expression involving a binary operator 313 class BinarySymExpr : public SymExpr { 314 BinaryOperator::Opcode Op; 315 QualType T; 316 317 protected: 318 BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t) 319 : SymExpr(k), Op(op), T(t) { 320 assert(classof(this)); 321 // Binary expressions are results of arithmetic. Pointer arithmetic is not 322 // handled by binary expressions, but it is instead handled by applying 323 // sub-regions to regions. 324 assert(isValidTypeForSymbol(t) && !Loc::isLocType(t)); 325 } 326 327 public: 328 // FIXME: We probably need to make this out-of-line to avoid redundant 329 // generation of virtual functions. 330 QualType getType() const override { return T; } 331 332 BinaryOperator::Opcode getOpcode() const { return Op; } 333 334 // Implement isa<T> support. 
335 static bool classof(const SymExpr *SE) { 336 Kind k = SE->getKind(); 337 return k >= BEGIN_BINARYSYMEXPRS && k <= END_BINARYSYMEXPRS; 338 } 339 340 protected: 341 static unsigned computeOperandComplexity(const SymExpr *Value) { 342 return Value->computeComplexity(); 343 } 344 static unsigned computeOperandComplexity(const llvm::APSInt &Value) { 345 return 1; 346 } 347 348 static const llvm::APSInt *getPointer(const llvm::APSInt &Value) { 349 return &Value; 350 } 351 static const SymExpr *getPointer(const SymExpr *Value) { return Value; } 352 353 static void dumpToStreamImpl(raw_ostream &os, const SymExpr *Value); 354 static void dumpToStreamImpl(raw_ostream &os, const llvm::APSInt &Value); 355 static void dumpToStreamImpl(raw_ostream &os, BinaryOperator::Opcode op); 356 }; 357 358 /// Template implementation for all binary symbolic expressions 359 template <class LHSTYPE, class RHSTYPE, SymExpr::Kind ClassKind> 360 class BinarySymExprImpl : public BinarySymExpr { 361 LHSTYPE LHS; 362 RHSTYPE RHS; 363 364 public: 365 BinarySymExprImpl(LHSTYPE lhs, BinaryOperator::Opcode op, RHSTYPE rhs, 366 QualType t) 367 : BinarySymExpr(ClassKind, op, t), LHS(lhs), RHS(rhs) { 368 assert(getPointer(lhs)); 369 assert(getPointer(rhs)); 370 } 371 372 void dumpToStream(raw_ostream &os) const override { 373 dumpToStreamImpl(os, LHS); 374 dumpToStreamImpl(os, getOpcode()); 375 dumpToStreamImpl(os, RHS); 376 } 377 378 LHSTYPE getLHS() const { return LHS; } 379 RHSTYPE getRHS() const { return RHS; } 380 381 unsigned computeComplexity() const override { 382 if (Complexity == 0) 383 Complexity = 384 computeOperandComplexity(RHS) + computeOperandComplexity(LHS); 385 return Complexity; 386 } 387 388 static void Profile(llvm::FoldingSetNodeID &ID, LHSTYPE lhs, 389 BinaryOperator::Opcode op, RHSTYPE rhs, QualType t) { 390 ID.AddInteger((unsigned)ClassKind); 391 ID.AddPointer(getPointer(lhs)); 392 ID.AddInteger(op); 393 ID.AddPointer(getPointer(rhs)); 394 ID.Add(t); 395 } 396 397 void 
Profile(llvm::FoldingSetNodeID &ID) override { 398 Profile(ID, LHS, getOpcode(), RHS, getType()); 399 } 400 401 // Implement isa<T> support. 402 static bool classof(const SymExpr *SE) { return SE->getKind() == ClassKind; } 403 }; 404 405 /// Represents a symbolic expression like 'x' + 3. 406 using SymIntExpr = BinarySymExprImpl<const SymExpr *, const llvm::APSInt &, 407 SymExpr::Kind::SymIntExprKind>; 408 409 /// Represents a symbolic expression like 3 - 'x'. 410 using IntSymExpr = BinarySymExprImpl<const llvm::APSInt &, const SymExpr *, 411 SymExpr::Kind::IntSymExprKind>; 412 413 /// Represents a symbolic expression like 'x' + 'y'. 414 using SymSymExpr = BinarySymExprImpl<const SymExpr *, const SymExpr *, 415 SymExpr::Kind::SymSymExprKind>; 416 417 class SymbolManager { 418 using DataSetTy = llvm::FoldingSet<SymExpr>; 419 using SymbolDependTy = 420 llvm::DenseMap<SymbolRef, std::unique_ptr<SymbolRefSmallVectorTy>>; 421 422 DataSetTy DataSet; 423 424 /// Stores the extra dependencies between symbols: the data should be kept 425 /// alive as long as the key is live. 426 SymbolDependTy SymbolDependencies; 427 428 unsigned SymbolCounter = 0; 429 llvm::BumpPtrAllocator& BPAlloc; 430 BasicValueFactory &BV; 431 ASTContext &Ctx; 432 433 public: 434 SymbolManager(ASTContext &ctx, BasicValueFactory &bv, 435 llvm::BumpPtrAllocator& bpalloc) 436 : SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {} 437 438 static bool canSymbolicate(QualType T); 439 440 /// Make a unique symbol for MemRegion R according to its kind. 
441 const SymbolRegionValue* getRegionValueSymbol(const TypedValueRegion* R); 442 443 const SymbolConjured* conjureSymbol(const Stmt *E, 444 const LocationContext *LCtx, 445 QualType T, 446 unsigned VisitCount, 447 const void *SymbolTag = nullptr); 448 449 const SymbolConjured* conjureSymbol(const Expr *E, 450 const LocationContext *LCtx, 451 unsigned VisitCount, 452 const void *SymbolTag = nullptr) { 453 return conjureSymbol(E, LCtx, E->getType(), VisitCount, SymbolTag); 454 } 455 456 const SymbolDerived *getDerivedSymbol(SymbolRef parentSymbol, 457 const TypedValueRegion *R); 458 459 const SymbolExtent *getExtentSymbol(const SubRegion *R); 460 461 /// Creates a metadata symbol associated with a specific region. 462 /// 463 /// VisitCount can be used to differentiate regions corresponding to 464 /// different loop iterations, thus, making the symbol path-dependent. 465 const SymbolMetadata *getMetadataSymbol(const MemRegion *R, const Stmt *S, 466 QualType T, 467 const LocationContext *LCtx, 468 unsigned VisitCount, 469 const void *SymbolTag = nullptr); 470 471 const SymbolCast* getCastSymbol(const SymExpr *Operand, 472 QualType From, QualType To); 473 474 const SymIntExpr *getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op, 475 const llvm::APSInt& rhs, QualType t); 476 477 const SymIntExpr *getSymIntExpr(const SymExpr &lhs, BinaryOperator::Opcode op, 478 const llvm::APSInt& rhs, QualType t) { 479 return getSymIntExpr(&lhs, op, rhs, t); 480 } 481 482 const IntSymExpr *getIntSymExpr(const llvm::APSInt& lhs, 483 BinaryOperator::Opcode op, 484 const SymExpr *rhs, QualType t); 485 486 const SymSymExpr *getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, 487 const SymExpr *rhs, QualType t); 488 489 QualType getType(const SymExpr *SE) const { 490 return SE->getType(); 491 } 492 493 /// Add artificial symbol dependency. 494 /// 495 /// The dependent symbol should stay alive as long as the primary is alive. 
496 void addSymbolDependency(const SymbolRef Primary, const SymbolRef Dependent); 497 498 const SymbolRefSmallVectorTy *getDependentSymbols(const SymbolRef Primary); 499 500 ASTContext &getContext() { return Ctx; } 501 BasicValueFactory &getBasicVals() { return BV; } 502 }; 503 504 /// A class responsible for cleaning up unused symbols. 505 class SymbolReaper { 506 enum SymbolStatus { 507 NotProcessed, 508 HaveMarkedDependents 509 }; 510 511 using SymbolSetTy = llvm::DenseSet<SymbolRef>; 512 using SymbolMapTy = llvm::DenseMap<SymbolRef, SymbolStatus>; 513 using RegionSetTy = llvm::DenseSet<const MemRegion *>; 514 515 SymbolMapTy TheLiving; 516 SymbolSetTy MetadataInUse; 517 518 RegionSetTy RegionRoots; 519 520 const StackFrameContext *LCtx; 521 const Stmt *Loc; 522 SymbolManager& SymMgr; 523 StoreRef reapedStore; 524 llvm::DenseMap<const MemRegion *, unsigned> includedRegionCache; 525 526 public: 527 /// Construct a reaper object, which removes everything which is not 528 /// live before we execute statement s in the given location context. 529 /// 530 /// If the statement is NULL, everything is this and parent contexts is 531 /// considered live. 532 /// If the stack frame context is NULL, everything on stack is considered 533 /// dead. 534 SymbolReaper(const StackFrameContext *Ctx, const Stmt *s, 535 SymbolManager &symmgr, StoreManager &storeMgr) 536 : LCtx(Ctx), Loc(s), SymMgr(symmgr), reapedStore(nullptr, storeMgr) {} 537 538 const LocationContext *getLocationContext() const { return LCtx; } 539 540 bool isLive(SymbolRef sym); 541 bool isLiveRegion(const MemRegion *region); 542 bool isLive(const Expr *ExprVal, const LocationContext *LCtx) const; 543 bool isLive(const VarRegion *VR, bool includeStoreBindings = false) const; 544 545 /// Unconditionally marks a symbol as live. 546 /// 547 /// This should never be 548 /// used by checkers, only by the state infrastructure such as the store and 549 /// environment. 
Checkers should instead use metadata symbols and markInUse. 550 void markLive(SymbolRef sym); 551 552 /// Marks a symbol as important to a checker. 553 /// 554 /// For metadata symbols, 555 /// this will keep the symbol alive as long as its associated region is also 556 /// live. For other symbols, this has no effect; checkers are not permitted 557 /// to influence the life of other symbols. This should be used before any 558 /// symbol marking has occurred, i.e. in the MarkLiveSymbols callback. 559 void markInUse(SymbolRef sym); 560 561 using region_iterator = RegionSetTy::const_iterator; 562 563 region_iterator region_begin() const { return RegionRoots.begin(); } 564 region_iterator region_end() const { return RegionRoots.end(); } 565 566 /// Returns whether or not a symbol has been confirmed dead. 567 /// 568 /// This should only be called once all marking of dead symbols has completed. 569 /// (For checkers, this means only in the checkDeadSymbols callback.) 570 bool isDead(SymbolRef sym) { 571 return !isLive(sym); 572 } 573 574 void markLive(const MemRegion *region); 575 void markElementIndicesLive(const MemRegion *region); 576 577 /// Set to the value of the symbolic store after 578 /// StoreManager::removeDeadBindings has been called. 579 void setReapedStore(StoreRef st) { reapedStore = st; } 580 581 private: 582 /// Mark the symbols dependent on the input symbol as live. 583 void markDependentsLive(SymbolRef sym); 584 }; 585 586 class SymbolVisitor { 587 protected: 588 ~SymbolVisitor() = default; 589 590 public: 591 SymbolVisitor() = default; 592 SymbolVisitor(const SymbolVisitor &) = default; 593 SymbolVisitor(SymbolVisitor &&) {} 594 595 /// A visitor method invoked by ProgramStateManager::scanReachableSymbols. 596 /// 597 /// The method returns \c true if symbols should continue be scanned and \c 598 /// false otherwise. 
599 virtual bool VisitSymbol(SymbolRef sym) = 0; 600 virtual bool VisitMemRegion(const MemRegion *) { return true; } 601 }; 602 603 } // namespace ento 604 605 } // namespace clang 606 607 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H 608