1 //===- SymbolManager.h - Management of Symbolic Values ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines SymbolManager, a class that manages symbolic values 10 // created for use by ExprEngine and related classes. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H 15 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H 16 17 #include "clang/AST/Expr.h" 18 #include "clang/AST/Type.h" 19 #include "clang/Analysis/AnalysisDeclContext.h" 20 #include "clang/Basic/LLVM.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" 22 #include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h" 23 #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" 24 #include "llvm/ADT/DenseMap.h" 25 #include "llvm/ADT/DenseSet.h" 26 #include "llvm/ADT/FoldingSet.h" 27 #include "llvm/Support/Allocator.h" 28 #include <cassert> 29 30 namespace clang { 31 32 class ASTContext; 33 class Stmt; 34 35 namespace ento { 36 37 class BasicValueFactory; 38 class StoreManager; 39 40 /// A symbol representing the value stored at a MemRegion. 
41 class SymbolRegionValue : public SymbolData { 42 const TypedValueRegion *R; 43 44 public: 45 SymbolRegionValue(SymbolID sym, const TypedValueRegion *r) 46 : SymbolData(SymbolRegionValueKind, sym), R(r) { 47 assert(r); 48 assert(isValidTypeForSymbol(r->getValueType())); 49 } 50 51 const TypedValueRegion* getRegion() const { return R; } 52 53 static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) { 54 profile.AddInteger((unsigned) SymbolRegionValueKind); 55 profile.AddPointer(R); 56 } 57 58 void Profile(llvm::FoldingSetNodeID& profile) override { 59 Profile(profile, R); 60 } 61 62 void dumpToStream(raw_ostream &os) const override; 63 const MemRegion *getOriginRegion() const override { return getRegion(); } 64 65 QualType getType() const override; 66 67 // Implement isa<T> support. 68 static bool classof(const SymExpr *SE) { 69 return SE->getKind() == SymbolRegionValueKind; 70 } 71 }; 72 73 /// A symbol representing the result of an expression in the case when we do 74 /// not know anything about what the expression is. 75 class SymbolConjured : public SymbolData { 76 const Stmt *S; 77 QualType T; 78 unsigned Count; 79 const LocationContext *LCtx; 80 const void *SymbolTag; 81 82 public: 83 SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx, 84 QualType t, unsigned count, const void *symbolTag) 85 : SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count), 86 LCtx(lctx), SymbolTag(symbolTag) { 87 // FIXME: 's' might be a nullptr if we're conducting invalidation 88 // that was caused by a destructor call on a temporary object, 89 // which has no statement associated with it. 90 // Due to this, we might be creating the same invalidation symbol for 91 // two different invalidation passes (for two different temporaries). 
92 assert(lctx); 93 assert(isValidTypeForSymbol(t)); 94 } 95 96 const Stmt *getStmt() const { return S; } 97 unsigned getCount() const { return Count; } 98 const void *getTag() const { return SymbolTag; } 99 100 QualType getType() const override; 101 102 void dumpToStream(raw_ostream &os) const override; 103 104 static void Profile(llvm::FoldingSetNodeID& profile, const Stmt *S, 105 QualType T, unsigned Count, const LocationContext *LCtx, 106 const void *SymbolTag) { 107 profile.AddInteger((unsigned) SymbolConjuredKind); 108 profile.AddPointer(S); 109 profile.AddPointer(LCtx); 110 profile.Add(T); 111 profile.AddInteger(Count); 112 profile.AddPointer(SymbolTag); 113 } 114 115 void Profile(llvm::FoldingSetNodeID& profile) override { 116 Profile(profile, S, T, Count, LCtx, SymbolTag); 117 } 118 119 // Implement isa<T> support. 120 static bool classof(const SymExpr *SE) { 121 return SE->getKind() == SymbolConjuredKind; 122 } 123 }; 124 125 /// A symbol representing the value of a MemRegion whose parent region has 126 /// symbolic value. 
127 class SymbolDerived : public SymbolData { 128 SymbolRef parentSymbol; 129 const TypedValueRegion *R; 130 131 public: 132 SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r) 133 : SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) { 134 assert(parent); 135 assert(r); 136 assert(isValidTypeForSymbol(r->getValueType())); 137 } 138 139 SymbolRef getParentSymbol() const { return parentSymbol; } 140 const TypedValueRegion *getRegion() const { return R; } 141 142 QualType getType() const override; 143 144 void dumpToStream(raw_ostream &os) const override; 145 const MemRegion *getOriginRegion() const override { return getRegion(); } 146 147 static void Profile(llvm::FoldingSetNodeID& profile, SymbolRef parent, 148 const TypedValueRegion *r) { 149 profile.AddInteger((unsigned) SymbolDerivedKind); 150 profile.AddPointer(r); 151 profile.AddPointer(parent); 152 } 153 154 void Profile(llvm::FoldingSetNodeID& profile) override { 155 Profile(profile, parentSymbol, R); 156 } 157 158 // Implement isa<T> support. 159 static bool classof(const SymExpr *SE) { 160 return SE->getKind() == SymbolDerivedKind; 161 } 162 }; 163 164 /// SymbolExtent - Represents the extent (size in bytes) of a bounded region. 165 /// Clients should not ask the SymbolManager for a region's extent. Always use 166 /// SubRegion::getExtent instead -- the value returned may not be a symbol. 
167 class SymbolExtent : public SymbolData { 168 const SubRegion *R; 169 170 public: 171 SymbolExtent(SymbolID sym, const SubRegion *r) 172 : SymbolData(SymbolExtentKind, sym), R(r) { 173 assert(r); 174 } 175 176 const SubRegion *getRegion() const { return R; } 177 178 QualType getType() const override; 179 180 void dumpToStream(raw_ostream &os) const override; 181 182 static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) { 183 profile.AddInteger((unsigned) SymbolExtentKind); 184 profile.AddPointer(R); 185 } 186 187 void Profile(llvm::FoldingSetNodeID& profile) override { 188 Profile(profile, R); 189 } 190 191 // Implement isa<T> support. 192 static bool classof(const SymExpr *SE) { 193 return SE->getKind() == SymbolExtentKind; 194 } 195 }; 196 197 /// SymbolMetadata - Represents path-dependent metadata about a specific region. 198 /// Metadata symbols remain live as long as they are marked as in use before 199 /// dead-symbol sweeping AND their associated regions are still alive. 200 /// Intended for use by checkers. 
201 class SymbolMetadata : public SymbolData { 202 const MemRegion* R; 203 const Stmt *S; 204 QualType T; 205 const LocationContext *LCtx; 206 unsigned Count; 207 const void *Tag; 208 209 public: 210 SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t, 211 const LocationContext *LCtx, unsigned count, const void *tag) 212 : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx), 213 Count(count), Tag(tag) { 214 assert(r); 215 assert(s); 216 assert(isValidTypeForSymbol(t)); 217 assert(LCtx); 218 assert(tag); 219 } 220 221 const MemRegion *getRegion() const { return R; } 222 const Stmt *getStmt() const { return S; } 223 const LocationContext *getLocationContext() const { return LCtx; } 224 unsigned getCount() const { return Count; } 225 const void *getTag() const { return Tag; } 226 227 QualType getType() const override; 228 229 void dumpToStream(raw_ostream &os) const override; 230 231 static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R, 232 const Stmt *S, QualType T, const LocationContext *LCtx, 233 unsigned Count, const void *Tag) { 234 profile.AddInteger((unsigned) SymbolMetadataKind); 235 profile.AddPointer(R); 236 profile.AddPointer(S); 237 profile.Add(T); 238 profile.AddPointer(LCtx); 239 profile.AddInteger(Count); 240 profile.AddPointer(Tag); 241 } 242 243 void Profile(llvm::FoldingSetNodeID& profile) override { 244 Profile(profile, R, S, T, LCtx, Count, Tag); 245 } 246 247 // Implement isa<T> support. 248 static bool classof(const SymExpr *SE) { 249 return SE->getKind() == SymbolMetadataKind; 250 } 251 }; 252 253 /// Represents a cast expression. 254 class SymbolCast : public SymExpr { 255 const SymExpr *Operand; 256 257 /// Type of the operand. 258 QualType FromTy; 259 260 /// The type of the result. 
261 QualType ToTy; 262 263 public: 264 SymbolCast(const SymExpr *In, QualType From, QualType To) 265 : SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) { 266 assert(In); 267 assert(isValidTypeForSymbol(From)); 268 // FIXME: GenericTaintChecker creates symbols of void type. 269 // Otherwise, 'To' should also be a valid type. 270 } 271 272 unsigned computeComplexity() const override { 273 if (Complexity == 0) 274 Complexity = 1 + Operand->computeComplexity(); 275 return Complexity; 276 } 277 278 QualType getType() const override { return ToTy; } 279 280 const SymExpr *getOperand() const { return Operand; } 281 282 void dumpToStream(raw_ostream &os) const override; 283 284 static void Profile(llvm::FoldingSetNodeID& ID, 285 const SymExpr *In, QualType From, QualType To) { 286 ID.AddInteger((unsigned) SymbolCastKind); 287 ID.AddPointer(In); 288 ID.Add(From); 289 ID.Add(To); 290 } 291 292 void Profile(llvm::FoldingSetNodeID& ID) override { 293 Profile(ID, Operand, FromTy, ToTy); 294 } 295 296 // Implement isa<T> support. 297 static bool classof(const SymExpr *SE) { 298 return SE->getKind() == SymbolCastKind; 299 } 300 }; 301 302 /// Represents a symbolic expression involving a binary operator 303 class BinarySymExpr : public SymExpr { 304 BinaryOperator::Opcode Op; 305 QualType T; 306 307 protected: 308 BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t) 309 : SymExpr(k), Op(op), T(t) { 310 assert(classof(this)); 311 // Binary expressions are results of arithmetic. Pointer arithmetic is not 312 // handled by binary expressions, but it is instead handled by applying 313 // sub-regions to regions. 314 assert(isValidTypeForSymbol(t) && !Loc::isLocType(t)); 315 } 316 317 public: 318 // FIXME: We probably need to make this out-of-line to avoid redundant 319 // generation of virtual functions. 320 QualType getType() const override { return T; } 321 322 BinaryOperator::Opcode getOpcode() const { return Op; } 323 324 // Implement isa<T> support. 
325 static bool classof(const SymExpr *SE) { 326 Kind k = SE->getKind(); 327 return k >= BEGIN_BINARYSYMEXPRS && k <= END_BINARYSYMEXPRS; 328 } 329 }; 330 331 /// Represents a symbolic expression like 'x' + 3. 332 class SymIntExpr : public BinarySymExpr { 333 const SymExpr *LHS; 334 const llvm::APSInt& RHS; 335 336 public: 337 SymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op, 338 const llvm::APSInt &rhs, QualType t) 339 : BinarySymExpr(SymIntExprKind, op, t), LHS(lhs), RHS(rhs) { 340 assert(lhs); 341 } 342 343 void dumpToStream(raw_ostream &os) const override; 344 345 const SymExpr *getLHS() const { return LHS; } 346 const llvm::APSInt &getRHS() const { return RHS; } 347 348 unsigned computeComplexity() const override { 349 if (Complexity == 0) 350 Complexity = 1 + LHS->computeComplexity(); 351 return Complexity; 352 } 353 354 static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs, 355 BinaryOperator::Opcode op, const llvm::APSInt& rhs, 356 QualType t) { 357 ID.AddInteger((unsigned) SymIntExprKind); 358 ID.AddPointer(lhs); 359 ID.AddInteger(op); 360 ID.AddPointer(&rhs); 361 ID.Add(t); 362 } 363 364 void Profile(llvm::FoldingSetNodeID& ID) override { 365 Profile(ID, LHS, getOpcode(), RHS, getType()); 366 } 367 368 // Implement isa<T> support. 369 static bool classof(const SymExpr *SE) { 370 return SE->getKind() == SymIntExprKind; 371 } 372 }; 373 374 /// Represents a symbolic expression like 3 - 'x'. 
375 class IntSymExpr : public BinarySymExpr { 376 const llvm::APSInt& LHS; 377 const SymExpr *RHS; 378 379 public: 380 IntSymExpr(const llvm::APSInt &lhs, BinaryOperator::Opcode op, 381 const SymExpr *rhs, QualType t) 382 : BinarySymExpr(IntSymExprKind, op, t), LHS(lhs), RHS(rhs) { 383 assert(rhs); 384 } 385 386 void dumpToStream(raw_ostream &os) const override; 387 388 const SymExpr *getRHS() const { return RHS; } 389 const llvm::APSInt &getLHS() const { return LHS; } 390 391 unsigned computeComplexity() const override { 392 if (Complexity == 0) 393 Complexity = 1 + RHS->computeComplexity(); 394 return Complexity; 395 } 396 397 static void Profile(llvm::FoldingSetNodeID& ID, const llvm::APSInt& lhs, 398 BinaryOperator::Opcode op, const SymExpr *rhs, 399 QualType t) { 400 ID.AddInteger((unsigned) IntSymExprKind); 401 ID.AddPointer(&lhs); 402 ID.AddInteger(op); 403 ID.AddPointer(rhs); 404 ID.Add(t); 405 } 406 407 void Profile(llvm::FoldingSetNodeID& ID) override { 408 Profile(ID, LHS, getOpcode(), RHS, getType()); 409 } 410 411 // Implement isa<T> support. 412 static bool classof(const SymExpr *SE) { 413 return SE->getKind() == IntSymExprKind; 414 } 415 }; 416 417 /// Represents a symbolic expression like 'x' + 'y'. 
418 class SymSymExpr : public BinarySymExpr { 419 const SymExpr *LHS; 420 const SymExpr *RHS; 421 422 public: 423 SymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs, 424 QualType t) 425 : BinarySymExpr(SymSymExprKind, op, t), LHS(lhs), RHS(rhs) { 426 assert(lhs); 427 assert(rhs); 428 } 429 430 const SymExpr *getLHS() const { return LHS; } 431 const SymExpr *getRHS() const { return RHS; } 432 433 void dumpToStream(raw_ostream &os) const override; 434 435 unsigned computeComplexity() const override { 436 if (Complexity == 0) 437 Complexity = RHS->computeComplexity() + LHS->computeComplexity(); 438 return Complexity; 439 } 440 441 static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs, 442 BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) { 443 ID.AddInteger((unsigned) SymSymExprKind); 444 ID.AddPointer(lhs); 445 ID.AddInteger(op); 446 ID.AddPointer(rhs); 447 ID.Add(t); 448 } 449 450 void Profile(llvm::FoldingSetNodeID& ID) override { 451 Profile(ID, LHS, getOpcode(), RHS, getType()); 452 } 453 454 // Implement isa<T> support. 455 static bool classof(const SymExpr *SE) { 456 return SE->getKind() == SymSymExprKind; 457 } 458 }; 459 460 class SymbolManager { 461 using DataSetTy = llvm::FoldingSet<SymExpr>; 462 using SymbolDependTy = llvm::DenseMap<SymbolRef, SymbolRefSmallVectorTy *>; 463 464 DataSetTy DataSet; 465 466 /// Stores the extra dependencies between symbols: the data should be kept 467 /// alive as long as the key is live. 468 SymbolDependTy SymbolDependencies; 469 470 unsigned SymbolCounter = 0; 471 llvm::BumpPtrAllocator& BPAlloc; 472 BasicValueFactory &BV; 473 ASTContext &Ctx; 474 475 public: 476 SymbolManager(ASTContext &ctx, BasicValueFactory &bv, 477 llvm::BumpPtrAllocator& bpalloc) 478 : SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {} 479 ~SymbolManager(); 480 481 static bool canSymbolicate(QualType T); 482 483 /// Make a unique symbol for MemRegion R according to its kind. 
484 const SymbolRegionValue* getRegionValueSymbol(const TypedValueRegion* R); 485 486 const SymbolConjured* conjureSymbol(const Stmt *E, 487 const LocationContext *LCtx, 488 QualType T, 489 unsigned VisitCount, 490 const void *SymbolTag = nullptr); 491 492 const SymbolConjured* conjureSymbol(const Expr *E, 493 const LocationContext *LCtx, 494 unsigned VisitCount, 495 const void *SymbolTag = nullptr) { 496 return conjureSymbol(E, LCtx, E->getType(), VisitCount, SymbolTag); 497 } 498 499 const SymbolDerived *getDerivedSymbol(SymbolRef parentSymbol, 500 const TypedValueRegion *R); 501 502 const SymbolExtent *getExtentSymbol(const SubRegion *R); 503 504 /// Creates a metadata symbol associated with a specific region. 505 /// 506 /// VisitCount can be used to differentiate regions corresponding to 507 /// different loop iterations, thus, making the symbol path-dependent. 508 const SymbolMetadata *getMetadataSymbol(const MemRegion *R, const Stmt *S, 509 QualType T, 510 const LocationContext *LCtx, 511 unsigned VisitCount, 512 const void *SymbolTag = nullptr); 513 514 const SymbolCast* getCastSymbol(const SymExpr *Operand, 515 QualType From, QualType To); 516 517 const SymIntExpr *getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op, 518 const llvm::APSInt& rhs, QualType t); 519 520 const SymIntExpr *getSymIntExpr(const SymExpr &lhs, BinaryOperator::Opcode op, 521 const llvm::APSInt& rhs, QualType t) { 522 return getSymIntExpr(&lhs, op, rhs, t); 523 } 524 525 const IntSymExpr *getIntSymExpr(const llvm::APSInt& lhs, 526 BinaryOperator::Opcode op, 527 const SymExpr *rhs, QualType t); 528 529 const SymSymExpr *getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, 530 const SymExpr *rhs, QualType t); 531 532 QualType getType(const SymExpr *SE) const { 533 return SE->getType(); 534 } 535 536 /// Add artificial symbol dependency. 537 /// 538 /// The dependent symbol should stay alive as long as the primary is alive. 
539 void addSymbolDependency(const SymbolRef Primary, const SymbolRef Dependent); 540 541 const SymbolRefSmallVectorTy *getDependentSymbols(const SymbolRef Primary); 542 543 ASTContext &getContext() { return Ctx; } 544 BasicValueFactory &getBasicVals() { return BV; } 545 }; 546 547 /// A class responsible for cleaning up unused symbols. 548 class SymbolReaper { 549 enum SymbolStatus { 550 NotProcessed, 551 HaveMarkedDependents 552 }; 553 554 using SymbolSetTy = llvm::DenseSet<SymbolRef>; 555 using SymbolMapTy = llvm::DenseMap<SymbolRef, SymbolStatus>; 556 using RegionSetTy = llvm::DenseSet<const MemRegion *>; 557 558 SymbolMapTy TheLiving; 559 SymbolSetTy MetadataInUse; 560 561 RegionSetTy RegionRoots; 562 563 const StackFrameContext *LCtx; 564 const Stmt *Loc; 565 SymbolManager& SymMgr; 566 StoreRef reapedStore; 567 llvm::DenseMap<const MemRegion *, unsigned> includedRegionCache; 568 569 public: 570 /// Construct a reaper object, which removes everything which is not 571 /// live before we execute statement s in the given location context. 572 /// 573 /// If the statement is NULL, everything is this and parent contexts is 574 /// considered live. 575 /// If the stack frame context is NULL, everything on stack is considered 576 /// dead. 577 SymbolReaper(const StackFrameContext *Ctx, const Stmt *s, 578 SymbolManager &symmgr, StoreManager &storeMgr) 579 : LCtx(Ctx), Loc(s), SymMgr(symmgr), reapedStore(nullptr, storeMgr) {} 580 581 const LocationContext *getLocationContext() const { return LCtx; } 582 583 bool isLive(SymbolRef sym); 584 bool isLiveRegion(const MemRegion *region); 585 bool isLive(const Stmt *ExprVal, const LocationContext *LCtx) const; 586 bool isLive(const VarRegion *VR, bool includeStoreBindings = false) const; 587 588 /// Unconditionally marks a symbol as live. 589 /// 590 /// This should never be 591 /// used by checkers, only by the state infrastructure such as the store and 592 /// environment. 
Checkers should instead use metadata symbols and markInUse. 593 void markLive(SymbolRef sym); 594 595 /// Marks a symbol as important to a checker. 596 /// 597 /// For metadata symbols, 598 /// this will keep the symbol alive as long as its associated region is also 599 /// live. For other symbols, this has no effect; checkers are not permitted 600 /// to influence the life of other symbols. This should be used before any 601 /// symbol marking has occurred, i.e. in the MarkLiveSymbols callback. 602 void markInUse(SymbolRef sym); 603 604 using region_iterator = RegionSetTy::const_iterator; 605 606 region_iterator region_begin() const { return RegionRoots.begin(); } 607 region_iterator region_end() const { return RegionRoots.end(); } 608 609 /// Returns whether or not a symbol has been confirmed dead. 610 /// 611 /// This should only be called once all marking of dead symbols has completed. 612 /// (For checkers, this means only in the checkDeadSymbols callback.) 613 bool isDead(SymbolRef sym) { 614 return !isLive(sym); 615 } 616 617 void markLive(const MemRegion *region); 618 void markElementIndicesLive(const MemRegion *region); 619 620 /// Set to the value of the symbolic store after 621 /// StoreManager::removeDeadBindings has been called. 622 void setReapedStore(StoreRef st) { reapedStore = st; } 623 624 private: 625 /// Mark the symbols dependent on the input symbol as live. 626 void markDependentsLive(SymbolRef sym); 627 }; 628 629 class SymbolVisitor { 630 protected: 631 ~SymbolVisitor() = default; 632 633 public: 634 SymbolVisitor() = default; 635 SymbolVisitor(const SymbolVisitor &) = default; 636 SymbolVisitor(SymbolVisitor &&) {} 637 638 /// A visitor method invoked by ProgramStateManager::scanReachableSymbols. 639 /// 640 /// The method returns \c true if symbols should continue be scanned and \c 641 /// false otherwise. 
642 virtual bool VisitSymbol(SymbolRef sym) = 0; 643 virtual bool VisitMemRegion(const MemRegion *) { return true; } 644 }; 645 646 } // namespace ento 647 648 } // namespace clang 649 650 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H 651