1 //===- SymbolManager.h - Management of Symbolic Values ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines SymbolManager, a class that manages symbolic values
10 //  created for use by ExprEngine and related classes.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
15 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
16 
17 #include "clang/AST/Expr.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/AnalysisDeclContext.h"
20 #include "clang/Basic/LLVM.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/DenseSet.h"
26 #include "llvm/ADT/FoldingSet.h"
27 #include "llvm/Support/Allocator.h"
28 #include <cassert>
29 
30 namespace clang {
31 
32 class ASTContext;
33 class Stmt;
34 
35 namespace ento {
36 
37 class BasicValueFactory;
38 class StoreManager;
39 
40 ///A symbol representing the value stored at a MemRegion.
41 class SymbolRegionValue : public SymbolData {
42   const TypedValueRegion *R;
43 
44 public:
45   SymbolRegionValue(SymbolID sym, const TypedValueRegion *r)
46       : SymbolData(SymbolRegionValueKind, sym), R(r) {
47     assert(r);
48     assert(isValidTypeForSymbol(r->getValueType()));
49   }
50 
51   const TypedValueRegion* getRegion() const { return R; }
52 
53   static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) {
54     profile.AddInteger((unsigned) SymbolRegionValueKind);
55     profile.AddPointer(R);
56   }
57 
58   void Profile(llvm::FoldingSetNodeID& profile) override {
59     Profile(profile, R);
60   }
61 
62   void dumpToStream(raw_ostream &os) const override;
63   const MemRegion *getOriginRegion() const override { return getRegion(); }
64 
65   QualType getType() const override;
66 
67   // Implement isa<T> support.
68   static bool classof(const SymExpr *SE) {
69     return SE->getKind() == SymbolRegionValueKind;
70   }
71 };
72 
73 /// A symbol representing the result of an expression in the case when we do
74 /// not know anything about what the expression is.
75 class SymbolConjured : public SymbolData {
76   const Stmt *S;
77   QualType T;
78   unsigned Count;
79   const LocationContext *LCtx;
80   const void *SymbolTag;
81 
82 public:
83   SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx,
84                  QualType t, unsigned count, const void *symbolTag)
85       : SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count),
86         LCtx(lctx), SymbolTag(symbolTag) {
87     // FIXME: 's' might be a nullptr if we're conducting invalidation
88     // that was caused by a destructor call on a temporary object,
89     // which has no statement associated with it.
90     // Due to this, we might be creating the same invalidation symbol for
91     // two different invalidation passes (for two different temporaries).
92     assert(lctx);
93     assert(isValidTypeForSymbol(t));
94   }
95 
96   const Stmt *getStmt() const { return S; }
97   unsigned getCount() const { return Count; }
98   const void *getTag() const { return SymbolTag; }
99 
100   QualType getType() const override;
101 
102   void dumpToStream(raw_ostream &os) const override;
103 
104   static void Profile(llvm::FoldingSetNodeID& profile, const Stmt *S,
105                       QualType T, unsigned Count, const LocationContext *LCtx,
106                       const void *SymbolTag) {
107     profile.AddInteger((unsigned) SymbolConjuredKind);
108     profile.AddPointer(S);
109     profile.AddPointer(LCtx);
110     profile.Add(T);
111     profile.AddInteger(Count);
112     profile.AddPointer(SymbolTag);
113   }
114 
115   void Profile(llvm::FoldingSetNodeID& profile) override {
116     Profile(profile, S, T, Count, LCtx, SymbolTag);
117   }
118 
119   // Implement isa<T> support.
120   static bool classof(const SymExpr *SE) {
121     return SE->getKind() == SymbolConjuredKind;
122   }
123 };
124 
125 /// A symbol representing the value of a MemRegion whose parent region has
126 /// symbolic value.
127 class SymbolDerived : public SymbolData {
128   SymbolRef parentSymbol;
129   const TypedValueRegion *R;
130 
131 public:
132   SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r)
133       : SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) {
134     assert(parent);
135     assert(r);
136     assert(isValidTypeForSymbol(r->getValueType()));
137   }
138 
139   SymbolRef getParentSymbol() const { return parentSymbol; }
140   const TypedValueRegion *getRegion() const { return R; }
141 
142   QualType getType() const override;
143 
144   void dumpToStream(raw_ostream &os) const override;
145   const MemRegion *getOriginRegion() const override { return getRegion(); }
146 
147   static void Profile(llvm::FoldingSetNodeID& profile, SymbolRef parent,
148                       const TypedValueRegion *r) {
149     profile.AddInteger((unsigned) SymbolDerivedKind);
150     profile.AddPointer(r);
151     profile.AddPointer(parent);
152   }
153 
154   void Profile(llvm::FoldingSetNodeID& profile) override {
155     Profile(profile, parentSymbol, R);
156   }
157 
158   // Implement isa<T> support.
159   static bool classof(const SymExpr *SE) {
160     return SE->getKind() == SymbolDerivedKind;
161   }
162 };
163 
164 /// SymbolExtent - Represents the extent (size in bytes) of a bounded region.
165 ///  Clients should not ask the SymbolManager for a region's extent. Always use
166 ///  SubRegion::getExtent instead -- the value returned may not be a symbol.
167 class SymbolExtent : public SymbolData {
168   const SubRegion *R;
169 
170 public:
171   SymbolExtent(SymbolID sym, const SubRegion *r)
172       : SymbolData(SymbolExtentKind, sym), R(r) {
173     assert(r);
174   }
175 
176   const SubRegion *getRegion() const { return R; }
177 
178   QualType getType() const override;
179 
180   void dumpToStream(raw_ostream &os) const override;
181 
182   static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) {
183     profile.AddInteger((unsigned) SymbolExtentKind);
184     profile.AddPointer(R);
185   }
186 
187   void Profile(llvm::FoldingSetNodeID& profile) override {
188     Profile(profile, R);
189   }
190 
191   // Implement isa<T> support.
192   static bool classof(const SymExpr *SE) {
193     return SE->getKind() == SymbolExtentKind;
194   }
195 };
196 
197 /// SymbolMetadata - Represents path-dependent metadata about a specific region.
198 ///  Metadata symbols remain live as long as they are marked as in use before
199 ///  dead-symbol sweeping AND their associated regions are still alive.
200 ///  Intended for use by checkers.
201 class SymbolMetadata : public SymbolData {
202   const MemRegion* R;
203   const Stmt *S;
204   QualType T;
205   const LocationContext *LCtx;
206   unsigned Count;
207   const void *Tag;
208 
209 public:
210   SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t,
211                  const LocationContext *LCtx, unsigned count, const void *tag)
212       : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx),
213         Count(count), Tag(tag) {
214       assert(r);
215       assert(s);
216       assert(isValidTypeForSymbol(t));
217       assert(LCtx);
218       assert(tag);
219     }
220 
221   const MemRegion *getRegion() const { return R; }
222   const Stmt *getStmt() const { return S; }
223   const LocationContext *getLocationContext() const { return LCtx; }
224   unsigned getCount() const { return Count; }
225   const void *getTag() const { return Tag; }
226 
227   QualType getType() const override;
228 
229   void dumpToStream(raw_ostream &os) const override;
230 
231   static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R,
232                       const Stmt *S, QualType T, const LocationContext *LCtx,
233                       unsigned Count, const void *Tag) {
234     profile.AddInteger((unsigned) SymbolMetadataKind);
235     profile.AddPointer(R);
236     profile.AddPointer(S);
237     profile.Add(T);
238     profile.AddPointer(LCtx);
239     profile.AddInteger(Count);
240     profile.AddPointer(Tag);
241   }
242 
243   void Profile(llvm::FoldingSetNodeID& profile) override {
244     Profile(profile, R, S, T, LCtx, Count, Tag);
245   }
246 
247   // Implement isa<T> support.
248   static bool classof(const SymExpr *SE) {
249     return SE->getKind() == SymbolMetadataKind;
250   }
251 };
252 
253 /// Represents a cast expression.
254 class SymbolCast : public SymExpr {
255   const SymExpr *Operand;
256 
257   /// Type of the operand.
258   QualType FromTy;
259 
260   /// The type of the result.
261   QualType ToTy;
262 
263 public:
264   SymbolCast(const SymExpr *In, QualType From, QualType To)
265       : SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) {
266     assert(In);
267     assert(isValidTypeForSymbol(From));
268     // FIXME: GenericTaintChecker creates symbols of void type.
269     // Otherwise, 'To' should also be a valid type.
270   }
271 
272   unsigned computeComplexity() const override {
273     if (Complexity == 0)
274       Complexity = 1 + Operand->computeComplexity();
275     return Complexity;
276   }
277 
278   QualType getType() const override { return ToTy; }
279 
280   const SymExpr *getOperand() const { return Operand; }
281 
282   void dumpToStream(raw_ostream &os) const override;
283 
284   static void Profile(llvm::FoldingSetNodeID& ID,
285                       const SymExpr *In, QualType From, QualType To) {
286     ID.AddInteger((unsigned) SymbolCastKind);
287     ID.AddPointer(In);
288     ID.Add(From);
289     ID.Add(To);
290   }
291 
292   void Profile(llvm::FoldingSetNodeID& ID) override {
293     Profile(ID, Operand, FromTy, ToTy);
294   }
295 
296   // Implement isa<T> support.
297   static bool classof(const SymExpr *SE) {
298     return SE->getKind() == SymbolCastKind;
299   }
300 };
301 
302 /// Represents a symbolic expression involving a binary operator
303 class BinarySymExpr : public SymExpr {
304   BinaryOperator::Opcode Op;
305   QualType T;
306 
307 protected:
308   BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t)
309       : SymExpr(k), Op(op), T(t) {
310     assert(classof(this));
311     // Binary expressions are results of arithmetic. Pointer arithmetic is not
312     // handled by binary expressions, but it is instead handled by applying
313     // sub-regions to regions.
314     assert(isValidTypeForSymbol(t) && !Loc::isLocType(t));
315   }
316 
317 public:
318   // FIXME: We probably need to make this out-of-line to avoid redundant
319   // generation of virtual functions.
320   QualType getType() const override { return T; }
321 
322   BinaryOperator::Opcode getOpcode() const { return Op; }
323 
324   // Implement isa<T> support.
325   static bool classof(const SymExpr *SE) {
326     Kind k = SE->getKind();
327     return k >= BEGIN_BINARYSYMEXPRS && k <= END_BINARYSYMEXPRS;
328   }
329 };
330 
331 /// Represents a symbolic expression like 'x' + 3.
332 class SymIntExpr : public BinarySymExpr {
333   const SymExpr *LHS;
334   const llvm::APSInt& RHS;
335 
336 public:
337   SymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
338              const llvm::APSInt &rhs, QualType t)
339       : BinarySymExpr(SymIntExprKind, op, t), LHS(lhs), RHS(rhs) {
340     assert(lhs);
341   }
342 
343   void dumpToStream(raw_ostream &os) const override;
344 
345   const SymExpr *getLHS() const { return LHS; }
346   const llvm::APSInt &getRHS() const { return RHS; }
347 
348   unsigned computeComplexity() const override {
349     if (Complexity == 0)
350       Complexity = 1 + LHS->computeComplexity();
351     return Complexity;
352   }
353 
354   static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs,
355                       BinaryOperator::Opcode op, const llvm::APSInt& rhs,
356                       QualType t) {
357     ID.AddInteger((unsigned) SymIntExprKind);
358     ID.AddPointer(lhs);
359     ID.AddInteger(op);
360     ID.AddPointer(&rhs);
361     ID.Add(t);
362   }
363 
364   void Profile(llvm::FoldingSetNodeID& ID) override {
365     Profile(ID, LHS, getOpcode(), RHS, getType());
366   }
367 
368   // Implement isa<T> support.
369   static bool classof(const SymExpr *SE) {
370     return SE->getKind() == SymIntExprKind;
371   }
372 };
373 
374 /// Represents a symbolic expression like 3 - 'x'.
375 class IntSymExpr : public BinarySymExpr {
376   const llvm::APSInt& LHS;
377   const SymExpr *RHS;
378 
379 public:
380   IntSymExpr(const llvm::APSInt &lhs, BinaryOperator::Opcode op,
381              const SymExpr *rhs, QualType t)
382       : BinarySymExpr(IntSymExprKind, op, t), LHS(lhs), RHS(rhs) {
383     assert(rhs);
384   }
385 
386   void dumpToStream(raw_ostream &os) const override;
387 
388   const SymExpr *getRHS() const { return RHS; }
389   const llvm::APSInt &getLHS() const { return LHS; }
390 
391   unsigned computeComplexity() const override {
392     if (Complexity == 0)
393       Complexity = 1 + RHS->computeComplexity();
394     return Complexity;
395   }
396 
397   static void Profile(llvm::FoldingSetNodeID& ID, const llvm::APSInt& lhs,
398                       BinaryOperator::Opcode op, const SymExpr *rhs,
399                       QualType t) {
400     ID.AddInteger((unsigned) IntSymExprKind);
401     ID.AddPointer(&lhs);
402     ID.AddInteger(op);
403     ID.AddPointer(rhs);
404     ID.Add(t);
405   }
406 
407   void Profile(llvm::FoldingSetNodeID& ID) override {
408     Profile(ID, LHS, getOpcode(), RHS, getType());
409   }
410 
411   // Implement isa<T> support.
412   static bool classof(const SymExpr *SE) {
413     return SE->getKind() == IntSymExprKind;
414   }
415 };
416 
417 /// Represents a symbolic expression like 'x' + 'y'.
418 class SymSymExpr : public BinarySymExpr {
419   const SymExpr *LHS;
420   const SymExpr *RHS;
421 
422 public:
423   SymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op, const SymExpr *rhs,
424              QualType t)
425       : BinarySymExpr(SymSymExprKind, op, t), LHS(lhs), RHS(rhs) {
426     assert(lhs);
427     assert(rhs);
428   }
429 
430   const SymExpr *getLHS() const { return LHS; }
431   const SymExpr *getRHS() const { return RHS; }
432 
433   void dumpToStream(raw_ostream &os) const override;
434 
435   unsigned computeComplexity() const override {
436     if (Complexity == 0)
437       Complexity = RHS->computeComplexity() + LHS->computeComplexity();
438     return Complexity;
439   }
440 
441   static void Profile(llvm::FoldingSetNodeID& ID, const SymExpr *lhs,
442                     BinaryOperator::Opcode op, const SymExpr *rhs, QualType t) {
443     ID.AddInteger((unsigned) SymSymExprKind);
444     ID.AddPointer(lhs);
445     ID.AddInteger(op);
446     ID.AddPointer(rhs);
447     ID.Add(t);
448   }
449 
450   void Profile(llvm::FoldingSetNodeID& ID) override {
451     Profile(ID, LHS, getOpcode(), RHS, getType());
452   }
453 
454   // Implement isa<T> support.
455   static bool classof(const SymExpr *SE) {
456     return SE->getKind() == SymSymExprKind;
457   }
458 };
459 
460 class SymbolManager {
461   using DataSetTy = llvm::FoldingSet<SymExpr>;
462   using SymbolDependTy = llvm::DenseMap<SymbolRef, SymbolRefSmallVectorTy *>;
463 
464   DataSetTy DataSet;
465 
466   /// Stores the extra dependencies between symbols: the data should be kept
467   /// alive as long as the key is live.
468   SymbolDependTy SymbolDependencies;
469 
470   unsigned SymbolCounter = 0;
471   llvm::BumpPtrAllocator& BPAlloc;
472   BasicValueFactory &BV;
473   ASTContext &Ctx;
474 
475 public:
476   SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
477                 llvm::BumpPtrAllocator& bpalloc)
478       : SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {}
479   ~SymbolManager();
480 
481   static bool canSymbolicate(QualType T);
482 
483   /// Make a unique symbol for MemRegion R according to its kind.
484   const SymbolRegionValue* getRegionValueSymbol(const TypedValueRegion* R);
485 
486   const SymbolConjured* conjureSymbol(const Stmt *E,
487                                       const LocationContext *LCtx,
488                                       QualType T,
489                                       unsigned VisitCount,
490                                       const void *SymbolTag = nullptr);
491 
492   const SymbolConjured* conjureSymbol(const Expr *E,
493                                       const LocationContext *LCtx,
494                                       unsigned VisitCount,
495                                       const void *SymbolTag = nullptr) {
496     return conjureSymbol(E, LCtx, E->getType(), VisitCount, SymbolTag);
497   }
498 
499   const SymbolDerived *getDerivedSymbol(SymbolRef parentSymbol,
500                                         const TypedValueRegion *R);
501 
502   const SymbolExtent *getExtentSymbol(const SubRegion *R);
503 
504   /// Creates a metadata symbol associated with a specific region.
505   ///
506   /// VisitCount can be used to differentiate regions corresponding to
507   /// different loop iterations, thus, making the symbol path-dependent.
508   const SymbolMetadata *getMetadataSymbol(const MemRegion *R, const Stmt *S,
509                                           QualType T,
510                                           const LocationContext *LCtx,
511                                           unsigned VisitCount,
512                                           const void *SymbolTag = nullptr);
513 
514   const SymbolCast* getCastSymbol(const SymExpr *Operand,
515                                   QualType From, QualType To);
516 
517   const SymIntExpr *getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
518                                   const llvm::APSInt& rhs, QualType t);
519 
520   const SymIntExpr *getSymIntExpr(const SymExpr &lhs, BinaryOperator::Opcode op,
521                                   const llvm::APSInt& rhs, QualType t) {
522     return getSymIntExpr(&lhs, op, rhs, t);
523   }
524 
525   const IntSymExpr *getIntSymExpr(const llvm::APSInt& lhs,
526                                   BinaryOperator::Opcode op,
527                                   const SymExpr *rhs, QualType t);
528 
529   const SymSymExpr *getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
530                                   const SymExpr *rhs, QualType t);
531 
532   QualType getType(const SymExpr *SE) const {
533     return SE->getType();
534   }
535 
536   /// Add artificial symbol dependency.
537   ///
538   /// The dependent symbol should stay alive as long as the primary is alive.
539   void addSymbolDependency(const SymbolRef Primary, const SymbolRef Dependent);
540 
541   const SymbolRefSmallVectorTy *getDependentSymbols(const SymbolRef Primary);
542 
543   ASTContext &getContext() { return Ctx; }
544   BasicValueFactory &getBasicVals() { return BV; }
545 };
546 
547 /// A class responsible for cleaning up unused symbols.
548 class SymbolReaper {
549   enum SymbolStatus {
550     NotProcessed,
551     HaveMarkedDependents
552   };
553 
554   using SymbolSetTy = llvm::DenseSet<SymbolRef>;
555   using SymbolMapTy = llvm::DenseMap<SymbolRef, SymbolStatus>;
556   using RegionSetTy = llvm::DenseSet<const MemRegion *>;
557 
558   SymbolMapTy TheLiving;
559   SymbolSetTy MetadataInUse;
560 
561   RegionSetTy RegionRoots;
562 
563   const StackFrameContext *LCtx;
564   const Stmt *Loc;
565   SymbolManager& SymMgr;
566   StoreRef reapedStore;
567   llvm::DenseMap<const MemRegion *, unsigned> includedRegionCache;
568 
569 public:
570   /// Construct a reaper object, which removes everything which is not
571   /// live before we execute statement s in the given location context.
572   ///
573   /// If the statement is NULL, everything is this and parent contexts is
574   /// considered live.
575   /// If the stack frame context is NULL, everything on stack is considered
576   /// dead.
577   SymbolReaper(const StackFrameContext *Ctx, const Stmt *s,
578                SymbolManager &symmgr, StoreManager &storeMgr)
579       : LCtx(Ctx), Loc(s), SymMgr(symmgr), reapedStore(nullptr, storeMgr) {}
580 
581   const LocationContext *getLocationContext() const { return LCtx; }
582 
583   bool isLive(SymbolRef sym);
584   bool isLiveRegion(const MemRegion *region);
585   bool isLive(const Stmt *ExprVal, const LocationContext *LCtx) const;
586   bool isLive(const VarRegion *VR, bool includeStoreBindings = false) const;
587 
588   /// Unconditionally marks a symbol as live.
589   ///
590   /// This should never be
591   /// used by checkers, only by the state infrastructure such as the store and
592   /// environment. Checkers should instead use metadata symbols and markInUse.
593   void markLive(SymbolRef sym);
594 
595   /// Marks a symbol as important to a checker.
596   ///
597   /// For metadata symbols,
598   /// this will keep the symbol alive as long as its associated region is also
599   /// live. For other symbols, this has no effect; checkers are not permitted
600   /// to influence the life of other symbols. This should be used before any
601   /// symbol marking has occurred, i.e. in the MarkLiveSymbols callback.
602   void markInUse(SymbolRef sym);
603 
604   using region_iterator = RegionSetTy::const_iterator;
605 
606   region_iterator region_begin() const { return RegionRoots.begin(); }
607   region_iterator region_end() const { return RegionRoots.end(); }
608 
609   /// Returns whether or not a symbol has been confirmed dead.
610   ///
611   /// This should only be called once all marking of dead symbols has completed.
612   /// (For checkers, this means only in the checkDeadSymbols callback.)
613   bool isDead(SymbolRef sym) {
614     return !isLive(sym);
615   }
616 
617   void markLive(const MemRegion *region);
618   void markElementIndicesLive(const MemRegion *region);
619 
620   /// Set to the value of the symbolic store after
621   /// StoreManager::removeDeadBindings has been called.
622   void setReapedStore(StoreRef st) { reapedStore = st; }
623 
624 private:
625   /// Mark the symbols dependent on the input symbol as live.
626   void markDependentsLive(SymbolRef sym);
627 };
628 
629 class SymbolVisitor {
630 protected:
631   ~SymbolVisitor() = default;
632 
633 public:
634   SymbolVisitor() = default;
635   SymbolVisitor(const SymbolVisitor &) = default;
636   SymbolVisitor(SymbolVisitor &&) {}
637 
638   /// A visitor method invoked by ProgramStateManager::scanReachableSymbols.
639   ///
640   /// The method returns \c true if symbols should continue be scanned and \c
641   /// false otherwise.
642   virtual bool VisitSymbol(SymbolRef sym) = 0;
643   virtual bool VisitMemRegion(const MemRegion *) { return true; }
644 };
645 
646 } // namespace ento
647 
648 } // namespace clang
649 
650 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
651