1 //===- SymbolManager.h - Management of Symbolic Values ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines SymbolManager, a class that manages symbolic values
10 //  created for use by ExprEngine and related classes.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
15 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
16 
17 #include "clang/AST/Expr.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/AnalysisDeclContext.h"
20 #include "clang/Basic/LLVM.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/DenseSet.h"
26 #include "llvm/ADT/FoldingSet.h"
27 #include "llvm/Support/Allocator.h"
28 #include <cassert>
29 
30 namespace clang {
31 
32 class ASTContext;
33 class Stmt;
34 
35 namespace ento {
36 
37 class BasicValueFactory;
38 class StoreManager;
39 
40 ///A symbol representing the value stored at a MemRegion.
41 class SymbolRegionValue : public SymbolData {
42   const TypedValueRegion *R;
43 
44 public:
45   SymbolRegionValue(SymbolID sym, const TypedValueRegion *r)
46       : SymbolData(SymbolRegionValueKind, sym), R(r) {
47     assert(r);
48     assert(isValidTypeForSymbol(r->getValueType()));
49   }
50 
51   const TypedValueRegion* getRegion() const { return R; }
52 
53   static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) {
54     profile.AddInteger((unsigned) SymbolRegionValueKind);
55     profile.AddPointer(R);
56   }
57 
58   void Profile(llvm::FoldingSetNodeID& profile) override {
59     Profile(profile, R);
60   }
61 
62   StringRef getKindStr() const override;
63 
64   void dumpToStream(raw_ostream &os) const override;
65   const MemRegion *getOriginRegion() const override { return getRegion(); }
66 
67   QualType getType() const override;
68 
69   // Implement isa<T> support.
70   static bool classof(const SymExpr *SE) {
71     return SE->getKind() == SymbolRegionValueKind;
72   }
73 };
74 
75 /// A symbol representing the result of an expression in the case when we do
76 /// not know anything about what the expression is.
77 class SymbolConjured : public SymbolData {
78   const Stmt *S;
79   QualType T;
80   unsigned Count;
81   const LocationContext *LCtx;
82   const void *SymbolTag;
83 
84 public:
85   SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx,
86                  QualType t, unsigned count, const void *symbolTag)
87       : SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count),
88         LCtx(lctx), SymbolTag(symbolTag) {
89     // FIXME: 's' might be a nullptr if we're conducting invalidation
90     // that was caused by a destructor call on a temporary object,
91     // which has no statement associated with it.
92     // Due to this, we might be creating the same invalidation symbol for
93     // two different invalidation passes (for two different temporaries).
94     assert(lctx);
95     assert(isValidTypeForSymbol(t));
96   }
97 
98   const Stmt *getStmt() const { return S; }
99   unsigned getCount() const { return Count; }
100   const void *getTag() const { return SymbolTag; }
101 
102   QualType getType() const override;
103 
104   StringRef getKindStr() const override;
105 
106   void dumpToStream(raw_ostream &os) const override;
107 
108   static void Profile(llvm::FoldingSetNodeID& profile, const Stmt *S,
109                       QualType T, unsigned Count, const LocationContext *LCtx,
110                       const void *SymbolTag) {
111     profile.AddInteger((unsigned) SymbolConjuredKind);
112     profile.AddPointer(S);
113     profile.AddPointer(LCtx);
114     profile.Add(T);
115     profile.AddInteger(Count);
116     profile.AddPointer(SymbolTag);
117   }
118 
119   void Profile(llvm::FoldingSetNodeID& profile) override {
120     Profile(profile, S, T, Count, LCtx, SymbolTag);
121   }
122 
123   // Implement isa<T> support.
124   static bool classof(const SymExpr *SE) {
125     return SE->getKind() == SymbolConjuredKind;
126   }
127 };
128 
129 /// A symbol representing the value of a MemRegion whose parent region has
130 /// symbolic value.
131 class SymbolDerived : public SymbolData {
132   SymbolRef parentSymbol;
133   const TypedValueRegion *R;
134 
135 public:
136   SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r)
137       : SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) {
138     assert(parent);
139     assert(r);
140     assert(isValidTypeForSymbol(r->getValueType()));
141   }
142 
143   SymbolRef getParentSymbol() const { return parentSymbol; }
144   const TypedValueRegion *getRegion() const { return R; }
145 
146   QualType getType() const override;
147 
148   StringRef getKindStr() const override;
149 
150   void dumpToStream(raw_ostream &os) const override;
151   const MemRegion *getOriginRegion() const override { return getRegion(); }
152 
153   static void Profile(llvm::FoldingSetNodeID& profile, SymbolRef parent,
154                       const TypedValueRegion *r) {
155     profile.AddInteger((unsigned) SymbolDerivedKind);
156     profile.AddPointer(r);
157     profile.AddPointer(parent);
158   }
159 
160   void Profile(llvm::FoldingSetNodeID& profile) override {
161     Profile(profile, parentSymbol, R);
162   }
163 
164   // Implement isa<T> support.
165   static bool classof(const SymExpr *SE) {
166     return SE->getKind() == SymbolDerivedKind;
167   }
168 };
169 
170 /// SymbolExtent - Represents the extent (size in bytes) of a bounded region.
171 ///  Clients should not ask the SymbolManager for a region's extent. Always use
172 ///  SubRegion::getExtent instead -- the value returned may not be a symbol.
173 class SymbolExtent : public SymbolData {
174   const SubRegion *R;
175 
176 public:
177   SymbolExtent(SymbolID sym, const SubRegion *r)
178       : SymbolData(SymbolExtentKind, sym), R(r) {
179     assert(r);
180   }
181 
182   const SubRegion *getRegion() const { return R; }
183 
184   QualType getType() const override;
185 
186   StringRef getKindStr() const override;
187 
188   void dumpToStream(raw_ostream &os) const override;
189 
190   static void Profile(llvm::FoldingSetNodeID& profile, const SubRegion *R) {
191     profile.AddInteger((unsigned) SymbolExtentKind);
192     profile.AddPointer(R);
193   }
194 
195   void Profile(llvm::FoldingSetNodeID& profile) override {
196     Profile(profile, R);
197   }
198 
199   // Implement isa<T> support.
200   static bool classof(const SymExpr *SE) {
201     return SE->getKind() == SymbolExtentKind;
202   }
203 };
204 
205 /// SymbolMetadata - Represents path-dependent metadata about a specific region.
206 ///  Metadata symbols remain live as long as they are marked as in use before
207 ///  dead-symbol sweeping AND their associated regions are still alive.
208 ///  Intended for use by checkers.
209 class SymbolMetadata : public SymbolData {
210   const MemRegion* R;
211   const Stmt *S;
212   QualType T;
213   const LocationContext *LCtx;
214   unsigned Count;
215   const void *Tag;
216 
217 public:
218   SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t,
219                  const LocationContext *LCtx, unsigned count, const void *tag)
220       : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx),
221         Count(count), Tag(tag) {
222       assert(r);
223       assert(s);
224       assert(isValidTypeForSymbol(t));
225       assert(LCtx);
226       assert(tag);
227     }
228 
229   const MemRegion *getRegion() const { return R; }
230   const Stmt *getStmt() const { return S; }
231   const LocationContext *getLocationContext() const { return LCtx; }
232   unsigned getCount() const { return Count; }
233   const void *getTag() const { return Tag; }
234 
235   QualType getType() const override;
236 
237   StringRef getKindStr() const override;
238 
239   void dumpToStream(raw_ostream &os) const override;
240 
241   static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R,
242                       const Stmt *S, QualType T, const LocationContext *LCtx,
243                       unsigned Count, const void *Tag) {
244     profile.AddInteger((unsigned) SymbolMetadataKind);
245     profile.AddPointer(R);
246     profile.AddPointer(S);
247     profile.Add(T);
248     profile.AddPointer(LCtx);
249     profile.AddInteger(Count);
250     profile.AddPointer(Tag);
251   }
252 
253   void Profile(llvm::FoldingSetNodeID& profile) override {
254     Profile(profile, R, S, T, LCtx, Count, Tag);
255   }
256 
257   // Implement isa<T> support.
258   static bool classof(const SymExpr *SE) {
259     return SE->getKind() == SymbolMetadataKind;
260   }
261 };
262 
263 /// Represents a cast expression.
264 class SymbolCast : public SymExpr {
265   const SymExpr *Operand;
266 
267   /// Type of the operand.
268   QualType FromTy;
269 
270   /// The type of the result.
271   QualType ToTy;
272 
273 public:
274   SymbolCast(const SymExpr *In, QualType From, QualType To)
275       : SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) {
276     assert(In);
277     assert(isValidTypeForSymbol(From));
278     // FIXME: GenericTaintChecker creates symbols of void type.
279     // Otherwise, 'To' should also be a valid type.
280   }
281 
282   unsigned computeComplexity() const override {
283     if (Complexity == 0)
284       Complexity = 1 + Operand->computeComplexity();
285     return Complexity;
286   }
287 
288   QualType getType() const override { return ToTy; }
289 
290   const SymExpr *getOperand() const { return Operand; }
291 
292   void dumpToStream(raw_ostream &os) const override;
293 
294   static void Profile(llvm::FoldingSetNodeID& ID,
295                       const SymExpr *In, QualType From, QualType To) {
296     ID.AddInteger((unsigned) SymbolCastKind);
297     ID.AddPointer(In);
298     ID.Add(From);
299     ID.Add(To);
300   }
301 
302   void Profile(llvm::FoldingSetNodeID& ID) override {
303     Profile(ID, Operand, FromTy, ToTy);
304   }
305 
306   // Implement isa<T> support.
307   static bool classof(const SymExpr *SE) {
308     return SE->getKind() == SymbolCastKind;
309   }
310 };
311 
312 /// Represents a symbolic expression involving a binary operator
313 class BinarySymExpr : public SymExpr {
314   BinaryOperator::Opcode Op;
315   QualType T;
316 
317 protected:
318   BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t)
319       : SymExpr(k), Op(op), T(t) {
320     assert(classof(this));
321     // Binary expressions are results of arithmetic. Pointer arithmetic is not
322     // handled by binary expressions, but it is instead handled by applying
323     // sub-regions to regions.
324     assert(isValidTypeForSymbol(t) && !Loc::isLocType(t));
325   }
326 
327 public:
328   // FIXME: We probably need to make this out-of-line to avoid redundant
329   // generation of virtual functions.
330   QualType getType() const override { return T; }
331 
332   BinaryOperator::Opcode getOpcode() const { return Op; }
333 
334   // Implement isa<T> support.
335   static bool classof(const SymExpr *SE) {
336     Kind k = SE->getKind();
337     return k >= BEGIN_BINARYSYMEXPRS && k <= END_BINARYSYMEXPRS;
338   }
339 
340 protected:
341   static unsigned computeOperandComplexity(const SymExpr *Value) {
342     return Value->computeComplexity();
343   }
344   static unsigned computeOperandComplexity(const llvm::APSInt &Value) {
345     return 1;
346   }
347 
348   static const llvm::APSInt *getPointer(const llvm::APSInt &Value) {
349     return &Value;
350   }
351   static const SymExpr *getPointer(const SymExpr *Value) { return Value; }
352 
353   static void dumpToStreamImpl(raw_ostream &os, const SymExpr *Value);
354   static void dumpToStreamImpl(raw_ostream &os, const llvm::APSInt &Value);
355   static void dumpToStreamImpl(raw_ostream &os, BinaryOperator::Opcode op);
356 };
357 
358 /// Template implementation for all binary symbolic expressions
359 template <class LHSTYPE, class RHSTYPE, SymExpr::Kind ClassKind>
360 class BinarySymExprImpl : public BinarySymExpr {
361   LHSTYPE LHS;
362   RHSTYPE RHS;
363 
364 public:
365   BinarySymExprImpl(LHSTYPE lhs, BinaryOperator::Opcode op, RHSTYPE rhs,
366                     QualType t)
367       : BinarySymExpr(ClassKind, op, t), LHS(lhs), RHS(rhs) {
368     assert(getPointer(lhs));
369     assert(getPointer(rhs));
370   }
371 
372   void dumpToStream(raw_ostream &os) const override {
373     dumpToStreamImpl(os, LHS);
374     dumpToStreamImpl(os, getOpcode());
375     dumpToStreamImpl(os, RHS);
376   }
377 
378   LHSTYPE getLHS() const { return LHS; }
379   RHSTYPE getRHS() const { return RHS; }
380 
381   unsigned computeComplexity() const override {
382     if (Complexity == 0)
383       Complexity =
384           computeOperandComplexity(RHS) + computeOperandComplexity(LHS);
385     return Complexity;
386   }
387 
388   static void Profile(llvm::FoldingSetNodeID &ID, LHSTYPE lhs,
389                       BinaryOperator::Opcode op, RHSTYPE rhs, QualType t) {
390     ID.AddInteger((unsigned)ClassKind);
391     ID.AddPointer(getPointer(lhs));
392     ID.AddInteger(op);
393     ID.AddPointer(getPointer(rhs));
394     ID.Add(t);
395   }
396 
397   void Profile(llvm::FoldingSetNodeID &ID) override {
398     Profile(ID, LHS, getOpcode(), RHS, getType());
399   }
400 
401   // Implement isa<T> support.
402   static bool classof(const SymExpr *SE) { return SE->getKind() == ClassKind; }
403 };
404 
405 /// Represents a symbolic expression like 'x' + 3.
406 using SymIntExpr = BinarySymExprImpl<const SymExpr *, const llvm::APSInt &,
407                                      SymExpr::Kind::SymIntExprKind>;
408 
409 /// Represents a symbolic expression like 3 - 'x'.
410 using IntSymExpr = BinarySymExprImpl<const llvm::APSInt &, const SymExpr *,
411                                      SymExpr::Kind::IntSymExprKind>;
412 
413 /// Represents a symbolic expression like 'x' + 'y'.
414 using SymSymExpr = BinarySymExprImpl<const SymExpr *, const SymExpr *,
415                                      SymExpr::Kind::SymSymExprKind>;
416 
417 class SymbolManager {
418   using DataSetTy = llvm::FoldingSet<SymExpr>;
419   using SymbolDependTy =
420       llvm::DenseMap<SymbolRef, std::unique_ptr<SymbolRefSmallVectorTy>>;
421 
422   DataSetTy DataSet;
423 
424   /// Stores the extra dependencies between symbols: the data should be kept
425   /// alive as long as the key is live.
426   SymbolDependTy SymbolDependencies;
427 
428   unsigned SymbolCounter = 0;
429   llvm::BumpPtrAllocator& BPAlloc;
430   BasicValueFactory &BV;
431   ASTContext &Ctx;
432 
433 public:
434   SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
435                 llvm::BumpPtrAllocator& bpalloc)
436       : SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {}
437 
438   static bool canSymbolicate(QualType T);
439 
440   /// Make a unique symbol for MemRegion R according to its kind.
441   const SymbolRegionValue* getRegionValueSymbol(const TypedValueRegion* R);
442 
443   const SymbolConjured* conjureSymbol(const Stmt *E,
444                                       const LocationContext *LCtx,
445                                       QualType T,
446                                       unsigned VisitCount,
447                                       const void *SymbolTag = nullptr);
448 
449   const SymbolConjured* conjureSymbol(const Expr *E,
450                                       const LocationContext *LCtx,
451                                       unsigned VisitCount,
452                                       const void *SymbolTag = nullptr) {
453     return conjureSymbol(E, LCtx, E->getType(), VisitCount, SymbolTag);
454   }
455 
456   const SymbolDerived *getDerivedSymbol(SymbolRef parentSymbol,
457                                         const TypedValueRegion *R);
458 
459   const SymbolExtent *getExtentSymbol(const SubRegion *R);
460 
461   /// Creates a metadata symbol associated with a specific region.
462   ///
463   /// VisitCount can be used to differentiate regions corresponding to
464   /// different loop iterations, thus, making the symbol path-dependent.
465   const SymbolMetadata *getMetadataSymbol(const MemRegion *R, const Stmt *S,
466                                           QualType T,
467                                           const LocationContext *LCtx,
468                                           unsigned VisitCount,
469                                           const void *SymbolTag = nullptr);
470 
471   const SymbolCast* getCastSymbol(const SymExpr *Operand,
472                                   QualType From, QualType To);
473 
474   const SymIntExpr *getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
475                                   const llvm::APSInt& rhs, QualType t);
476 
477   const SymIntExpr *getSymIntExpr(const SymExpr &lhs, BinaryOperator::Opcode op,
478                                   const llvm::APSInt& rhs, QualType t) {
479     return getSymIntExpr(&lhs, op, rhs, t);
480   }
481 
482   const IntSymExpr *getIntSymExpr(const llvm::APSInt& lhs,
483                                   BinaryOperator::Opcode op,
484                                   const SymExpr *rhs, QualType t);
485 
486   const SymSymExpr *getSymSymExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
487                                   const SymExpr *rhs, QualType t);
488 
489   QualType getType(const SymExpr *SE) const {
490     return SE->getType();
491   }
492 
493   /// Add artificial symbol dependency.
494   ///
495   /// The dependent symbol should stay alive as long as the primary is alive.
496   void addSymbolDependency(const SymbolRef Primary, const SymbolRef Dependent);
497 
498   const SymbolRefSmallVectorTy *getDependentSymbols(const SymbolRef Primary);
499 
500   ASTContext &getContext() { return Ctx; }
501   BasicValueFactory &getBasicVals() { return BV; }
502 };
503 
504 /// A class responsible for cleaning up unused symbols.
505 class SymbolReaper {
506   enum SymbolStatus {
507     NotProcessed,
508     HaveMarkedDependents
509   };
510 
511   using SymbolSetTy = llvm::DenseSet<SymbolRef>;
512   using SymbolMapTy = llvm::DenseMap<SymbolRef, SymbolStatus>;
513   using RegionSetTy = llvm::DenseSet<const MemRegion *>;
514 
515   SymbolMapTy TheLiving;
516   SymbolSetTy MetadataInUse;
517 
518   RegionSetTy RegionRoots;
519 
520   const StackFrameContext *LCtx;
521   const Stmt *Loc;
522   SymbolManager& SymMgr;
523   StoreRef reapedStore;
524   llvm::DenseMap<const MemRegion *, unsigned> includedRegionCache;
525 
526 public:
527   /// Construct a reaper object, which removes everything which is not
528   /// live before we execute statement s in the given location context.
529   ///
530   /// If the statement is NULL, everything is this and parent contexts is
531   /// considered live.
532   /// If the stack frame context is NULL, everything on stack is considered
533   /// dead.
534   SymbolReaper(const StackFrameContext *Ctx, const Stmt *s,
535                SymbolManager &symmgr, StoreManager &storeMgr)
536       : LCtx(Ctx), Loc(s), SymMgr(symmgr), reapedStore(nullptr, storeMgr) {}
537 
538   const LocationContext *getLocationContext() const { return LCtx; }
539 
540   bool isLive(SymbolRef sym);
541   bool isLiveRegion(const MemRegion *region);
542   bool isLive(const Expr *ExprVal, const LocationContext *LCtx) const;
543   bool isLive(const VarRegion *VR, bool includeStoreBindings = false) const;
544 
545   /// Unconditionally marks a symbol as live.
546   ///
547   /// This should never be
548   /// used by checkers, only by the state infrastructure such as the store and
549   /// environment. Checkers should instead use metadata symbols and markInUse.
550   void markLive(SymbolRef sym);
551 
552   /// Marks a symbol as important to a checker.
553   ///
554   /// For metadata symbols,
555   /// this will keep the symbol alive as long as its associated region is also
556   /// live. For other symbols, this has no effect; checkers are not permitted
557   /// to influence the life of other symbols. This should be used before any
558   /// symbol marking has occurred, i.e. in the MarkLiveSymbols callback.
559   void markInUse(SymbolRef sym);
560 
561   using region_iterator = RegionSetTy::const_iterator;
562 
563   region_iterator region_begin() const { return RegionRoots.begin(); }
564   region_iterator region_end() const { return RegionRoots.end(); }
565 
566   /// Returns whether or not a symbol has been confirmed dead.
567   ///
568   /// This should only be called once all marking of dead symbols has completed.
569   /// (For checkers, this means only in the checkDeadSymbols callback.)
570   bool isDead(SymbolRef sym) {
571     return !isLive(sym);
572   }
573 
574   void markLive(const MemRegion *region);
575   void markElementIndicesLive(const MemRegion *region);
576 
577   /// Set to the value of the symbolic store after
578   /// StoreManager::removeDeadBindings has been called.
579   void setReapedStore(StoreRef st) { reapedStore = st; }
580 
581 private:
582   /// Mark the symbols dependent on the input symbol as live.
583   void markDependentsLive(SymbolRef sym);
584 };
585 
586 class SymbolVisitor {
587 protected:
588   ~SymbolVisitor() = default;
589 
590 public:
591   SymbolVisitor() = default;
592   SymbolVisitor(const SymbolVisitor &) = default;
593   SymbolVisitor(SymbolVisitor &&) {}
594 
595   /// A visitor method invoked by ProgramStateManager::scanReachableSymbols.
596   ///
597   /// The method returns \c true if symbols should continue be scanned and \c
598   /// false otherwise.
599   virtual bool VisitSymbol(SymbolRef sym) = 0;
600   virtual bool VisitMemRegion(const MemRegion *) { return true; }
601 };
602 
603 } // namespace ento
604 
605 } // namespace clang
606 
607 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
608