1 //= ProgramState.cpp - Path-Sensitive "State" for tracking values --*- C++ -*--=
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements ProgramState and ProgramStateManager.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
15 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
16 #include "clang/Analysis/CFG.h"
17 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
18 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
19 #include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h"
22 #include "llvm/Support/raw_ostream.h"
23 
24 using namespace clang;
25 using namespace ento;
26 
27 namespace clang { namespace  ento {
28 /// Increments the number of times this state is referenced.
29 
ProgramStateRetain(const ProgramState * state)30 void ProgramStateRetain(const ProgramState *state) {
31   ++const_cast<ProgramState*>(state)->refCount;
32 }
33 
34 /// Decrement the number of times this state is referenced.
ProgramStateRelease(const ProgramState * state)35 void ProgramStateRelease(const ProgramState *state) {
36   assert(state->refCount > 0);
37   ProgramState *s = const_cast<ProgramState*>(state);
38   if (--s->refCount == 0) {
39     ProgramStateManager &Mgr = s->getStateManager();
40     Mgr.StateSet.RemoveNode(s);
41     s->~ProgramState();
42     Mgr.freeStates.push_back(s);
43   }
44 }
45 }}
46 
ProgramState(ProgramStateManager * mgr,const Environment & env,StoreRef st,GenericDataMap gdm)47 ProgramState::ProgramState(ProgramStateManager *mgr, const Environment& env,
48                  StoreRef st, GenericDataMap gdm)
49   : stateMgr(mgr),
50     Env(env),
51     store(st.getStore()),
52     GDM(gdm),
53     refCount(0) {
54   stateMgr->getStoreManager().incrementReferenceCount(store);
55 }
56 
/// Copy the manager, environment, store and GDM of \p RHS.
///
/// The copy gets a fresh FoldingSetNode base and a reference count of zero,
/// and registers its own reference on the shared store.
ProgramState::ProgramState(const ProgramState &RHS)
    : llvm::FoldingSetNode(), stateMgr(RHS.stateMgr), Env(RHS.Env),
      store(RHS.store), GDM(RHS.GDM), refCount(0) {
  StoreManager &SM = stateMgr->getStoreManager();
  SM.incrementReferenceCount(store);
}
66 
~ProgramState()67 ProgramState::~ProgramState() {
68   if (store)
69     stateMgr->getStoreManager().decrementReferenceCount(store);
70 }
71 
getID() const72 int64_t ProgramState::getID() const {
73   return getStateManager().Alloc.identifyKnownAlignedObject<ProgramState>(this);
74 }
75 
ProgramStateManager(ASTContext & Ctx,StoreManagerCreator CreateSMgr,ConstraintManagerCreator CreateCMgr,llvm::BumpPtrAllocator & alloc,SubEngine * SubEng)76 ProgramStateManager::ProgramStateManager(ASTContext &Ctx,
77                                          StoreManagerCreator CreateSMgr,
78                                          ConstraintManagerCreator CreateCMgr,
79                                          llvm::BumpPtrAllocator &alloc,
80                                          SubEngine *SubEng)
81   : Eng(SubEng), EnvMgr(alloc), GDMFactory(alloc),
82     svalBuilder(createSimpleSValBuilder(alloc, Ctx, *this)),
83     CallEventMgr(new CallEventManager(alloc)), Alloc(alloc) {
84   StoreMgr = (*CreateSMgr)(*this);
85   ConstraintMgr = (*CreateCMgr)(*this, SubEng);
86 }
87 
88 
~ProgramStateManager()89 ProgramStateManager::~ProgramStateManager() {
90   for (GDMContextsTy::iterator I=GDMContexts.begin(), E=GDMContexts.end();
91        I!=E; ++I)
92     I->second.second(I->second.first);
93 }
94 
95 ProgramStateRef
removeDeadBindings(ProgramStateRef state,const StackFrameContext * LCtx,SymbolReaper & SymReaper)96 ProgramStateManager::removeDeadBindings(ProgramStateRef state,
97                                    const StackFrameContext *LCtx,
98                                    SymbolReaper& SymReaper) {
99 
100   // This code essentially performs a "mark-and-sweep" of the VariableBindings.
101   // The roots are any Block-level exprs and Decls that our liveness algorithm
102   // tells us are live.  We then see what Decls they may reference, and keep
103   // those around.  This code more than likely can be made faster, and the
104   // frequency of which this method is called should be experimented with
105   // for optimum performance.
106   ProgramState NewState = *state;
107 
108   NewState.Env = EnvMgr.removeDeadBindings(NewState.Env, SymReaper, state);
109 
110   // Clean up the store.
111   StoreRef newStore = StoreMgr->removeDeadBindings(NewState.getStore(), LCtx,
112                                                    SymReaper);
113   NewState.setStore(newStore);
114   SymReaper.setReapedStore(newStore);
115 
116   ProgramStateRef Result = getPersistentState(NewState);
117   return ConstraintMgr->removeDeadBindings(Result, SymReaper);
118 }
119 
bindLoc(Loc LV,SVal V,const LocationContext * LCtx,bool notifyChanges) const120 ProgramStateRef ProgramState::bindLoc(Loc LV,
121                                       SVal V,
122                                       const LocationContext *LCtx,
123                                       bool notifyChanges) const {
124   ProgramStateManager &Mgr = getStateManager();
125   ProgramStateRef newState = makeWithStore(Mgr.StoreMgr->Bind(getStore(),
126                                                              LV, V));
127   const MemRegion *MR = LV.getAsRegion();
128   if (MR && notifyChanges)
129     return Mgr.getOwningEngine().processRegionChange(newState, MR, LCtx);
130 
131   return newState;
132 }
133 
134 ProgramStateRef
bindDefaultInitial(SVal loc,SVal V,const LocationContext * LCtx) const135 ProgramState::bindDefaultInitial(SVal loc, SVal V,
136                                  const LocationContext *LCtx) const {
137   ProgramStateManager &Mgr = getStateManager();
138   const MemRegion *R = loc.castAs<loc::MemRegionVal>().getRegion();
139   const StoreRef &newStore = Mgr.StoreMgr->BindDefaultInitial(getStore(), R, V);
140   ProgramStateRef new_state = makeWithStore(newStore);
141   return Mgr.getOwningEngine().processRegionChange(new_state, R, LCtx);
142 }
143 
144 ProgramStateRef
bindDefaultZero(SVal loc,const LocationContext * LCtx) const145 ProgramState::bindDefaultZero(SVal loc, const LocationContext *LCtx) const {
146   ProgramStateManager &Mgr = getStateManager();
147   const MemRegion *R = loc.castAs<loc::MemRegionVal>().getRegion();
148   const StoreRef &newStore = Mgr.StoreMgr->BindDefaultZero(getStore(), R);
149   ProgramStateRef new_state = makeWithStore(newStore);
150   return Mgr.getOwningEngine().processRegionChange(new_state, R, LCtx);
151 }
152 
153 typedef ArrayRef<const MemRegion *> RegionList;
154 typedef ArrayRef<SVal> ValueList;
155 
156 ProgramStateRef
invalidateRegions(RegionList Regions,const Expr * E,unsigned Count,const LocationContext * LCtx,bool CausedByPointerEscape,InvalidatedSymbols * IS,const CallEvent * Call,RegionAndSymbolInvalidationTraits * ITraits) const157 ProgramState::invalidateRegions(RegionList Regions,
158                              const Expr *E, unsigned Count,
159                              const LocationContext *LCtx,
160                              bool CausedByPointerEscape,
161                              InvalidatedSymbols *IS,
162                              const CallEvent *Call,
163                              RegionAndSymbolInvalidationTraits *ITraits) const {
164   SmallVector<SVal, 8> Values;
165   for (RegionList::const_iterator I = Regions.begin(),
166                                   End = Regions.end(); I != End; ++I)
167     Values.push_back(loc::MemRegionVal(*I));
168 
169   return invalidateRegionsImpl(Values, E, Count, LCtx, CausedByPointerEscape,
170                                IS, ITraits, Call);
171 }
172 
173 ProgramStateRef
invalidateRegions(ValueList Values,const Expr * E,unsigned Count,const LocationContext * LCtx,bool CausedByPointerEscape,InvalidatedSymbols * IS,const CallEvent * Call,RegionAndSymbolInvalidationTraits * ITraits) const174 ProgramState::invalidateRegions(ValueList Values,
175                              const Expr *E, unsigned Count,
176                              const LocationContext *LCtx,
177                              bool CausedByPointerEscape,
178                              InvalidatedSymbols *IS,
179                              const CallEvent *Call,
180                              RegionAndSymbolInvalidationTraits *ITraits) const {
181 
182   return invalidateRegionsImpl(Values, E, Count, LCtx, CausedByPointerEscape,
183                                IS, ITraits, Call);
184 }
185 
186 ProgramStateRef
invalidateRegionsImpl(ValueList Values,const Expr * E,unsigned Count,const LocationContext * LCtx,bool CausedByPointerEscape,InvalidatedSymbols * IS,RegionAndSymbolInvalidationTraits * ITraits,const CallEvent * Call) const187 ProgramState::invalidateRegionsImpl(ValueList Values,
188                                     const Expr *E, unsigned Count,
189                                     const LocationContext *LCtx,
190                                     bool CausedByPointerEscape,
191                                     InvalidatedSymbols *IS,
192                                     RegionAndSymbolInvalidationTraits *ITraits,
193                                     const CallEvent *Call) const {
194   ProgramStateManager &Mgr = getStateManager();
195   SubEngine &Eng = Mgr.getOwningEngine();
196 
197   InvalidatedSymbols InvalidatedSyms;
198   if (!IS)
199     IS = &InvalidatedSyms;
200 
201   RegionAndSymbolInvalidationTraits ITraitsLocal;
202   if (!ITraits)
203     ITraits = &ITraitsLocal;
204 
205   StoreManager::InvalidatedRegions TopLevelInvalidated;
206   StoreManager::InvalidatedRegions Invalidated;
207   const StoreRef &newStore
208   = Mgr.StoreMgr->invalidateRegions(getStore(), Values, E, Count, LCtx, Call,
209                                     *IS, *ITraits, &TopLevelInvalidated,
210                                     &Invalidated);
211 
212   ProgramStateRef newState = makeWithStore(newStore);
213 
214   if (CausedByPointerEscape) {
215     newState = Eng.notifyCheckersOfPointerEscape(newState, IS,
216                                                  TopLevelInvalidated,
217                                                  Call,
218                                                  *ITraits);
219   }
220 
221   return Eng.processRegionChanges(newState, IS, TopLevelInvalidated,
222                                   Invalidated, LCtx, Call);
223 }
224 
killBinding(Loc LV) const225 ProgramStateRef ProgramState::killBinding(Loc LV) const {
226   assert(!LV.getAs<loc::MemRegionVal>() && "Use invalidateRegion instead.");
227 
228   Store OldStore = getStore();
229   const StoreRef &newStore =
230     getStateManager().StoreMgr->killBinding(OldStore, LV);
231 
232   if (newStore.getStore() == OldStore)
233     return this;
234 
235   return makeWithStore(newStore);
236 }
237 
238 ProgramStateRef
enterStackFrame(const CallEvent & Call,const StackFrameContext * CalleeCtx) const239 ProgramState::enterStackFrame(const CallEvent &Call,
240                               const StackFrameContext *CalleeCtx) const {
241   const StoreRef &NewStore =
242     getStateManager().StoreMgr->enterStackFrame(getStore(), Call, CalleeCtx);
243   return makeWithStore(NewStore);
244 }
245 
getSValAsScalarOrLoc(const MemRegion * R) const246 SVal ProgramState::getSValAsScalarOrLoc(const MemRegion *R) const {
247   // We only want to do fetches from regions that we can actually bind
248   // values.  For example, SymbolicRegions of type 'id<...>' cannot
249   // have direct bindings (but their can be bindings on their subregions).
250   if (!R->isBoundable())
251     return UnknownVal();
252 
253   if (const TypedValueRegion *TR = dyn_cast<TypedValueRegion>(R)) {
254     QualType T = TR->getValueType();
255     if (Loc::isLocType(T) || T->isIntegralOrEnumerationType())
256       return getSVal(R);
257   }
258 
259   return UnknownVal();
260 }
261 
getSVal(Loc location,QualType T) const262 SVal ProgramState::getSVal(Loc location, QualType T) const {
263   SVal V = getRawSVal(location, T);
264 
265   // If 'V' is a symbolic value that is *perfectly* constrained to
266   // be a constant value, use that value instead to lessen the burden
267   // on later analysis stages (so we have less symbolic values to reason
268   // about).
269   // We only go into this branch if we can convert the APSInt value we have
270   // to the type of T, which is not always the case (e.g. for void).
271   if (!T.isNull() && (T->isIntegralOrEnumerationType() || Loc::isLocType(T))) {
272     if (SymbolRef sym = V.getAsSymbol()) {
273       if (const llvm::APSInt *Int = getStateManager()
274                                     .getConstraintManager()
275                                     .getSymVal(this, sym)) {
276         // FIXME: Because we don't correctly model (yet) sign-extension
277         // and truncation of symbolic values, we need to convert
278         // the integer value to the correct signedness and bitwidth.
279         //
280         // This shows up in the following:
281         //
282         //   char foo();
283         //   unsigned x = foo();
284         //   if (x == 54)
285         //     ...
286         //
287         //  The symbolic value stored to 'x' is actually the conjured
288         //  symbol for the call to foo(); the type of that symbol is 'char',
289         //  not unsigned.
290         const llvm::APSInt &NewV = getBasicVals().Convert(T, *Int);
291 
292         if (V.getAs<Loc>())
293           return loc::ConcreteInt(NewV);
294         else
295           return nonloc::ConcreteInt(NewV);
296       }
297     }
298   }
299 
300   return V;
301 }
302 
BindExpr(const Stmt * S,const LocationContext * LCtx,SVal V,bool Invalidate) const303 ProgramStateRef ProgramState::BindExpr(const Stmt *S,
304                                            const LocationContext *LCtx,
305                                            SVal V, bool Invalidate) const{
306   Environment NewEnv =
307     getStateManager().EnvMgr.bindExpr(Env, EnvironmentEntry(S, LCtx), V,
308                                       Invalidate);
309   if (NewEnv == Env)
310     return this;
311 
312   ProgramState NewSt = *this;
313   NewSt.Env = NewEnv;
314   return getStateManager().getPersistentState(NewSt);
315 }
316 
assumeInBound(DefinedOrUnknownSVal Idx,DefinedOrUnknownSVal UpperBound,bool Assumption,QualType indexTy) const317 ProgramStateRef ProgramState::assumeInBound(DefinedOrUnknownSVal Idx,
318                                       DefinedOrUnknownSVal UpperBound,
319                                       bool Assumption,
320                                       QualType indexTy) const {
321   if (Idx.isUnknown() || UpperBound.isUnknown())
322     return this;
323 
324   // Build an expression for 0 <= Idx < UpperBound.
325   // This is the same as Idx + MIN < UpperBound + MIN, if overflow is allowed.
326   // FIXME: This should probably be part of SValBuilder.
327   ProgramStateManager &SM = getStateManager();
328   SValBuilder &svalBuilder = SM.getSValBuilder();
329   ASTContext &Ctx = svalBuilder.getContext();
330 
331   // Get the offset: the minimum value of the array index type.
332   BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
333   if (indexTy.isNull())
334     indexTy = svalBuilder.getArrayIndexType();
335   nonloc::ConcreteInt Min(BVF.getMinValue(indexTy));
336 
337   // Adjust the index.
338   SVal newIdx = svalBuilder.evalBinOpNN(this, BO_Add,
339                                         Idx.castAs<NonLoc>(), Min, indexTy);
340   if (newIdx.isUnknownOrUndef())
341     return this;
342 
343   // Adjust the upper bound.
344   SVal newBound =
345     svalBuilder.evalBinOpNN(this, BO_Add, UpperBound.castAs<NonLoc>(),
346                             Min, indexTy);
347 
348   if (newBound.isUnknownOrUndef())
349     return this;
350 
351   // Build the actual comparison.
352   SVal inBound = svalBuilder.evalBinOpNN(this, BO_LT, newIdx.castAs<NonLoc>(),
353                                          newBound.castAs<NonLoc>(), Ctx.IntTy);
354   if (inBound.isUnknownOrUndef())
355     return this;
356 
357   // Finally, let the constraint manager take care of it.
358   ConstraintManager &CM = SM.getConstraintManager();
359   return CM.assume(this, inBound.castAs<DefinedSVal>(), Assumption);
360 }
361 
isNonNull(SVal V) const362 ConditionTruthVal ProgramState::isNonNull(SVal V) const {
363   ConditionTruthVal IsNull = isNull(V);
364   if (IsNull.isUnderconstrained())
365     return IsNull;
366   return ConditionTruthVal(!IsNull.getValue());
367 }
368 
areEqual(SVal Lhs,SVal Rhs) const369 ConditionTruthVal ProgramState::areEqual(SVal Lhs, SVal Rhs) const {
370   return stateMgr->getSValBuilder().areEqual(this, Lhs, Rhs);
371 }
372 
isNull(SVal V) const373 ConditionTruthVal ProgramState::isNull(SVal V) const {
374   if (V.isZeroConstant())
375     return true;
376 
377   if (V.isConstant())
378     return false;
379 
380   SymbolRef Sym = V.getAsSymbol(/* IncludeBaseRegion */ true);
381   if (!Sym)
382     return ConditionTruthVal();
383 
384   return getStateManager().ConstraintMgr->isNull(this, Sym);
385 }
386 
getInitialState(const LocationContext * InitLoc)387 ProgramStateRef ProgramStateManager::getInitialState(const LocationContext *InitLoc) {
388   ProgramState State(this,
389                 EnvMgr.getInitialEnvironment(),
390                 StoreMgr->getInitialStore(InitLoc),
391                 GDMFactory.getEmptyMap());
392 
393   return getPersistentState(State);
394 }
395 
getPersistentStateWithGDM(ProgramStateRef FromState,ProgramStateRef GDMState)396 ProgramStateRef ProgramStateManager::getPersistentStateWithGDM(
397                                                      ProgramStateRef FromState,
398                                                      ProgramStateRef GDMState) {
399   ProgramState NewState(*FromState);
400   NewState.GDM = GDMState->GDM;
401   return getPersistentState(NewState);
402 }
403 
getPersistentState(ProgramState & State)404 ProgramStateRef ProgramStateManager::getPersistentState(ProgramState &State) {
405 
406   llvm::FoldingSetNodeID ID;
407   State.Profile(ID);
408   void *InsertPos;
409 
410   if (ProgramState *I = StateSet.FindNodeOrInsertPos(ID, InsertPos))
411     return I;
412 
413   ProgramState *newState = nullptr;
414   if (!freeStates.empty()) {
415     newState = freeStates.back();
416     freeStates.pop_back();
417   }
418   else {
419     newState = (ProgramState*) Alloc.Allocate<ProgramState>();
420   }
421   new (newState) ProgramState(State);
422   StateSet.InsertNode(newState, InsertPos);
423   return newState;
424 }
425 
makeWithStore(const StoreRef & store) const426 ProgramStateRef ProgramState::makeWithStore(const StoreRef &store) const {
427   ProgramState NewSt(*this);
428   NewSt.setStore(store);
429   return getStateManager().getPersistentState(NewSt);
430 }
431 
setStore(const StoreRef & newStore)432 void ProgramState::setStore(const StoreRef &newStore) {
433   Store newStoreStore = newStore.getStore();
434   if (newStoreStore)
435     stateMgr->getStoreManager().incrementReferenceCount(newStoreStore);
436   if (store)
437     stateMgr->getStoreManager().decrementReferenceCount(store);
438   store = newStoreStore;
439 }
440 
441 //===----------------------------------------------------------------------===//
442 //  State pretty-printing.
443 //===----------------------------------------------------------------------===//
444 
print(raw_ostream & Out,const char * NL,const char * Sep,const LocationContext * LC) const445 void ProgramState::print(raw_ostream &Out,
446                          const char *NL, const char *Sep,
447                          const LocationContext *LC) const {
448   // Print the store.
449   ProgramStateManager &Mgr = getStateManager();
450   const ASTContext &Context = getStateManager().getContext();
451   Mgr.getStoreManager().print(getStore(), Out, NL);
452 
453   // Print out the environment.
454   Env.print(Out, NL, Sep, Context, LC);
455 
456   // Print out the constraints.
457   Mgr.getConstraintManager().print(this, Out, NL, Sep);
458 
459   // Print out the tracked dynamic types.
460   printDynamicTypeInfo(this, Out, NL, Sep);
461 
462   // Print out tainted symbols.
463   printTaint(Out, NL);
464 
465   // Print checker-specific data.
466   Mgr.getOwningEngine().printState(Out, this, NL, Sep, LC);
467 }
468 
printDOT(raw_ostream & Out,const LocationContext * LC) const469 void ProgramState::printDOT(raw_ostream &Out,
470                             const LocationContext *LC) const {
471   print(Out, "\\l", "\\|", LC);
472 }
473 
dump() const474 LLVM_DUMP_METHOD void ProgramState::dump() const {
475   print(llvm::errs());
476 }
477 
printTaint(raw_ostream & Out,const char * NL) const478 void ProgramState::printTaint(raw_ostream &Out,
479                               const char *NL) const {
480   TaintMapImpl TM = get<TaintMap>();
481 
482   if (!TM.isEmpty())
483     Out <<"Tainted symbols:" << NL;
484 
485   for (TaintMapImpl::iterator I = TM.begin(), E = TM.end(); I != E; ++I) {
486     Out << I->first << " : " << I->second << NL;
487   }
488 }
489 
dumpTaint() const490 void ProgramState::dumpTaint() const {
491   printTaint(llvm::errs());
492 }
493 
getAnalysisManager() const494 AnalysisManager& ProgramState::getAnalysisManager() const {
495   return stateMgr->getOwningEngine().getAnalysisManager();
496 }
497 
498 //===----------------------------------------------------------------------===//
499 // Generic Data Map.
500 //===----------------------------------------------------------------------===//
501 
FindGDM(void * K) const502 void *const* ProgramState::FindGDM(void *K) const {
503   return GDM.lookup(K);
504 }
505 
506 void*
FindGDMContext(void * K,void * (* CreateContext)(llvm::BumpPtrAllocator &),void (* DeleteContext)(void *))507 ProgramStateManager::FindGDMContext(void *K,
508                                void *(*CreateContext)(llvm::BumpPtrAllocator&),
509                                void (*DeleteContext)(void*)) {
510 
511   std::pair<void*, void (*)(void*)>& p = GDMContexts[K];
512   if (!p.first) {
513     p.first = CreateContext(Alloc);
514     p.second = DeleteContext;
515   }
516 
517   return p.first;
518 }
519 
addGDM(ProgramStateRef St,void * Key,void * Data)520 ProgramStateRef ProgramStateManager::addGDM(ProgramStateRef St, void *Key, void *Data){
521   ProgramState::GenericDataMap M1 = St->getGDM();
522   ProgramState::GenericDataMap M2 = GDMFactory.add(M1, Key, Data);
523 
524   if (M1 == M2)
525     return St;
526 
527   ProgramState NewSt = *St;
528   NewSt.GDM = M2;
529   return getPersistentState(NewSt);
530 }
531 
removeGDM(ProgramStateRef state,void * Key)532 ProgramStateRef ProgramStateManager::removeGDM(ProgramStateRef state, void *Key) {
533   ProgramState::GenericDataMap OldM = state->getGDM();
534   ProgramState::GenericDataMap NewM = GDMFactory.remove(OldM, Key);
535 
536   if (NewM == OldM)
537     return state;
538 
539   ProgramState NewState = *state;
540   NewState.GDM = NewM;
541   return getPersistentState(NewState);
542 }
543 
scan(nonloc::LazyCompoundVal val)544 bool ScanReachableSymbols::scan(nonloc::LazyCompoundVal val) {
545   bool wasVisited = !visited.insert(val.getCVData()).second;
546   if (wasVisited)
547     return true;
548 
549   StoreManager &StoreMgr = state->getStateManager().getStoreManager();
550   // FIXME: We don't really want to use getBaseRegion() here because pointer
551   // arithmetic doesn't apply, but scanReachableSymbols only accepts base
552   // regions right now.
553   const MemRegion *R = val.getRegion()->getBaseRegion();
554   return StoreMgr.scanReachableSymbols(val.getStore(), R, *this);
555 }
556 
scan(nonloc::CompoundVal val)557 bool ScanReachableSymbols::scan(nonloc::CompoundVal val) {
558   for (nonloc::CompoundVal::iterator I=val.begin(), E=val.end(); I!=E; ++I)
559     if (!scan(*I))
560       return false;
561 
562   return true;
563 }
564 
scan(const SymExpr * sym)565 bool ScanReachableSymbols::scan(const SymExpr *sym) {
566   for (SymExpr::symbol_iterator SI = sym->symbol_begin(),
567                                 SE = sym->symbol_end();
568        SI != SE; ++SI) {
569     bool wasVisited = !visited.insert(*SI).second;
570     if (wasVisited)
571       continue;
572 
573     if (!visitor.VisitSymbol(*SI))
574       return false;
575   }
576 
577   return true;
578 }
579 
scan(SVal val)580 bool ScanReachableSymbols::scan(SVal val) {
581   if (Optional<loc::MemRegionVal> X = val.getAs<loc::MemRegionVal>())
582     return scan(X->getRegion());
583 
584   if (Optional<nonloc::LazyCompoundVal> X =
585           val.getAs<nonloc::LazyCompoundVal>())
586     return scan(*X);
587 
588   if (Optional<nonloc::LocAsInteger> X = val.getAs<nonloc::LocAsInteger>())
589     return scan(X->getLoc());
590 
591   if (SymbolRef Sym = val.getAsSymbol())
592     return scan(Sym);
593 
594   if (const SymExpr *Sym = val.getAsSymbolicExpression())
595     return scan(Sym);
596 
597   if (Optional<nonloc::CompoundVal> X = val.getAs<nonloc::CompoundVal>())
598     return scan(*X);
599 
600   return true;
601 }
602 
scan(const MemRegion * R)603 bool ScanReachableSymbols::scan(const MemRegion *R) {
604   if (isa<MemSpaceRegion>(R))
605     return true;
606 
607   bool wasVisited = !visited.insert(R).second;
608   if (wasVisited)
609     return true;
610 
611   if (!visitor.VisitMemRegion(R))
612     return false;
613 
614   // If this is a symbolic region, visit the symbol for the region.
615   if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(R))
616     if (!visitor.VisitSymbol(SR->getSymbol()))
617       return false;
618 
619   // If this is a subregion, also visit the parent regions.
620   if (const SubRegion *SR = dyn_cast<SubRegion>(R)) {
621     const MemRegion *Super = SR->getSuperRegion();
622     if (!scan(Super))
623       return false;
624 
625     // When we reach the topmost region, scan all symbols in it.
626     if (isa<MemSpaceRegion>(Super)) {
627       StoreManager &StoreMgr = state->getStateManager().getStoreManager();
628       if (!StoreMgr.scanReachableSymbols(state->getStore(), SR, *this))
629         return false;
630     }
631   }
632 
633   // Regions captured by a block are also implicitly reachable.
634   if (const BlockDataRegion *BDR = dyn_cast<BlockDataRegion>(R)) {
635     BlockDataRegion::referenced_vars_iterator I = BDR->referenced_vars_begin(),
636                                               E = BDR->referenced_vars_end();
637     for ( ; I != E; ++I) {
638       if (!scan(I.getCapturedRegion()))
639         return false;
640     }
641   }
642 
643   return true;
644 }
645 
scanReachableSymbols(SVal val,SymbolVisitor & visitor) const646 bool ProgramState::scanReachableSymbols(SVal val, SymbolVisitor& visitor) const {
647   ScanReachableSymbols S(this, visitor);
648   return S.scan(val);
649 }
650 
scanReachableSymbols(llvm::iterator_range<region_iterator> Reachable,SymbolVisitor & visitor) const651 bool ProgramState::scanReachableSymbols(
652     llvm::iterator_range<region_iterator> Reachable,
653     SymbolVisitor &visitor) const {
654   ScanReachableSymbols S(this, visitor);
655   for (const MemRegion *R : Reachable) {
656     if (!S.scan(R))
657       return false;
658   }
659   return true;
660 }
661 
addTaint(const Stmt * S,const LocationContext * LCtx,TaintTagType Kind) const662 ProgramStateRef ProgramState::addTaint(const Stmt *S,
663                                            const LocationContext *LCtx,
664                                            TaintTagType Kind) const {
665   if (const Expr *E = dyn_cast_or_null<Expr>(S))
666     S = E->IgnoreParens();
667 
668   return addTaint(getSVal(S, LCtx), Kind);
669 }
670 
addTaint(SVal V,TaintTagType Kind) const671 ProgramStateRef ProgramState::addTaint(SVal V,
672                                        TaintTagType Kind) const {
673   SymbolRef Sym = V.getAsSymbol();
674   if (Sym)
675     return addTaint(Sym, Kind);
676 
677   // If the SVal represents a structure, try to mass-taint all values within the
678   // structure. For now it only works efficiently on lazy compound values that
679   // were conjured during a conservative evaluation of a function - either as
680   // return values of functions that return structures or arrays by value, or as
681   // values of structures or arrays passed into the function by reference,
682   // directly or through pointer aliasing. Such lazy compound values are
683   // characterized by having exactly one binding in their captured store within
684   // their parent region, which is a conjured symbol default-bound to the base
685   // region of the parent region.
686   if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
687     if (Optional<SVal> binding = getStateManager().StoreMgr->getDefaultBinding(*LCV)) {
688       if (SymbolRef Sym = binding->getAsSymbol())
689         return addPartialTaint(Sym, LCV->getRegion(), Kind);
690     }
691   }
692 
693   const MemRegion *R = V.getAsRegion();
694   return addTaint(R, Kind);
695 }
696 
addTaint(const MemRegion * R,TaintTagType Kind) const697 ProgramStateRef ProgramState::addTaint(const MemRegion *R,
698                                            TaintTagType Kind) const {
699   if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
700     return addTaint(SR->getSymbol(), Kind);
701   return this;
702 }
703 
addTaint(SymbolRef Sym,TaintTagType Kind) const704 ProgramStateRef ProgramState::addTaint(SymbolRef Sym,
705                                            TaintTagType Kind) const {
706   // If this is a symbol cast, remove the cast before adding the taint. Taint
707   // is cast agnostic.
708   while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
709     Sym = SC->getOperand();
710 
711   ProgramStateRef NewState = set<TaintMap>(Sym, Kind);
712   assert(NewState);
713   return NewState;
714 }
715 
addPartialTaint(SymbolRef ParentSym,const SubRegion * SubRegion,TaintTagType Kind) const716 ProgramStateRef ProgramState::addPartialTaint(SymbolRef ParentSym,
717                                               const SubRegion *SubRegion,
718                                               TaintTagType Kind) const {
719   // Ignore partial taint if the entire parent symbol is already tainted.
720   if (contains<TaintMap>(ParentSym) && *get<TaintMap>(ParentSym) == Kind)
721     return this;
722 
723   // Partial taint applies if only a portion of the symbol is tainted.
724   if (SubRegion == SubRegion->getBaseRegion())
725     return addTaint(ParentSym, Kind);
726 
727   const TaintedSubRegions *SavedRegs = get<DerivedSymTaint>(ParentSym);
728   TaintedSubRegions Regs =
729       SavedRegs ? *SavedRegs : stateMgr->TSRFactory.getEmptyMap();
730 
731   Regs = stateMgr->TSRFactory.add(Regs, SubRegion, Kind);
732   ProgramStateRef NewState = set<DerivedSymTaint>(ParentSym, Regs);
733   assert(NewState);
734   return NewState;
735 }
736 
isTainted(const Stmt * S,const LocationContext * LCtx,TaintTagType Kind) const737 bool ProgramState::isTainted(const Stmt *S, const LocationContext *LCtx,
738                              TaintTagType Kind) const {
739   if (const Expr *E = dyn_cast_or_null<Expr>(S))
740     S = E->IgnoreParens();
741 
742   SVal val = getSVal(S, LCtx);
743   return isTainted(val, Kind);
744 }
745 
isTainted(SVal V,TaintTagType Kind) const746 bool ProgramState::isTainted(SVal V, TaintTagType Kind) const {
747   if (const SymExpr *Sym = V.getAsSymExpr())
748     return isTainted(Sym, Kind);
749   if (const MemRegion *Reg = V.getAsRegion())
750     return isTainted(Reg, Kind);
751   return false;
752 }
753 
isTainted(const MemRegion * Reg,TaintTagType K) const754 bool ProgramState::isTainted(const MemRegion *Reg, TaintTagType K) const {
755   if (!Reg)
756     return false;
757 
758   // Element region (array element) is tainted if either the base or the offset
759   // are tainted.
760   if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
761     return isTainted(ER->getSuperRegion(), K) || isTainted(ER->getIndex(), K);
762 
763   if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg))
764     return isTainted(SR->getSymbol(), K);
765 
766   if (const SubRegion *ER = dyn_cast<SubRegion>(Reg))
767     return isTainted(ER->getSuperRegion(), K);
768 
769   return false;
770 }
771 
isTainted(SymbolRef Sym,TaintTagType Kind) const772 bool ProgramState::isTainted(SymbolRef Sym, TaintTagType Kind) const {
773   if (!Sym)
774     return false;
775 
776   // Traverse all the symbols this symbol depends on to see if any are tainted.
777   for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), SE =Sym->symbol_end();
778        SI != SE; ++SI) {
779     if (!isa<SymbolData>(*SI))
780       continue;
781 
782     if (const TaintTagType *Tag = get<TaintMap>(*SI)) {
783       if (*Tag == Kind)
784         return true;
785     }
786 
787     if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI)) {
788       // If this is a SymbolDerived with a tainted parent, it's also tainted.
789       if (isTainted(SD->getParentSymbol(), Kind))
790         return true;
791 
792       // If this is a SymbolDerived with the same parent symbol as another
793       // tainted SymbolDerived and a region that's a sub-region of that tainted
794       // symbol, it's also tainted.
795       if (const TaintedSubRegions *Regs =
796               get<DerivedSymTaint>(SD->getParentSymbol())) {
797         const TypedValueRegion *R = SD->getRegion();
798         for (auto I : *Regs) {
799           // FIXME: The logic to identify tainted regions could be more
800           // complete. For example, this would not currently identify
801           // overlapping fields in a union as tainted. To identify this we can
802           // check for overlapping/nested byte offsets.
803           if (Kind == I.second && R->isSubRegionOf(I.first))
804             return true;
805         }
806       }
807     }
808 
809     // If memory region is tainted, data is also tainted.
810     if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
811       if (isTainted(SRV->getRegion(), Kind))
812         return true;
813     }
814 
815     // If this is a SymbolCast from a tainted value, it's also tainted.
816     if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI)) {
817       if (isTainted(SC->getOperand(), Kind))
818         return true;
819     }
820   }
821 
822   return false;
823 }
824