1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
20 #include "clang/Analysis/FlowSensitive/Value.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/DenseSet.h"
23 #include "llvm/ADT/MapVector.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include <cassert>
28 #include <memory>
29 #include <utility>
30 
31 namespace clang {
32 namespace dataflow {
33 
34 // FIXME: convert these to parameters of the analysis or environment. Current
35 // settings have been experimentaly validated, but only for a particular
36 // analysis.
37 static constexpr int MaxCompositeValueDepth = 3;
38 static constexpr int MaxCompositeValueSize = 1000;
39 
40 /// Returns a map consisting of key-value entries that are present in both maps.
41 template <typename K, typename V>
42 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
43                                         const llvm::DenseMap<K, V> &Map2) {
44   llvm::DenseMap<K, V> Result;
45   for (auto &Entry : Map1) {
46     auto It = Map2.find(Entry.first);
47     if (It != Map2.end() && Entry.second == It->second)
48       Result.insert({Entry.first, Entry.second});
49   }
50   return Result;
51 }
52 
53 static bool compareDistinctValues(QualType Type, Value &Val1,
54                                   const Environment &Env1, Value &Val2,
55                                   const Environment &Env2,
56                                   Environment::ValueModel &Model) {
57   // Note: Potentially costly, but, for booleans, we could check whether both
58   // can be proven equivalent in their respective environments.
59 
60   // FIXME: move the reference/pointers logic from `areEquivalentValues` to here
61   // and implement separate, join/widen specific handling for
62   // reference/pointers.
63   switch (Model.compare(Type, Val1, Env1, Val2, Env2)) {
64   case ComparisonResult::Same:
65     return true;
66   case ComparisonResult::Different:
67     return false;
68   case ComparisonResult::Unknown:
69     switch (Val1.getKind()) {
70     case Value::Kind::Integer:
71     case Value::Kind::Reference:
72     case Value::Kind::Pointer:
73     case Value::Kind::Struct:
74       // FIXME: this choice intentionally introduces unsoundness to allow
75       // for convergence. Once we have widening support for the
76       // reference/pointer and struct built-in models, this should be
77       // `false`.
78       return true;
79     default:
80       return false;
81     }
82   }
83   llvm_unreachable("All cases covered in switch");
84 }
85 
86 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
87 /// respectively, of the same type `Type`. Merging generally produces a single
88 /// value that (soundly) approximates the two inputs, although the actual
89 /// meaning depends on `Model`.
90 static Value *mergeDistinctValues(QualType Type, Value &Val1,
91                                   const Environment &Env1, Value &Val2,
92                                   const Environment &Env2,
93                                   Environment &MergedEnv,
94                                   Environment::ValueModel &Model) {
95   // Join distinct boolean values preserving information about the constraints
96   // in the respective path conditions.
97   if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) {
98     // FIXME: Checking both values should be unnecessary, since they should have
99     // a consistent shape.  However, right now we can end up with BoolValue's in
100     // integer-typed variables due to our incorrect handling of
101     // boolean-to-integer casts (we just propagate the BoolValue to the result
102     // of the cast). So, a join can encounter an integer in one branch but a
103     // bool in the other.
104     // For example:
105     // ```
106     // std::optional<bool> o;
107     // int x;
108     // if (o.has_value())
109     //   x = o.value();
110     // ```
111     auto &Expr1 = cast<BoolValue>(Val1).formula();
112     auto &Expr2 = cast<BoolValue>(Val2).formula();
113     auto &A = MergedEnv.arena();
114     auto &MergedVal = A.makeAtomRef(A.makeAtom());
115     MergedEnv.addToFlowCondition(
116         A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
117                            A.makeEquals(MergedVal, Expr1)),
118                  A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
119                            A.makeEquals(MergedVal, Expr2))));
120     return &A.makeBoolValue(MergedVal);
121   }
122 
123   Value *MergedVal = nullptr;
124   if (auto *StructVal1 = dyn_cast<StructValue>(&Val1)) {
125     [[maybe_unused]] auto *StructVal2 = cast<StructValue>(&Val2);
126 
127     // Values to be merged are always associated with the same location in
128     // `LocToVal`. The location stored in `StructVal` should therefore also
129     // be the same.
130     assert(&StructVal1->getAggregateLoc() == &StructVal2->getAggregateLoc());
131 
132     // `StructVal1` and `StructVal2` may have different properties associated
133     // with them. Create a new `StructValue` without any properties so that we
134     // soundly approximate both values. If a particular analysis needs to merge
135     // properties, it should do so in `DataflowAnalysis::merge()`.
136     MergedVal = &MergedEnv.create<StructValue>(StructVal1->getAggregateLoc());
137   } else {
138     MergedVal = MergedEnv.createValue(Type);
139   }
140 
141   // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
142   // returns false to avoid storing unneeded values in `DACtx`.
143   // FIXME: Creating the value based on the type alone creates misshapen values
144   // for lvalues, since the type does not reflect the need for `ReferenceValue`.
145   // This issue will be resolved when `ReferenceValue` is eliminated as part
146   // of the ongoing migration to strict handling of value categories (see
147   // https://discourse.llvm.org/t/70086 for details).
148   if (MergedVal)
149     if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
150       return MergedVal;
151 
152   return nullptr;
153 }
154 
155 // When widening does not change `Current`, return value will equal `&Prev`.
156 static Value &widenDistinctValues(QualType Type, Value &Prev,
157                                   const Environment &PrevEnv, Value &Current,
158                                   Environment &CurrentEnv,
159                                   Environment::ValueModel &Model) {
160   // Boolean-model widening.
161   if (isa<BoolValue>(&Prev)) {
162     assert(isa<BoolValue>(Current));
163     // Widen to Top, because we know they are different values. If previous was
164     // already Top, re-use that to (implicitly) indicate that no change occured.
165     if (isa<TopBoolValue>(Prev))
166       return Prev;
167     return CurrentEnv.makeTopBoolValue();
168   }
169 
170   // FIXME: Add other built-in model widening.
171 
172   // Custom-model widening.
173   if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv))
174     return *W;
175 
176   // Default of widening is a no-op: leave the current value unchanged.
177   return Current;
178 }
179 
180 /// Initializes a global storage value.
181 static void insertIfGlobal(const Decl &D,
182                            llvm::DenseSet<const VarDecl *> &Vars) {
183   if (auto *V = dyn_cast<VarDecl>(&D))
184     if (V->hasGlobalStorage())
185       Vars.insert(V);
186 }
187 
188 static void insertIfFunction(const Decl &D,
189                              llvm::DenseSet<const FunctionDecl *> &Funcs) {
190   if (auto *FD = dyn_cast<FunctionDecl>(&D))
191     Funcs.insert(FD);
192 }
193 
194 static void
195 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields,
196                          llvm::DenseSet<const VarDecl *> &Vars,
197                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
198   insertIfGlobal(D, Vars);
199   insertIfFunction(D, Funcs);
200   if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D))
201     for (const auto *B : Decomp->bindings())
202       if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding()))
203         // FIXME: should we be using `E->getFoundDecl()`?
204         if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
205           Fields.insert(FD);
206 }
207 
208 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields,
209 /// global variables and functions that are declared in or referenced from
210 /// sub-statements.
211 static void
212 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
213                          llvm::DenseSet<const VarDecl *> &Vars,
214                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
215   for (auto *Child : S.children())
216     if (Child != nullptr)
217       getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs);
218   if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
219     getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs);
220 
221   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
222     if (DS->isSingleDecl())
223       getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs);
224     else
225       for (auto *D : DS->getDeclGroup())
226         getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs);
227   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
228     insertIfGlobal(*E->getDecl(), Vars);
229     insertIfFunction(*E->getDecl(), Funcs);
230   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
231     // FIXME: should we be using `E->getFoundDecl()`?
232     const ValueDecl *VD = E->getMemberDecl();
233     insertIfGlobal(*VD, Vars);
234     insertIfFunction(*VD, Funcs);
235     if (const auto *FD = dyn_cast<FieldDecl>(VD))
236       Fields.insert(FD);
237   } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) {
238     if (RecordDecl *RD = InitList->getType()->getAsRecordDecl())
239       for (const auto *FD : getFieldsForInitListExpr(RD))
240         Fields.insert(FD);
241   }
242 }
243 
244 // FIXME: Add support for resetting globals after function calls to enable
245 // the implementation of sound analyses.
246 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
247   assert(FuncDecl->getBody() != nullptr);
248 
249   FieldSet Fields;
250   llvm::DenseSet<const VarDecl *> Vars;
251   llvm::DenseSet<const FunctionDecl *> Funcs;
252 
253   // Look for global variable and field references in the
254   // constructor-initializers.
255   if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) {
256     for (const auto *Init : CtorDecl->inits()) {
257       if (Init->isMemberInitializer()) {
258         Fields.insert(Init->getMember());
259       } else if (Init->isIndirectMemberInitializer()) {
260         for (const auto *I : Init->getIndirectMember()->chain())
261           Fields.insert(cast<FieldDecl>(I));
262       }
263       const Expr *E = Init->getInit();
264       assert(E != nullptr);
265       getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs);
266     }
267     // Add all fields mentioned in default member initializers.
268     for (const FieldDecl *F : CtorDecl->getParent()->fields())
269       if (const auto *I = F->getInClassInitializer())
270           getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs);
271   }
272   getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs);
273 
274   // These have to be added before the lines that follow to ensure that
275   // `create*` work correctly for structs.
276   DACtx->addModeledFields(Fields);
277 
278   for (const VarDecl *D : Vars) {
279     if (getStorageLocation(*D) != nullptr)
280       continue;
281 
282     setStorageLocation(*D, createObject(*D));
283   }
284 
285   for (const FunctionDecl *FD : Funcs) {
286     if (getStorageLocation(*FD) != nullptr)
287       continue;
288     auto &Loc = createStorageLocation(FD->getType());
289     setStorageLocation(*FD, Loc);
290   }
291 }
292 
293 Environment::Environment(DataflowAnalysisContext &DACtx)
294     : DACtx(&DACtx),
295       FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}
296 
297 Environment Environment::fork() const {
298   Environment Copy(*this);
299   Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken);
300   return Copy;
301 }
302 
303 Environment::Environment(DataflowAnalysisContext &DACtx,
304                          const DeclContext &DeclCtx)
305     : Environment(DACtx) {
306   CallStack.push_back(&DeclCtx);
307 
308   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
309     assert(FuncDecl->getBody() != nullptr);
310 
311     initFieldsGlobalsAndFuncs(FuncDecl);
312 
313     for (const auto *ParamDecl : FuncDecl->parameters()) {
314       assert(ParamDecl != nullptr);
315       setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr));
316     }
317   }
318 
319   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
320     auto *Parent = MethodDecl->getParent();
321     assert(Parent != nullptr);
322     if (Parent->isLambda())
323       MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext());
324 
325     // FIXME: Initialize the ThisPointeeLoc of lambdas too.
326     if (MethodDecl && !MethodDecl->isStatic()) {
327       QualType ThisPointeeType = MethodDecl->getThisObjectType();
328       ThisPointeeLoc =
329           &cast<StructValue>(createValue(ThisPointeeType))->getAggregateLoc();
330     }
331   }
332 }
333 
334 bool Environment::canDescend(unsigned MaxDepth,
335                              const DeclContext *Callee) const {
336   return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee);
337 }
338 
339 Environment Environment::pushCall(const CallExpr *Call) const {
340   Environment Env(*this);
341 
342   if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) {
343     if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) {
344       if (!isa<CXXThisExpr>(Arg))
345         Env.ThisPointeeLoc = cast<AggregateStorageLocation>(
346             getStorageLocation(*Arg, SkipPast::Reference));
347       // Otherwise (when the argument is `this`), retain the current
348       // environment's `ThisPointeeLoc`.
349     }
350   }
351 
352   Env.pushCallInternal(Call->getDirectCallee(),
353                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
354 
355   return Env;
356 }
357 
358 Environment Environment::pushCall(const CXXConstructExpr *Call) const {
359   Environment Env(*this);
360 
361   Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call);
362 
363   Env.pushCallInternal(Call->getConstructor(),
364                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
365 
366   return Env;
367 }
368 
369 void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
370                                    ArrayRef<const Expr *> Args) {
371   // Canonicalize to the definition of the function. This ensures that we're
372   // putting arguments into the same `ParamVarDecl`s` that the callee will later
373   // be retrieving them from.
374   assert(FuncDecl->getDefinition() != nullptr);
375   FuncDecl = FuncDecl->getDefinition();
376 
377   CallStack.push_back(FuncDecl);
378 
379   initFieldsGlobalsAndFuncs(FuncDecl);
380 
381   const auto *ParamIt = FuncDecl->param_begin();
382 
383   // FIXME: Parameters don't always map to arguments 1:1; examples include
384   // overloaded operators implemented as member functions, and parameter packs.
385   for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) {
386     assert(ParamIt != FuncDecl->param_end());
387     const VarDecl *Param = *ParamIt;
388     setStorageLocation(*Param, createObject(*Param, Args[ArgIndex]));
389   }
390 }
391 
392 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) {
393   // We ignore `DACtx` because it's already the same in both. We don't want the
394   // callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or `ThisPointeeLoc`. We don't
395   // bring back `DeclToLoc` and `ExprToLoc` because we want to be able to later
396   // analyze the same callee in a different context, and `setStorageLocation`
397   // requires there to not already be a storage location assigned. Conceptually,
398   // these maps capture information from the local scope, so when popping that
399   // scope, we do not propagate the maps.
400   this->LocToVal = std::move(CalleeEnv.LocToVal);
401   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
402 
403   if (Call->isGLValue()) {
404     if (CalleeEnv.ReturnLoc != nullptr)
405       setStorageLocationStrict(*Call, *CalleeEnv.ReturnLoc);
406   } else if (!Call->getType()->isVoidType()) {
407     if (CalleeEnv.ReturnVal != nullptr)
408       setValueStrict(*Call, *CalleeEnv.ReturnVal);
409   }
410 }
411 
412 void Environment::popCall(const CXXConstructExpr *Call,
413                           const Environment &CalleeEnv) {
414   // See also comment in `popCall(const CallExpr *, const Environment &)` above.
415   this->LocToVal = std::move(CalleeEnv.LocToVal);
416   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
417 
418   if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) {
419     setValueStrict(*Call, *Val);
420   }
421 }
422 
423 bool Environment::equivalentTo(const Environment &Other,
424                                Environment::ValueModel &Model) const {
425   assert(DACtx == Other.DACtx);
426 
427   if (ReturnVal != Other.ReturnVal)
428     return false;
429 
430   if (ReturnLoc != Other.ReturnLoc)
431     return false;
432 
433   if (ThisPointeeLoc != Other.ThisPointeeLoc)
434     return false;
435 
436   if (DeclToLoc != Other.DeclToLoc)
437     return false;
438 
439   if (ExprToLoc != Other.ExprToLoc)
440     return false;
441 
442   // Compare the contents for the intersection of their domains.
443   for (auto &Entry : LocToVal) {
444     const StorageLocation *Loc = Entry.first;
445     assert(Loc != nullptr);
446 
447     Value *Val = Entry.second;
448     assert(Val != nullptr);
449 
450     auto It = Other.LocToVal.find(Loc);
451     if (It == Other.LocToVal.end())
452       continue;
453     assert(It->second != nullptr);
454 
455     if (!areEquivalentValues(*Val, *It->second) &&
456         !compareDistinctValues(Loc->getType(), *Val, *this, *It->second, Other,
457                                Model))
458       return false;
459   }
460 
461   return true;
462 }
463 
464 LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
465                                      Environment::ValueModel &Model) {
466   assert(DACtx == PrevEnv.DACtx);
467   assert(ReturnVal == PrevEnv.ReturnVal);
468   assert(ReturnLoc == PrevEnv.ReturnLoc);
469   assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
470   assert(CallStack == PrevEnv.CallStack);
471 
472   auto Effect = LatticeJoinEffect::Unchanged;
473 
474   // By the API, `PrevEnv` is a previous version of the environment for the same
475   // block, so we have some guarantees about its shape. In particular, it will
476   // be the result of a join or widen operation on previous values for this
477   // block. For `DeclToLoc` and `ExprToLoc`, join guarantees that these maps are
478   // subsets of the maps in `PrevEnv`. So, as long as we maintain this property
479   // here, we don't need change their current values to widen.
480   assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size());
481   assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size());
482 
483   llvm::MapVector<const StorageLocation *, Value *> WidenedLocToVal;
484   for (auto &Entry : LocToVal) {
485     const StorageLocation *Loc = Entry.first;
486     assert(Loc != nullptr);
487 
488     Value *Val = Entry.second;
489     assert(Val != nullptr);
490 
491     auto PrevIt = PrevEnv.LocToVal.find(Loc);
492     if (PrevIt == PrevEnv.LocToVal.end())
493       continue;
494     assert(PrevIt->second != nullptr);
495 
496     if (areEquivalentValues(*Val, *PrevIt->second)) {
497       WidenedLocToVal.insert({Loc, Val});
498       continue;
499     }
500 
501     Value &WidenedVal = widenDistinctValues(Loc->getType(), *PrevIt->second,
502                                             PrevEnv, *Val, *this, Model);
503     WidenedLocToVal.insert({Loc, &WidenedVal});
504     if (&WidenedVal != PrevIt->second)
505       Effect = LatticeJoinEffect::Changed;
506   }
507   LocToVal = std::move(WidenedLocToVal);
508   if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() ||
509       ExprToLoc.size() != PrevEnv.ExprToLoc.size() ||
510       LocToVal.size() != PrevEnv.LocToVal.size())
511     Effect = LatticeJoinEffect::Changed;
512 
513   return Effect;
514 }
515 
516 Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
517                               Environment::ValueModel &Model) {
518   assert(EnvA.DACtx == EnvB.DACtx);
519   assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc);
520   assert(EnvA.CallStack == EnvB.CallStack);
521 
522   Environment JoinedEnv(*EnvA.DACtx);
523 
524   JoinedEnv.CallStack = EnvA.CallStack;
525   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;
526 
527   if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) {
528     // `ReturnVal` might not always get set -- for example if we have a return
529     // statement of the form `return some_other_func()` and we decide not to
530     // analyze `some_other_func()`.
531     // In this case, we can't say anything about the joined return value -- we
532     // don't simply want to propagate the return value that we do have, because
533     // it might not be the correct one.
534     // This occurs for example in the test `ContextSensitiveMutualRecursion`.
535     JoinedEnv.ReturnVal = nullptr;
536   } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) {
537     JoinedEnv.ReturnVal = EnvA.ReturnVal;
538   } else {
539     assert(!EnvA.CallStack.empty());
540     // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this
541     // cast.
542     auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
543     assert(Func != nullptr);
544     if (Value *MergedVal =
545             mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
546                                 *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
547       JoinedEnv.ReturnVal = MergedVal;
548   }
549 
550   if (EnvA.ReturnLoc == EnvB.ReturnLoc)
551     JoinedEnv.ReturnLoc = EnvA.ReturnLoc;
552   else
553     JoinedEnv.ReturnLoc = nullptr;
554 
555   // FIXME: Once we're able to remove declarations from `DeclToLoc` when their
556   // lifetime ends, add an assertion that there aren't any entries in
557   // `DeclToLoc` and `Other.DeclToLoc` that map the same declaration to
558   // different storage locations.
559   JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc);
560 
561   JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc);
562 
563   // FIXME: update join to detect backedges and simplify the flow condition
564   // accordingly.
565   JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions(
566       EnvA.FlowConditionToken, EnvB.FlowConditionToken);
567 
568   for (auto &Entry : EnvA.LocToVal) {
569     const StorageLocation *Loc = Entry.first;
570     assert(Loc != nullptr);
571 
572     Value *Val = Entry.second;
573     assert(Val != nullptr);
574 
575     auto It = EnvB.LocToVal.find(Loc);
576     if (It == EnvB.LocToVal.end())
577       continue;
578     assert(It->second != nullptr);
579 
580     if (areEquivalentValues(*Val, *It->second)) {
581       JoinedEnv.LocToVal.insert({Loc, Val});
582       continue;
583     }
584 
585     if (Value *MergedVal = mergeDistinctValues(
586             Loc->getType(), *Val, EnvA, *It->second, EnvB, JoinedEnv, Model)) {
587       JoinedEnv.LocToVal.insert({Loc, MergedVal});
588     }
589   }
590 
591   return JoinedEnv;
592 }
593 
594 StorageLocation &Environment::createStorageLocation(QualType Type) {
595   return DACtx->createStorageLocation(Type);
596 }
597 
598 StorageLocation &Environment::createStorageLocation(const VarDecl &D) {
599   // Evaluated declarations are always assigned the same storage locations to
600   // ensure that the environment stabilizes across loop iterations. Storage
601   // locations for evaluated declarations are stored in the analysis context.
602   return DACtx->getStableStorageLocation(D);
603 }
604 
605 StorageLocation &Environment::createStorageLocation(const Expr &E) {
606   // Evaluated expressions are always assigned the same storage locations to
607   // ensure that the environment stabilizes across loop iterations. Storage
608   // locations for evaluated expressions are stored in the analysis context.
609   return DACtx->getStableStorageLocation(E);
610 }
611 
612 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
613   assert(!DeclToLoc.contains(&D));
614   assert(!isa_and_nonnull<ReferenceValue>(getValue(Loc)));
615   DeclToLoc[&D] = &Loc;
616 }
617 
618 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const {
619   auto It = DeclToLoc.find(&D);
620   if (It == DeclToLoc.end())
621     return nullptr;
622 
623   StorageLocation *Loc = It->second;
624 
625   assert(!isa_and_nonnull<ReferenceValue>(getValue(*Loc)));
626 
627   return Loc;
628 }
629 
630 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
631   const Expr &CanonE = ignoreCFGOmittedNodes(E);
632   assert(!ExprToLoc.contains(&CanonE));
633   ExprToLoc[&CanonE] = &Loc;
634 }
635 
636 void Environment::setStorageLocationStrict(const Expr &E,
637                                            StorageLocation &Loc) {
638   // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason,
639   // but we still want to be able to associate a `StorageLocation` with them,
640   // so allow these as an exception.
641   assert(E.isGLValue() ||
642          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
643   setStorageLocation(E, Loc);
644 }
645 
646 StorageLocation *Environment::getStorageLocation(const Expr &E,
647                                                  SkipPast SP) const {
648   // FIXME: Add a test with parens.
649   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
650   return It == ExprToLoc.end() ? nullptr : &skip(*It->second, SP);
651 }
652 
653 StorageLocation *Environment::getStorageLocationStrict(const Expr &E) const {
654   // See comment in `setStorageLocationStrict()`.
655   assert(E.isGLValue() ||
656          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
657   StorageLocation *Loc = getStorageLocation(E, SkipPast::None);
658 
659   if (Loc == nullptr)
660     return nullptr;
661 
662   if (auto *RefVal = dyn_cast_or_null<ReferenceValue>(getValue(*Loc)))
663     return &RefVal->getReferentLoc();
664 
665   return Loc;
666 }
667 
668 AggregateStorageLocation *Environment::getThisPointeeStorageLocation() const {
669   return ThisPointeeLoc;
670 }
671 
672 AggregateStorageLocation &
673 Environment::getResultObjectLocation(const Expr &RecordPRValue) {
674   assert(RecordPRValue.getType()->isRecordType());
675   assert(RecordPRValue.isPRValue());
676 
677   if (StorageLocation *ExistingLoc =
678           getStorageLocation(RecordPRValue, SkipPast::None))
679     return *cast<AggregateStorageLocation>(ExistingLoc);
680   auto &Loc = cast<AggregateStorageLocation>(
681       DACtx->getStableStorageLocation(RecordPRValue));
682   setStorageLocation(RecordPRValue, Loc);
683   return Loc;
684 }
685 
686 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) {
687   return DACtx->getOrCreateNullPointerValue(PointeeType);
688 }
689 
690 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
691   assert(!isa<StructValue>(&Val) ||
692          &cast<StructValue>(&Val)->getAggregateLoc() == &Loc);
693 
694   LocToVal[&Loc] = &Val;
695 }
696 
697 void Environment::setValueStrict(const Expr &E, Value &Val) {
698   assert(E.isPRValue());
699   assert(!isa<ReferenceValue>(Val));
700 
701   if (auto *StructVal = dyn_cast<StructValue>(&Val)) {
702     if (auto *ExistingVal = cast_or_null<StructValue>(getValueStrict(E)))
703       assert(&ExistingVal->getAggregateLoc() == &StructVal->getAggregateLoc());
704     if (StorageLocation *ExistingLoc = getStorageLocation(E, SkipPast::None))
705       assert(ExistingLoc == &StructVal->getAggregateLoc());
706     else
707       setStorageLocation(E, StructVal->getAggregateLoc());
708     setValue(StructVal->getAggregateLoc(), Val);
709     return;
710   }
711 
712   StorageLocation *Loc = getStorageLocation(E, SkipPast::None);
713   if (Loc == nullptr) {
714     Loc = &createStorageLocation(E);
715     setStorageLocation(E, *Loc);
716   }
717   setValue(*Loc, Val);
718 }
719 
720 Value *Environment::getValue(const StorageLocation &Loc) const {
721   return LocToVal.lookup(&Loc);
722 }
723 
724 Value *Environment::getValue(const ValueDecl &D) const {
725   auto *Loc = getStorageLocation(D);
726   if (Loc == nullptr)
727     return nullptr;
728   return getValue(*Loc);
729 }
730 
731 Value *Environment::getValue(const Expr &E, SkipPast SP) const {
732   auto *Loc = getStorageLocation(E, SP);
733   if (Loc == nullptr)
734     return nullptr;
735   return getValue(*Loc);
736 }
737 
738 Value *Environment::getValueStrict(const Expr &E) const {
739   assert(E.isPRValue());
740   Value *Val = getValue(E, SkipPast::None);
741 
742   assert(Val == nullptr || !isa<ReferenceValue>(Val));
743 
744   return Val;
745 }
746 
747 Value *Environment::createValue(QualType Type) {
748   llvm::DenseSet<QualType> Visited;
749   int CreatedValuesCount = 0;
750   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
751                                                 CreatedValuesCount);
752   if (CreatedValuesCount > MaxCompositeValueSize) {
753     llvm::errs() << "Attempting to initialize a huge value of type: " << Type
754                  << '\n';
755   }
756   return Val;
757 }
758 
759 Value *Environment::createValueUnlessSelfReferential(
760     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
761     int &CreatedValuesCount) {
762   assert(!Type.isNull());
763 
764   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
765   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
766       Depth > MaxCompositeValueDepth)
767     return nullptr;
768 
769   if (Type->isBooleanType()) {
770     CreatedValuesCount++;
771     return &makeAtomicBoolValue();
772   }
773 
774   if (Type->isIntegerType()) {
775     // FIXME: consider instead `return nullptr`, given that we do nothing useful
776     // with integers, and so distinguishing them serves no purpose, but could
777     // prevent convergence.
778     CreatedValuesCount++;
779     return &arena().create<IntegerValue>();
780   }
781 
782   if (Type->isReferenceType() || Type->isPointerType()) {
783     CreatedValuesCount++;
784     QualType PointeeType = Type->getPointeeType();
785     StorageLocation &PointeeLoc =
786         createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount);
787 
788     if (Type->isReferenceType())
789       return &arena().create<ReferenceValue>(PointeeLoc);
790     else
791       return &arena().create<PointerValue>(PointeeLoc);
792   }
793 
794   if (Type->isRecordType()) {
795     CreatedValuesCount++;
796     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
797     for (const FieldDecl *Field : DACtx->getModeledFields(Type)) {
798       assert(Field != nullptr);
799 
800       QualType FieldType = Field->getType();
801 
802       FieldLocs.insert(
803           {Field, &createLocAndMaybeValue(FieldType, Visited, Depth + 1,
804                                           CreatedValuesCount)});
805     }
806 
807     AggregateStorageLocation &Loc =
808         arena().create<AggregateStorageLocation>(Type, std::move(FieldLocs));
809     StructValue &StructVal = create<StructValue>(Loc);
810 
811     // As we already have a storage location for the `StructValue`, we can and
812     // should associate them in the environment.
813     setValue(Loc, StructVal);
814 
815     return &StructVal;
816   }
817 
818   return nullptr;
819 }
820 
821 StorageLocation &
822 Environment::createLocAndMaybeValue(QualType Ty,
823                                     llvm::DenseSet<QualType> &Visited,
824                                     int Depth, int &CreatedValuesCount) {
825   if (!Visited.insert(Ty.getCanonicalType()).second)
826     return createStorageLocation(Ty.getNonReferenceType());
827   Value *Val = createValueUnlessSelfReferential(
828       Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount);
829   Visited.erase(Ty.getCanonicalType());
830 
831   Ty = Ty.getNonReferenceType();
832 
833   if (Val == nullptr)
834     return createStorageLocation(Ty);
835 
836   if (Ty->isRecordType())
837     return cast<StructValue>(Val)->getAggregateLoc();
838 
839   StorageLocation &Loc = createStorageLocation(Ty);
840   setValue(Loc, *Val);
841   return Loc;
842 }
843 
844 StorageLocation &Environment::createObjectInternal(const VarDecl *D,
845                                                    QualType Ty,
846                                                    const Expr *InitExpr) {
847   if (Ty->isReferenceType()) {
848     // Although variables of reference type always need to be initialized, it
849     // can happen that we can't see the initializer, so `InitExpr` may still
850     // be null.
851     if (InitExpr) {
852       if (auto *InitExprLoc =
853               getStorageLocation(*InitExpr, SkipPast::Reference))
854         return *InitExprLoc;
855     }
856 
857     // Even though we have an initializer, we might not get an
858     // InitExprLoc, for example if the InitExpr is a CallExpr for which we
859     // don't have a function body. In this case, we just invent a storage
860     // location and value -- it's the best we can do.
861     return createObjectInternal(D, Ty.getNonReferenceType(), nullptr);
862   }
863 
864   Value *Val = nullptr;
865   if (InitExpr)
866     // In the (few) cases where an expression is intentionally
867     // "uninterpreted", `InitExpr` is not associated with a value.  There are
868     // two ways to handle this situation: propagate the status, so that
869     // uninterpreted initializers result in uninterpreted variables, or
870     // provide a default value. We choose the latter so that later refinements
871     // of the variable can be used for reasoning about the surrounding code.
872     // For this reason, we let this case be handled by the `createValue()`
873     // call below.
874     //
875     // FIXME. If and when we interpret all language cases, change this to
876     // assert that `InitExpr` is interpreted, rather than supplying a
877     // default value (assuming we don't update the environment API to return
878     // references).
879     Val = getValueStrict(*InitExpr);
880   if (!Val)
881     Val = createValue(Ty);
882 
883   if (Ty->isRecordType())
884     return cast<StructValue>(Val)->getAggregateLoc();
885 
886   StorageLocation &Loc =
887       D ? createStorageLocation(*D) : createStorageLocation(Ty);
888 
889   if (Val)
890     setValue(Loc, *Val);
891 
892   return Loc;
893 }
894 
895 StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const {
896   switch (SP) {
897   case SkipPast::None:
898     return Loc;
899   case SkipPast::Reference:
900     // References cannot be chained so we only need to skip past one level of
901     // indirection.
902     if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc)))
903       return Val->getReferentLoc();
904     return Loc;
905   }
906   llvm_unreachable("bad SkipPast kind");
907 }
908 
909 const StorageLocation &Environment::skip(const StorageLocation &Loc,
910                                          SkipPast SP) const {
911   return skip(*const_cast<StorageLocation *>(&Loc), SP);
912 }
913 
914 void Environment::addToFlowCondition(const Formula &Val) {
915   DACtx->addFlowConditionConstraint(FlowConditionToken, Val);
916 }
917 
918 bool Environment::flowConditionImplies(const Formula &Val) const {
919   return DACtx->flowConditionImplies(FlowConditionToken, Val);
920 }
921 
922 void Environment::dump(raw_ostream &OS) const {
923   // FIXME: add printing for remaining fields and allow caller to decide what
924   // fields are printed.
925   OS << "DeclToLoc:\n";
926   for (auto [D, L] : DeclToLoc)
927     OS << "  [" << D->getNameAsString() << ", " << L << "]\n";
928 
929   OS << "ExprToLoc:\n";
930   for (auto [E, L] : ExprToLoc)
931     OS << "  [" << E << ", " << L << "]\n";
932 
933   OS << "LocToVal:\n";
934   for (auto [L, V] : LocToVal) {
935     OS << "  [" << L << ", " << V << ": " << *V << "]\n";
936   }
937 
938   OS << "FlowConditionToken:\n";
939   DACtx->dumpFlowCondition(FlowConditionToken, OS);
940 }
941 
942 void Environment::dump() const {
943   dump(llvm::dbgs());
944 }
945 
946 AggregateStorageLocation *
947 getImplicitObjectLocation(const CXXMemberCallExpr &MCE,
948                           const Environment &Env) {
949   Expr *ImplicitObject = MCE.getImplicitObjectArgument();
950   if (ImplicitObject == nullptr)
951     return nullptr;
952   StorageLocation *Loc =
953       Env.getStorageLocation(*ImplicitObject, SkipPast::Reference);
954   if (Loc == nullptr)
955     return nullptr;
956   if (ImplicitObject->getType()->isPointerType()) {
957     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Loc)))
958       return &cast<AggregateStorageLocation>(Val->getPointeeLoc());
959     return nullptr;
960   }
961   return cast<AggregateStorageLocation>(Loc);
962 }
963 
964 AggregateStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
965                                                 const Environment &Env) {
966   Expr *Base = ME.getBase();
967   if (Base == nullptr)
968     return nullptr;
969   StorageLocation *Loc = Env.getStorageLocation(*Base, SkipPast::Reference);
970   if (Loc == nullptr)
971     return nullptr;
972   if (ME.isArrow()) {
973     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Loc)))
974       return &cast<AggregateStorageLocation>(Val->getPointeeLoc());
975     return nullptr;
976   }
977   return cast<AggregateStorageLocation>(Loc);
978 }
979 
980 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) {
981   // Unnamed bitfields are only used for padding and do not appear in
982   // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s
983   // field list, and we thus need to remove them before mapping inits to
984   // fields to avoid mapping inits to the wrongs fields.
985   std::vector<FieldDecl *> Fields;
986   llvm::copy_if(
987       RD->fields(), std::back_inserter(Fields),
988       [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); });
989   return Fields;
990 }
991 
992 StructValue &refreshStructValue(AggregateStorageLocation &Loc,
993                                 Environment &Env) {
994   auto &NewVal = Env.create<StructValue>(Loc);
995   Env.setValue(Loc, NewVal);
996   return NewVal;
997 }
998 
999 StructValue &refreshStructValue(const Expr &Expr, Environment &Env) {
1000   assert(Expr.getType()->isRecordType());
1001 
1002   if (Expr.isPRValue()) {
1003     if (auto *ExistingVal =
1004             cast_or_null<StructValue>(Env.getValueStrict(Expr))) {
1005       auto &NewVal = Env.create<StructValue>(ExistingVal->getAggregateLoc());
1006       Env.setValueStrict(Expr, NewVal);
1007       return NewVal;
1008     }
1009 
1010     auto &NewVal = *cast<StructValue>(Env.createValue(Expr.getType()));
1011     Env.setValueStrict(Expr, NewVal);
1012     return NewVal;
1013   }
1014 
1015   if (auto *Loc = cast_or_null<AggregateStorageLocation>(
1016           Env.getStorageLocationStrict(Expr))) {
1017     auto &NewVal = Env.create<StructValue>(*Loc);
1018     Env.setValue(*Loc, NewVal);
1019     return NewVal;
1020   }
1021 
1022   auto &NewVal = *cast<StructValue>(Env.createValue(Expr.getType()));
1023   Env.setStorageLocationStrict(Expr, NewVal.getAggregateLoc());
1024   return NewVal;
1025 }
1026 
1027 } // namespace dataflow
1028 } // namespace clang
1029