1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a DataflowAnalysisContext class that owns objects that
10 // encompass the state of a program and stores context that is used during
11 // dataflow analysis.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/Analysis/FlowSensitive/DebugSupport.h"
18 #include "clang/Analysis/FlowSensitive/Formula.h"
19 #include "clang/Analysis/FlowSensitive/Logger.h"
20 #include "clang/Analysis/FlowSensitive/SimplifyConstraints.h"
21 #include "clang/Analysis/FlowSensitive/Value.h"
22 #include "llvm/ADT/SetOperations.h"
23 #include "llvm/ADT/SetVector.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/Path.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 #include <memory>
31 #include <string>
32 #include <utility>
33 #include <vector>
34
35 static llvm::cl::opt<std::string> DataflowLog(
36 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
37 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
38 "log to stderr. With an arg, writes HTML logs under the "
39 "specified directory (one per analyzed function)."));
40
41 namespace clang {
42 namespace dataflow {
43
getModeledFields(QualType Type)44 FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) {
45 // During context-sensitive analysis, a struct may be allocated in one
46 // function, but its field accessed in a function lower in the stack than
47 // the allocation. Since we only collect fields used in the function where
48 // the allocation occurs, we can't apply that filter when performing
49 // context-sensitive analysis. But, this only applies to storage locations,
50 // since field access it not allowed to fail. In contrast, field *values*
51 // don't need this allowance, since the API allows for uninitialized fields.
52 if (Opts.ContextSensitiveOpts)
53 return getObjectFields(Type);
54
55 return llvm::set_intersection(getObjectFields(Type), ModeledFields);
56 }
57
addModeledFields(const FieldSet & Fields)58 void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) {
59 ModeledFields.set_union(Fields);
60 }
61
createStorageLocation(QualType Type)62 StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) {
63 if (!Type.isNull() && Type->isRecordType()) {
64 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
65 for (const FieldDecl *Field : getModeledFields(Type))
66 if (Field->getType()->isReferenceType())
67 FieldLocs.insert({Field, nullptr});
68 else
69 FieldLocs.insert({Field, &createStorageLocation(
70 Field->getType().getNonReferenceType())});
71
72 RecordStorageLocation::SyntheticFieldMap SyntheticFields;
73 for (const auto &Entry : getSyntheticFields(Type))
74 SyntheticFields.insert(
75 {Entry.getKey(),
76 &createStorageLocation(Entry.getValue().getNonReferenceType())});
77
78 return createRecordStorageLocation(Type, std::move(FieldLocs),
79 std::move(SyntheticFields));
80 }
81 return arena().create<ScalarStorageLocation>(Type);
82 }
83
84 // Returns the keys for a given `StringMap`.
85 // Can't use `StringSet` as the return type as it doesn't support `operator==`.
86 template <typename T>
getKeys(const llvm::StringMap<T> & Map)87 static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) {
88 return llvm::DenseSet<llvm::StringRef>(Map.keys().begin(), Map.keys().end());
89 }
90
createRecordStorageLocation(QualType Type,RecordStorageLocation::FieldToLoc FieldLocs,RecordStorageLocation::SyntheticFieldMap SyntheticFields)91 RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation(
92 QualType Type, RecordStorageLocation::FieldToLoc FieldLocs,
93 RecordStorageLocation::SyntheticFieldMap SyntheticFields) {
94 assert(Type->isRecordType());
95 assert(containsSameFields(getModeledFields(Type), FieldLocs));
96 assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields));
97
98 RecordStorageLocationCreated = true;
99 return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs),
100 std::move(SyntheticFields));
101 }
102
103 StorageLocation &
getStableStorageLocation(const ValueDecl & D)104 DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) {
105 if (auto *Loc = DeclToLoc.lookup(&D))
106 return *Loc;
107 auto &Loc = createStorageLocation(D.getType().getNonReferenceType());
108 DeclToLoc[&D] = &Loc;
109 return Loc;
110 }
111
112 StorageLocation &
getStableStorageLocation(const Expr & E)113 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) {
114 const Expr &CanonE = ignoreCFGOmittedNodes(E);
115
116 if (auto *Loc = ExprToLoc.lookup(&CanonE))
117 return *Loc;
118 auto &Loc = createStorageLocation(CanonE.getType());
119 ExprToLoc[&CanonE] = &Loc;
120 return Loc;
121 }
122
123 PointerValue &
getOrCreateNullPointerValue(QualType PointeeType)124 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
125 auto CanonicalPointeeType =
126 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType();
127 auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr);
128 if (Res.second) {
129 auto &PointeeLoc = createStorageLocation(CanonicalPointeeType);
130 Res.first->second = &arena().create<PointerValue>(PointeeLoc);
131 }
132 return *Res.first->second;
133 }
134
addInvariant(const Formula & Constraint)135 void DataflowAnalysisContext::addInvariant(const Formula &Constraint) {
136 if (Invariant == nullptr)
137 Invariant = &Constraint;
138 else
139 Invariant = &arena().makeAnd(*Invariant, Constraint);
140 }
141
addFlowConditionConstraint(Atom Token,const Formula & Constraint)142 void DataflowAnalysisContext::addFlowConditionConstraint(
143 Atom Token, const Formula &Constraint) {
144 auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint);
145 if (!Res.second) {
146 Res.first->second =
147 &arena().makeAnd(*Res.first->second, Constraint);
148 }
149 }
150
forkFlowCondition(Atom Token)151 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) {
152 Atom ForkToken = arena().makeFlowConditionToken();
153 FlowConditionDeps[ForkToken].insert(Token);
154 addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token));
155 return ForkToken;
156 }
157
158 Atom
joinFlowConditions(Atom FirstToken,Atom SecondToken)159 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken,
160 Atom SecondToken) {
161 Atom Token = arena().makeFlowConditionToken();
162 FlowConditionDeps[Token].insert(FirstToken);
163 FlowConditionDeps[Token].insert(SecondToken);
164 addFlowConditionConstraint(Token,
165 arena().makeOr(arena().makeAtomRef(FirstToken),
166 arena().makeAtomRef(SecondToken)));
167 return Token;
168 }
169
querySolver(llvm::SetVector<const Formula * > Constraints)170 Solver::Result DataflowAnalysisContext::querySolver(
171 llvm::SetVector<const Formula *> Constraints) {
172 return S->solve(Constraints.getArrayRef());
173 }
174
flowConditionImplies(Atom Token,const Formula & F)175 bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
176 const Formula &F) {
177 if (F.isLiteral(true))
178 return true;
179
180 // Returns true if and only if truth assignment of the flow condition implies
181 // that `F` is also true. We prove whether or not this property holds by
182 // reducing the problem to satisfiability checking. In other words, we attempt
183 // to show that assuming `F` is false makes the constraints induced by the
184 // flow condition unsatisfiable.
185 llvm::SetVector<const Formula *> Constraints;
186 Constraints.insert(&arena().makeAtomRef(Token));
187 Constraints.insert(&arena().makeNot(F));
188 addTransitiveFlowConditionConstraints(Token, Constraints);
189 return isUnsatisfiable(std::move(Constraints));
190 }
191
flowConditionAllows(Atom Token,const Formula & F)192 bool DataflowAnalysisContext::flowConditionAllows(Atom Token,
193 const Formula &F) {
194 if (F.isLiteral(false))
195 return false;
196
197 llvm::SetVector<const Formula *> Constraints;
198 Constraints.insert(&arena().makeAtomRef(Token));
199 Constraints.insert(&F);
200 addTransitiveFlowConditionConstraints(Token, Constraints);
201 return isSatisfiable(std::move(Constraints));
202 }
203
equivalentFormulas(const Formula & Val1,const Formula & Val2)204 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1,
205 const Formula &Val2) {
206 llvm::SetVector<const Formula *> Constraints;
207 Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2)));
208 return isUnsatisfiable(std::move(Constraints));
209 }
210
addTransitiveFlowConditionConstraints(Atom Token,llvm::SetVector<const Formula * > & Constraints)211 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
212 Atom Token, llvm::SetVector<const Formula *> &Constraints) {
213 llvm::DenseSet<Atom> AddedTokens;
214 std::vector<Atom> Remaining = {Token};
215
216 if (Invariant)
217 Constraints.insert(Invariant);
218 // Define all the flow conditions that might be referenced in constraints.
219 while (!Remaining.empty()) {
220 auto Token = Remaining.back();
221 Remaining.pop_back();
222 if (!AddedTokens.insert(Token).second)
223 continue;
224
225 auto ConstraintsIt = FlowConditionConstraints.find(Token);
226 if (ConstraintsIt == FlowConditionConstraints.end()) {
227 Constraints.insert(&arena().makeAtomRef(Token));
228 } else {
229 // Bind flow condition token via `iff` to its set of constraints:
230 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
231 Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token),
232 *ConstraintsIt->second));
233 }
234
235 if (auto DepsIt = FlowConditionDeps.find(Token);
236 DepsIt != FlowConditionDeps.end())
237 for (Atom A : DepsIt->second)
238 Remaining.push_back(A);
239 }
240 }
241
printAtomList(const llvm::SmallVector<Atom> & Atoms,llvm::raw_ostream & OS)242 static void printAtomList(const llvm::SmallVector<Atom> &Atoms,
243 llvm::raw_ostream &OS) {
244 OS << "(";
245 for (size_t i = 0; i < Atoms.size(); ++i) {
246 OS << Atoms[i];
247 if (i + 1 < Atoms.size())
248 OS << ", ";
249 }
250 OS << ")\n";
251 }
252
dumpFlowCondition(Atom Token,llvm::raw_ostream & OS)253 void DataflowAnalysisContext::dumpFlowCondition(Atom Token,
254 llvm::raw_ostream &OS) {
255 llvm::SetVector<const Formula *> Constraints;
256 Constraints.insert(&arena().makeAtomRef(Token));
257 addTransitiveFlowConditionConstraints(Token, Constraints);
258
259 OS << "Flow condition token: " << Token << "\n";
260 SimplifyConstraintsInfo Info;
261 llvm::SetVector<const Formula *> OriginalConstraints = Constraints;
262 simplifyConstraints(Constraints, arena(), &Info);
263 if (!Constraints.empty()) {
264 OS << "Constraints:\n";
265 for (const auto *Constraint : Constraints) {
266 Constraint->print(OS);
267 OS << "\n";
268 }
269 }
270 if (!Info.TrueAtoms.empty()) {
271 OS << "True atoms: ";
272 printAtomList(Info.TrueAtoms, OS);
273 }
274 if (!Info.FalseAtoms.empty()) {
275 OS << "False atoms: ";
276 printAtomList(Info.FalseAtoms, OS);
277 }
278 if (!Info.EquivalentAtoms.empty()) {
279 OS << "Equivalent atoms:\n";
280 for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms)
281 printAtomList(Class, OS);
282 }
283
284 OS << "\nFlow condition constraints before simplification:\n";
285 for (const auto *Constraint : OriginalConstraints) {
286 Constraint->print(OS);
287 OS << "\n";
288 }
289 }
290
291 const ControlFlowContext *
getControlFlowContext(const FunctionDecl * F)292 DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) {
293 // Canonicalize the key:
294 F = F->getDefinition();
295 if (F == nullptr)
296 return nullptr;
297 auto It = FunctionContexts.find(F);
298 if (It != FunctionContexts.end())
299 return &It->second;
300
301 if (F->doesThisDeclarationHaveABody()) {
302 auto CFCtx = ControlFlowContext::build(*F);
303 // FIXME: Handle errors.
304 assert(CFCtx);
305 auto Result = FunctionContexts.insert({F, std::move(*CFCtx)});
306 return &Result.first->second;
307 }
308
309 return nullptr;
310 }
311
makeLoggerFromCommandLine()312 static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
313 if (DataflowLog.empty())
314 return Logger::textual(llvm::errs());
315
316 llvm::StringRef Dir = DataflowLog;
317 if (auto EC = llvm::sys::fs::create_directories(Dir))
318 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n";
319 // All analysis runs within a process will log to the same directory.
320 // Share a counter so they don't all overwrite each other's 0.html.
321 // (Don't share a logger, it's not threadsafe).
322 static std::atomic<unsigned> Counter = {0};
323 auto StreamFactory =
324 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
325 llvm::SmallString<256> File(Dir);
326 llvm::sys::path::append(File,
327 std::to_string(Counter.fetch_add(1)) + ".html");
328 std::error_code EC;
329 auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC);
330 if (EC) {
331 llvm::errs() << "Failed to create log " << File << ": " << EC.message()
332 << "\n";
333 return std::make_unique<llvm::raw_null_ostream>();
334 }
335 return OS;
336 };
337 return Logger::html(std::move(StreamFactory));
338 }
339
DataflowAnalysisContext(std::unique_ptr<Solver> S,Options Opts)340 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S,
341 Options Opts)
342 : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) {
343 assert(this->S != nullptr);
344 // If the -dataflow-log command-line flag was set, synthesize a logger.
345 // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
346 // based tools.
347 if (Opts.Log == nullptr) {
348 if (DataflowLog.getNumOccurrences() > 0) {
349 LogOwner = makeLoggerFromCommandLine();
350 this->Opts.Log = LogOwner.get();
351 // FIXME: if the flag is given a value, write an HTML log to a file.
352 } else {
353 this->Opts.Log = &Logger::null();
354 }
355 }
356 }
357
358 DataflowAnalysisContext::~DataflowAnalysisContext() = default;
359
360 } // namespace dataflow
361 } // namespace clang
362
363 using namespace clang;
364
ignoreCFGOmittedNodes(const Expr & E)365 const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) {
366 const Expr *Current = &E;
367 if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) {
368 Current = EWC->getSubExpr();
369 assert(Current != nullptr);
370 }
371 Current = Current->IgnoreParens();
372 assert(Current != nullptr);
373 return *Current;
374 }
375
ignoreCFGOmittedNodes(const Stmt & S)376 const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) {
377 if (auto *E = dyn_cast<Expr>(&S))
378 return ignoreCFGOmittedNodes(*E);
379 return S;
380 }
381
382 // FIXME: Does not precisely handle non-virtual diamond inheritance. A single
383 // field decl will be modeled for all instances of the inherited field.
getFieldsFromClassHierarchy(QualType Type,clang::dataflow::FieldSet & Fields)384 static void getFieldsFromClassHierarchy(QualType Type,
385 clang::dataflow::FieldSet &Fields) {
386 if (Type->isIncompleteType() || Type->isDependentType() ||
387 !Type->isRecordType())
388 return;
389
390 for (const FieldDecl *Field : Type->getAsRecordDecl()->fields())
391 Fields.insert(Field);
392 if (auto *CXXRecord = Type->getAsCXXRecordDecl())
393 for (const CXXBaseSpecifier &Base : CXXRecord->bases())
394 getFieldsFromClassHierarchy(Base.getType(), Fields);
395 }
396
397 /// Gets the set of all fields in the type.
getObjectFields(QualType Type)398 clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) {
399 FieldSet Fields;
400 getFieldsFromClassHierarchy(Type, Fields);
401 return Fields;
402 }
403
containsSameFields(const clang::dataflow::FieldSet & Fields,const clang::dataflow::RecordStorageLocation::FieldToLoc & FieldLocs)404 bool clang::dataflow::containsSameFields(
405 const clang::dataflow::FieldSet &Fields,
406 const clang::dataflow::RecordStorageLocation::FieldToLoc &FieldLocs) {
407 if (Fields.size() != FieldLocs.size())
408 return false;
409 for ([[maybe_unused]] auto [Field, Loc] : FieldLocs)
410 if (!Fields.contains(cast_or_null<FieldDecl>(Field)))
411 return false;
412 return true;
413 }
414