1 //===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a CFL-base, summary-based alias analysis algorithm. It
10 // does not depend on types. The algorithm is a mixture of the one described in
11 // "Demand-driven alias analysis for C" by Xin Zheng and Radu Rugina, and "Fast
12 // algorithms for Dyck-CFL-reachability with applications to Alias Analysis" by
13 // Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the papers, we build a
14 // graph of the uses of a variable, where each node is a memory location, and
15 // each edge is an action that happened on that memory location. The "actions"
16 // can be one of Dereference, Reference, or Assign. The precision of this
17 // analysis is roughly the same as that of an one level context-sensitive
18 // Steensgaard's algorithm.
19 //
20 // Two variables are considered as aliasing iff you can reach one value's node
21 // from the other value's node and the language formed by concatenating all of
22 // the edge labels (actions) conforms to a context-free grammar.
23 //
24 // Because this algorithm requires a graph search on each query, we execute the
25 // algorithm outlined in "Fast algorithms..." (mentioned above)
26 // in order to transform the graph into sets of variables that may alias in
27 // ~nlogn time (n = number of variables), which makes queries take constant
28 // time.
29 //===----------------------------------------------------------------------===//
30
31 // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
32 // CFLSteensAA is interprocedural. This is *technically* A Bad Thing, because
33 // FunctionPasses are only allowed to inspect the Function that they're being
34 // run on. Realistically, this likely isn't a problem until we allow
35 // FunctionPasses to run concurrently.
36
37 #include "llvm/Analysis/CFLSteensAliasAnalysis.h"
38 #include "AliasAnalysisSummary.h"
39 #include "CFLGraph.h"
40 #include "StratifiedSets.h"
41 #include "llvm/ADT/DenseMap.h"
42 #include "llvm/ADT/Optional.h"
43 #include "llvm/ADT/SmallVector.h"
44 #include "llvm/Analysis/TargetLibraryInfo.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/Value.h"
49 #include "llvm/Pass.h"
50 #include "llvm/Support/Debug.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include <algorithm>
53 #include <cassert>
54 #include <limits>
55 #include <memory>
56 #include <utility>
57
58 using namespace llvm;
59 using namespace llvm::cflaa;
60
61 #define DEBUG_TYPE "cfl-steens-aa"
62
CFLSteensAAResult(const TargetLibraryInfo & TLI)63 CFLSteensAAResult::CFLSteensAAResult(const TargetLibraryInfo &TLI)
64 : AAResultBase(), TLI(TLI) {}
CFLSteensAAResult(CFLSteensAAResult && Arg)65 CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg)
66 : AAResultBase(std::move(Arg)), TLI(Arg.TLI) {}
67 CFLSteensAAResult::~CFLSteensAAResult() = default;
68
69 /// Information we have about a function and would like to keep around.
70 class CFLSteensAAResult::FunctionInfo {
71 StratifiedSets<InstantiatedValue> Sets;
72 AliasSummary Summary;
73
74 public:
75 FunctionInfo(Function &Fn, const SmallVectorImpl<Value *> &RetVals,
76 StratifiedSets<InstantiatedValue> S);
77
getStratifiedSets() const78 const StratifiedSets<InstantiatedValue> &getStratifiedSets() const {
79 return Sets;
80 }
81
getAliasSummary() const82 const AliasSummary &getAliasSummary() const { return Summary; }
83 };
84
85 const StratifiedIndex StratifiedLink::SetSentinel =
86 std::numeric_limits<StratifiedIndex>::max();
87
88 //===----------------------------------------------------------------------===//
89 // Function declarations that require types defined in the namespace above
90 //===----------------------------------------------------------------------===//
91
92 /// Determines whether it would be pointless to add the given Value to our sets.
canSkipAddingToSets(Value * Val)93 static bool canSkipAddingToSets(Value *Val) {
94 // Constants can share instances, which may falsely unify multiple
95 // sets, e.g. in
96 // store i32* null, i32** %ptr1
97 // store i32* null, i32** %ptr2
98 // clearly ptr1 and ptr2 should not be unified into the same set, so
99 // we should filter out the (potentially shared) instance to
100 // i32* null.
101 if (isa<Constant>(Val)) {
102 // TODO: Because all of these things are constant, we can determine whether
103 // the data is *actually* mutable at graph building time. This will probably
104 // come for free/cheap with offset awareness.
105 bool CanStoreMutableData = isa<GlobalValue>(Val) ||
106 isa<ConstantExpr>(Val) ||
107 isa<ConstantAggregate>(Val);
108 return !CanStoreMutableData;
109 }
110
111 return false;
112 }
113
FunctionInfo(Function & Fn,const SmallVectorImpl<Value * > & RetVals,StratifiedSets<InstantiatedValue> S)114 CFLSteensAAResult::FunctionInfo::FunctionInfo(
115 Function &Fn, const SmallVectorImpl<Value *> &RetVals,
116 StratifiedSets<InstantiatedValue> S)
117 : Sets(std::move(S)) {
118 // Historically, an arbitrary upper-bound of 50 args was selected. We may want
119 // to remove this if it doesn't really matter in practice.
120 if (Fn.arg_size() > MaxSupportedArgsInSummary)
121 return;
122
123 DenseMap<StratifiedIndex, InterfaceValue> InterfaceMap;
124
125 // Our intention here is to record all InterfaceValues that share the same
126 // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we
127 // have its StratifiedIndex scanned here and check if the index is presented
128 // in InterfaceMap: if it is not, we add the correspondence to the map;
129 // otherwise, an aliasing relation is found and we add it to
130 // RetParamRelations.
131
132 auto AddToRetParamRelations = [&](unsigned InterfaceIndex,
133 StratifiedIndex SetIndex) {
134 unsigned Level = 0;
135 while (true) {
136 InterfaceValue CurrValue{InterfaceIndex, Level};
137
138 auto Itr = InterfaceMap.find(SetIndex);
139 if (Itr != InterfaceMap.end()) {
140 if (CurrValue != Itr->second)
141 Summary.RetParamRelations.push_back(
142 ExternalRelation{CurrValue, Itr->second, UnknownOffset});
143 break;
144 }
145
146 auto &Link = Sets.getLink(SetIndex);
147 InterfaceMap.insert(std::make_pair(SetIndex, CurrValue));
148 auto ExternalAttrs = getExternallyVisibleAttrs(Link.Attrs);
149 if (ExternalAttrs.any())
150 Summary.RetParamAttributes.push_back(
151 ExternalAttribute{CurrValue, ExternalAttrs});
152
153 if (!Link.hasBelow())
154 break;
155
156 ++Level;
157 SetIndex = Link.Below;
158 }
159 };
160
161 // Populate RetParamRelations for return values
162 for (auto *RetVal : RetVals) {
163 assert(RetVal != nullptr);
164 assert(RetVal->getType()->isPointerTy());
165 auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0});
166 if (RetInfo.hasValue())
167 AddToRetParamRelations(0, RetInfo->Index);
168 }
169
170 // Populate RetParamRelations for parameters
171 unsigned I = 0;
172 for (auto &Param : Fn.args()) {
173 if (Param.getType()->isPointerTy()) {
174 auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0});
175 if (ParamInfo.hasValue())
176 AddToRetParamRelations(I + 1, ParamInfo->Index);
177 }
178 ++I;
179 }
180 }
181
182 // Builds the graph + StratifiedSets for a function.
buildSetsFrom(Function * Fn)183 CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) {
184 CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, TLI, *Fn);
185 StratifiedSetsBuilder<InstantiatedValue> SetBuilder;
186
187 // Add all CFLGraph nodes and all Dereference edges to StratifiedSets
188 auto &Graph = GraphBuilder.getCFLGraph();
189 for (const auto &Mapping : Graph.value_mappings()) {
190 auto Val = Mapping.first;
191 if (canSkipAddingToSets(Val))
192 continue;
193 auto &ValueInfo = Mapping.second;
194
195 assert(ValueInfo.getNumLevels() > 0);
196 SetBuilder.add(InstantiatedValue{Val, 0});
197 SetBuilder.noteAttributes(InstantiatedValue{Val, 0},
198 ValueInfo.getNodeInfoAtLevel(0).Attr);
199 for (unsigned I = 0, E = ValueInfo.getNumLevels() - 1; I < E; ++I) {
200 SetBuilder.add(InstantiatedValue{Val, I + 1});
201 SetBuilder.noteAttributes(InstantiatedValue{Val, I + 1},
202 ValueInfo.getNodeInfoAtLevel(I + 1).Attr);
203 SetBuilder.addBelow(InstantiatedValue{Val, I},
204 InstantiatedValue{Val, I + 1});
205 }
206 }
207
208 // Add all assign edges to StratifiedSets
209 for (const auto &Mapping : Graph.value_mappings()) {
210 auto Val = Mapping.first;
211 if (canSkipAddingToSets(Val))
212 continue;
213 auto &ValueInfo = Mapping.second;
214
215 for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
216 auto Src = InstantiatedValue{Val, I};
217 for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges)
218 SetBuilder.addWith(Src, Edge.Other);
219 }
220 }
221
222 return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build());
223 }
224
scan(Function * Fn)225 void CFLSteensAAResult::scan(Function *Fn) {
226 auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));
227 (void)InsertPair;
228 assert(InsertPair.second &&
229 "Trying to scan a function that has already been cached");
230
231 // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call
232 // may get evaluated after operator[], potentially triggering a DenseMap
233 // resize and invalidating the reference returned by operator[]
234 auto FunInfo = buildSetsFrom(Fn);
235 Cache[Fn] = std::move(FunInfo);
236
237 Handles.emplace_front(Fn, this);
238 }
239
evict(Function * Fn)240 void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); }
241
242 /// Ensures that the given function is available in the cache, and returns the
243 /// entry.
244 const Optional<CFLSteensAAResult::FunctionInfo> &
ensureCached(Function * Fn)245 CFLSteensAAResult::ensureCached(Function *Fn) {
246 auto Iter = Cache.find(Fn);
247 if (Iter == Cache.end()) {
248 scan(Fn);
249 Iter = Cache.find(Fn);
250 assert(Iter != Cache.end());
251 assert(Iter->second.hasValue());
252 }
253 return Iter->second;
254 }
255
getAliasSummary(Function & Fn)256 const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) {
257 auto &FunInfo = ensureCached(&Fn);
258 if (FunInfo.hasValue())
259 return &FunInfo->getAliasSummary();
260 else
261 return nullptr;
262 }
263
query(const MemoryLocation & LocA,const MemoryLocation & LocB)264 AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
265 const MemoryLocation &LocB) {
266 auto *ValA = const_cast<Value *>(LocA.Ptr);
267 auto *ValB = const_cast<Value *>(LocB.Ptr);
268
269 if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy())
270 return NoAlias;
271
272 Function *Fn = nullptr;
273 Function *MaybeFnA = const_cast<Function *>(parentFunctionOfValue(ValA));
274 Function *MaybeFnB = const_cast<Function *>(parentFunctionOfValue(ValB));
275 if (!MaybeFnA && !MaybeFnB) {
276 // The only times this is known to happen are when globals + InlineAsm are
277 // involved
278 LLVM_DEBUG(
279 dbgs()
280 << "CFLSteensAA: could not extract parent function information.\n");
281 return MayAlias;
282 }
283
284 if (MaybeFnA) {
285 Fn = MaybeFnA;
286 assert((!MaybeFnB || MaybeFnB == MaybeFnA) &&
287 "Interprocedural queries not supported");
288 } else {
289 Fn = MaybeFnB;
290 }
291
292 assert(Fn != nullptr);
293 auto &MaybeInfo = ensureCached(Fn);
294 assert(MaybeInfo.hasValue());
295
296 auto &Sets = MaybeInfo->getStratifiedSets();
297 auto MaybeA = Sets.find(InstantiatedValue{ValA, 0});
298 if (!MaybeA.hasValue())
299 return MayAlias;
300
301 auto MaybeB = Sets.find(InstantiatedValue{ValB, 0});
302 if (!MaybeB.hasValue())
303 return MayAlias;
304
305 auto SetA = *MaybeA;
306 auto SetB = *MaybeB;
307 auto AttrsA = Sets.getLink(SetA.Index).Attrs;
308 auto AttrsB = Sets.getLink(SetB.Index).Attrs;
309
310 // If both values are local (meaning the corresponding set has attribute
311 // AttrNone or AttrEscaped), then we know that CFLSteensAA fully models them:
312 // they may-alias each other if and only if they are in the same set.
313 // If at least one value is non-local (meaning it either is global/argument or
314 // it comes from unknown sources like integer cast), the situation becomes a
315 // bit more interesting. We follow three general rules described below:
316 // - Non-local values may alias each other
317 // - AttrNone values do not alias any non-local values
318 // - AttrEscaped do not alias globals/arguments, but they may alias
319 // AttrUnknown values
320 if (SetA.Index == SetB.Index)
321 return MayAlias;
322 if (AttrsA.none() || AttrsB.none())
323 return NoAlias;
324 if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB))
325 return MayAlias;
326 if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB))
327 return MayAlias;
328 return NoAlias;
329 }
330
331 AnalysisKey CFLSteensAA::Key;
332
run(Function & F,FunctionAnalysisManager & AM)333 CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) {
334 return CFLSteensAAResult(AM.getResult<TargetLibraryAnalysis>(F));
335 }
336
337 char CFLSteensAAWrapperPass::ID = 0;
338 INITIALIZE_PASS(CFLSteensAAWrapperPass, "cfl-steens-aa",
339 "Unification-Based CFL Alias Analysis", false, true)
340
createCFLSteensAAWrapperPass()341 ImmutablePass *llvm::createCFLSteensAAWrapperPass() {
342 return new CFLSteensAAWrapperPass();
343 }
344
CFLSteensAAWrapperPass()345 CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) {
346 initializeCFLSteensAAWrapperPassPass(*PassRegistry::getPassRegistry());
347 }
348
initializePass()349 void CFLSteensAAWrapperPass::initializePass() {
350 auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
351 Result.reset(new CFLSteensAAResult(TLIWP.getTLI()));
352 }
353
getAnalysisUsage(AnalysisUsage & AU) const354 void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
355 AU.setPreservesAll();
356 AU.addRequired<TargetLibraryInfoWrapperPass>();
357 }
358