1 //===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a CFL-base, summary-based alias analysis algorithm. It
10 // does not depend on types. The algorithm is a mixture of the one described in
11 // "Demand-driven alias analysis for C" by Xin Zheng and Radu Rugina, and "Fast
12 // algorithms for Dyck-CFL-reachability with applications to Alias Analysis" by
13 // Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the papers, we build a
14 // graph of the uses of a variable, where each node is a memory location, and
15 // each edge is an action that happened on that memory location.  The "actions"
16 // can be one of Dereference, Reference, or Assign. The precision of this
17 // analysis is roughly the same as that of an one level context-sensitive
18 // Steensgaard's algorithm.
19 //
20 // Two variables are considered as aliasing iff you can reach one value's node
21 // from the other value's node and the language formed by concatenating all of
22 // the edge labels (actions) conforms to a context-free grammar.
23 //
24 // Because this algorithm requires a graph search on each query, we execute the
25 // algorithm outlined in "Fast algorithms..." (mentioned above)
26 // in order to transform the graph into sets of variables that may alias in
27 // ~nlogn time (n = number of variables), which makes queries take constant
28 // time.
29 //===----------------------------------------------------------------------===//
30 
31 // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
32 // CFLSteensAA is interprocedural. This is *technically* A Bad Thing, because
33 // FunctionPasses are only allowed to inspect the Function that they're being
34 // run on. Realistically, this likely isn't a problem until we allow
35 // FunctionPasses to run concurrently.
36 
37 #include "llvm/Analysis/CFLSteensAliasAnalysis.h"
38 #include "AliasAnalysisSummary.h"
39 #include "CFLGraph.h"
40 #include "StratifiedSets.h"
41 #include "llvm/ADT/DenseMap.h"
42 #include "llvm/ADT/Optional.h"
43 #include "llvm/ADT/SmallVector.h"
44 #include "llvm/Analysis/TargetLibraryInfo.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/Value.h"
49 #include "llvm/InitializePasses.h"
50 #include "llvm/Pass.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/Support/raw_ostream.h"
53 #include <algorithm>
54 #include <cassert>
55 #include <limits>
56 #include <memory>
57 #include <utility>
58 
59 using namespace llvm;
60 using namespace llvm::cflaa;
61 
62 #define DEBUG_TYPE "cfl-steens-aa"
63 
CFLSteensAAResult(std::function<const TargetLibraryInfo & (Function & F)> GetTLI)64 CFLSteensAAResult::CFLSteensAAResult(
65     std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
66     : AAResultBase(), GetTLI(std::move(GetTLI)) {}
CFLSteensAAResult(CFLSteensAAResult && Arg)67 CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg)
68     : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {}
69 CFLSteensAAResult::~CFLSteensAAResult() = default;
70 
71 /// Information we have about a function and would like to keep around.
72 class CFLSteensAAResult::FunctionInfo {
73   StratifiedSets<InstantiatedValue> Sets;
74   AliasSummary Summary;
75 
76 public:
77   FunctionInfo(Function &Fn, const SmallVectorImpl<Value *> &RetVals,
78                StratifiedSets<InstantiatedValue> S);
79 
getStratifiedSets() const80   const StratifiedSets<InstantiatedValue> &getStratifiedSets() const {
81     return Sets;
82   }
83 
getAliasSummary() const84   const AliasSummary &getAliasSummary() const { return Summary; }
85 };
86 
87 const StratifiedIndex StratifiedLink::SetSentinel =
88     std::numeric_limits<StratifiedIndex>::max();
89 
90 //===----------------------------------------------------------------------===//
91 // Function declarations that require types defined in the namespace above
92 //===----------------------------------------------------------------------===//
93 
94 /// Determines whether it would be pointless to add the given Value to our sets.
canSkipAddingToSets(Value * Val)95 static bool canSkipAddingToSets(Value *Val) {
96   // Constants can share instances, which may falsely unify multiple
97   // sets, e.g. in
98   // store i32* null, i32** %ptr1
99   // store i32* null, i32** %ptr2
100   // clearly ptr1 and ptr2 should not be unified into the same set, so
101   // we should filter out the (potentially shared) instance to
102   // i32* null.
103   if (isa<Constant>(Val)) {
104     // TODO: Because all of these things are constant, we can determine whether
105     // the data is *actually* mutable at graph building time. This will probably
106     // come for free/cheap with offset awareness.
107     bool CanStoreMutableData = isa<GlobalValue>(Val) ||
108                                isa<ConstantExpr>(Val) ||
109                                isa<ConstantAggregate>(Val);
110     return !CanStoreMutableData;
111   }
112 
113   return false;
114 }
115 
FunctionInfo(Function & Fn,const SmallVectorImpl<Value * > & RetVals,StratifiedSets<InstantiatedValue> S)116 CFLSteensAAResult::FunctionInfo::FunctionInfo(
117     Function &Fn, const SmallVectorImpl<Value *> &RetVals,
118     StratifiedSets<InstantiatedValue> S)
119     : Sets(std::move(S)) {
120   // Historically, an arbitrary upper-bound of 50 args was selected. We may want
121   // to remove this if it doesn't really matter in practice.
122   if (Fn.arg_size() > MaxSupportedArgsInSummary)
123     return;
124 
125   DenseMap<StratifiedIndex, InterfaceValue> InterfaceMap;
126 
127   // Our intention here is to record all InterfaceValues that share the same
128   // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we
129   // have its StratifiedIndex scanned here and check if the index is presented
130   // in InterfaceMap: if it is not, we add the correspondence to the map;
131   // otherwise, an aliasing relation is found and we add it to
132   // RetParamRelations.
133 
134   auto AddToRetParamRelations = [&](unsigned InterfaceIndex,
135                                     StratifiedIndex SetIndex) {
136     unsigned Level = 0;
137     while (true) {
138       InterfaceValue CurrValue{InterfaceIndex, Level};
139 
140       auto Itr = InterfaceMap.find(SetIndex);
141       if (Itr != InterfaceMap.end()) {
142         if (CurrValue != Itr->second)
143           Summary.RetParamRelations.push_back(
144               ExternalRelation{CurrValue, Itr->second, UnknownOffset});
145         break;
146       }
147 
148       auto &Link = Sets.getLink(SetIndex);
149       InterfaceMap.insert(std::make_pair(SetIndex, CurrValue));
150       auto ExternalAttrs = getExternallyVisibleAttrs(Link.Attrs);
151       if (ExternalAttrs.any())
152         Summary.RetParamAttributes.push_back(
153             ExternalAttribute{CurrValue, ExternalAttrs});
154 
155       if (!Link.hasBelow())
156         break;
157 
158       ++Level;
159       SetIndex = Link.Below;
160     }
161   };
162 
163   // Populate RetParamRelations for return values
164   for (auto *RetVal : RetVals) {
165     assert(RetVal != nullptr);
166     assert(RetVal->getType()->isPointerTy());
167     auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0});
168     if (RetInfo.hasValue())
169       AddToRetParamRelations(0, RetInfo->Index);
170   }
171 
172   // Populate RetParamRelations for parameters
173   unsigned I = 0;
174   for (auto &Param : Fn.args()) {
175     if (Param.getType()->isPointerTy()) {
176       auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0});
177       if (ParamInfo.hasValue())
178         AddToRetParamRelations(I + 1, ParamInfo->Index);
179     }
180     ++I;
181   }
182 }
183 
184 // Builds the graph + StratifiedSets for a function.
buildSetsFrom(Function * Fn)185 CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) {
186   CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, GetTLI(*Fn), *Fn);
187   StratifiedSetsBuilder<InstantiatedValue> SetBuilder;
188 
189   // Add all CFLGraph nodes and all Dereference edges to StratifiedSets
190   auto &Graph = GraphBuilder.getCFLGraph();
191   for (const auto &Mapping : Graph.value_mappings()) {
192     auto Val = Mapping.first;
193     if (canSkipAddingToSets(Val))
194       continue;
195     auto &ValueInfo = Mapping.second;
196 
197     assert(ValueInfo.getNumLevels() > 0);
198     SetBuilder.add(InstantiatedValue{Val, 0});
199     SetBuilder.noteAttributes(InstantiatedValue{Val, 0},
200                               ValueInfo.getNodeInfoAtLevel(0).Attr);
201     for (unsigned I = 0, E = ValueInfo.getNumLevels() - 1; I < E; ++I) {
202       SetBuilder.add(InstantiatedValue{Val, I + 1});
203       SetBuilder.noteAttributes(InstantiatedValue{Val, I + 1},
204                                 ValueInfo.getNodeInfoAtLevel(I + 1).Attr);
205       SetBuilder.addBelow(InstantiatedValue{Val, I},
206                           InstantiatedValue{Val, I + 1});
207     }
208   }
209 
210   // Add all assign edges to StratifiedSets
211   for (const auto &Mapping : Graph.value_mappings()) {
212     auto Val = Mapping.first;
213     if (canSkipAddingToSets(Val))
214       continue;
215     auto &ValueInfo = Mapping.second;
216 
217     for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
218       auto Src = InstantiatedValue{Val, I};
219       for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges)
220         SetBuilder.addWith(Src, Edge.Other);
221     }
222   }
223 
224   return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build());
225 }
226 
scan(Function * Fn)227 void CFLSteensAAResult::scan(Function *Fn) {
228   auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));
229   (void)InsertPair;
230   assert(InsertPair.second &&
231          "Trying to scan a function that has already been cached");
232 
233   // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call
234   // may get evaluated after operator[], potentially triggering a DenseMap
235   // resize and invalidating the reference returned by operator[]
236   auto FunInfo = buildSetsFrom(Fn);
237   Cache[Fn] = std::move(FunInfo);
238 
239   Handles.emplace_front(Fn, this);
240 }
241 
evict(Function * Fn)242 void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); }
243 
244 /// Ensures that the given function is available in the cache, and returns the
245 /// entry.
246 const Optional<CFLSteensAAResult::FunctionInfo> &
ensureCached(Function * Fn)247 CFLSteensAAResult::ensureCached(Function *Fn) {
248   auto Iter = Cache.find(Fn);
249   if (Iter == Cache.end()) {
250     scan(Fn);
251     Iter = Cache.find(Fn);
252     assert(Iter != Cache.end());
253     assert(Iter->second.hasValue());
254   }
255   return Iter->second;
256 }
257 
getAliasSummary(Function & Fn)258 const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) {
259   auto &FunInfo = ensureCached(&Fn);
260   if (FunInfo.hasValue())
261     return &FunInfo->getAliasSummary();
262   else
263     return nullptr;
264 }
265 
query(const MemoryLocation & LocA,const MemoryLocation & LocB)266 AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
267                                      const MemoryLocation &LocB) {
268   auto *ValA = const_cast<Value *>(LocA.Ptr);
269   auto *ValB = const_cast<Value *>(LocB.Ptr);
270 
271   if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy())
272     return NoAlias;
273 
274   Function *Fn = nullptr;
275   Function *MaybeFnA = const_cast<Function *>(parentFunctionOfValue(ValA));
276   Function *MaybeFnB = const_cast<Function *>(parentFunctionOfValue(ValB));
277   if (!MaybeFnA && !MaybeFnB) {
278     // The only times this is known to happen are when globals + InlineAsm are
279     // involved
280     LLVM_DEBUG(
281         dbgs()
282         << "CFLSteensAA: could not extract parent function information.\n");
283     return MayAlias;
284   }
285 
286   if (MaybeFnA) {
287     Fn = MaybeFnA;
288     assert((!MaybeFnB || MaybeFnB == MaybeFnA) &&
289            "Interprocedural queries not supported");
290   } else {
291     Fn = MaybeFnB;
292   }
293 
294   assert(Fn != nullptr);
295   auto &MaybeInfo = ensureCached(Fn);
296   assert(MaybeInfo.hasValue());
297 
298   auto &Sets = MaybeInfo->getStratifiedSets();
299   auto MaybeA = Sets.find(InstantiatedValue{ValA, 0});
300   if (!MaybeA.hasValue())
301     return MayAlias;
302 
303   auto MaybeB = Sets.find(InstantiatedValue{ValB, 0});
304   if (!MaybeB.hasValue())
305     return MayAlias;
306 
307   auto SetA = *MaybeA;
308   auto SetB = *MaybeB;
309   auto AttrsA = Sets.getLink(SetA.Index).Attrs;
310   auto AttrsB = Sets.getLink(SetB.Index).Attrs;
311 
312   // If both values are local (meaning the corresponding set has attribute
313   // AttrNone or AttrEscaped), then we know that CFLSteensAA fully models them:
314   // they may-alias each other if and only if they are in the same set.
315   // If at least one value is non-local (meaning it either is global/argument or
316   // it comes from unknown sources like integer cast), the situation becomes a
317   // bit more interesting. We follow three general rules described below:
318   // - Non-local values may alias each other
319   // - AttrNone values do not alias any non-local values
320   // - AttrEscaped do not alias globals/arguments, but they may alias
321   // AttrUnknown values
322   if (SetA.Index == SetB.Index)
323     return MayAlias;
324   if (AttrsA.none() || AttrsB.none())
325     return NoAlias;
326   if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB))
327     return MayAlias;
328   if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB))
329     return MayAlias;
330   return NoAlias;
331 }
332 
333 AnalysisKey CFLSteensAA::Key;
334 
run(Function & F,FunctionAnalysisManager & AM)335 CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) {
336   auto GetTLI = [&AM](Function &F) -> const TargetLibraryInfo & {
337     return AM.getResult<TargetLibraryAnalysis>(F);
338   };
339   return CFLSteensAAResult(GetTLI);
340 }
341 
342 char CFLSteensAAWrapperPass::ID = 0;
343 INITIALIZE_PASS(CFLSteensAAWrapperPass, "cfl-steens-aa",
344                 "Unification-Based CFL Alias Analysis", false, true)
345 
createCFLSteensAAWrapperPass()346 ImmutablePass *llvm::createCFLSteensAAWrapperPass() {
347   return new CFLSteensAAWrapperPass();
348 }
349 
CFLSteensAAWrapperPass()350 CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) {
351   initializeCFLSteensAAWrapperPassPass(*PassRegistry::getPassRegistry());
352 }
353 
initializePass()354 void CFLSteensAAWrapperPass::initializePass() {
355   auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
356     return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
357   };
358   Result.reset(new CFLSteensAAResult(GetTLI));
359 }
360 
getAnalysisUsage(AnalysisUsage & AU) const361 void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
362   AU.setPreservesAll();
363   AU.addRequired<TargetLibraryInfoWrapperPass>();
364 }
365