//===- ScopBuilder.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description, derived from the LLVM-IR code,
// of the SCoPs detected by SCoP detection.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopBuilder.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopInfo.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLTools.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"

STATISTIC(ScopFound, "Number of valid Scops");
STATISTIC(RichScopFound, "Number of Scops containing a loop");
STATISTIC(InfeasibleScops,
          "Number of SCoPs with statically infeasible context.");

bool polly::ModelReadOnlyScalars;

// The maximal number of dimensions we allow during invariant load construction.
// More complex access ranges will result in very high compile time and are also
// unlikely to result in good code. This value is very high and should only
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
static int const MaxDimensionsInAccessRange = 9;

static cl::opt<bool, true> XModelReadOnlyScalars(
    "polly-analyze-read-only-scalars",
    cl::desc("Model read-only scalar values in the scop description"),
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<int>
    OptComputeOut("polly-analysis-computeout",
                  cl::desc("Bound the scop analysis by a maximal amount of "
                           "computational steps (0 means no bound)"),
                  cl::Hidden, cl::init(800000), cl::ZeroOrMore,
                  cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
    "polly-allow-dereference-of-all-function-parameters",
    cl::desc(
        "Treat all parameters to functions that are pointers as dereferenceable."
        " This is useful for invariant load hoisting, since we can generate"
        " fewer runtime checks. This is only valid if all pointers to functions"
        " are always initialized, so that Polly can choose to hoist"
        " their loads."),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool>
    PollyIgnoreInbounds("polly-ignore-inbounds",
                        cl::desc("Do not take inbounds assumptions at all"),
                        cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
    "polly-rtc-max-arrays-per-group",
    cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));

static cl::opt<int> RunTimeChecksMaxAccessDisjuncts(
    "polly-rtc-max-array-disjuncts",
    cl::desc("The maximal number of disjuncts allowed in memory accesses "
             "to build RTCs."),
    cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxParameters(
    "polly-rtc-max-parameters",
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));

static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<std::string> UserContextStr(
    "polly-context", cl::value_desc("isl parameter set"),
    cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));

static cl::opt<bool> DetectFortranArrays(
    "polly-detect-fortran-arrays",
    cl::desc("Detect Fortran arrays and use this for code generation"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::ZeroOrMore,
                                      cl::init(true), cl::cat(PollyCategory));

// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations,
// in contrast, are pretty stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };

static cl::opt<GranularityChoice> StmtGranularity(
    "polly-stmt-granularity",
    cl::desc(
        "Algorithm to use for splitting basic blocks into multiple statements"),
    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
                          "One statement per basic block"),
               clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
                          "Scalar independence heuristic"),
               clEnumValN(GranularityChoice::Stores, "store",
                          "Store-level granularity")),
    cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));

/// Helper to treat non-affine regions and basic blocks the same.
///
///{

/// Return the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
                           : RN->getNodeAs<BasicBlock>();
}

/// Return the @p idx'th block that is executed after @p RN.
static inline BasicBlock *
getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
  if (RN->isSubRegion()) {
    assert(idx == 0);
    return RN->getNodeAs<Region>()->getExit();
  }
  return TI->getSuccessor(idx);
}

static bool containsErrorBlock(RegionNode *RN, const Region &R, LoopInfo &LI,
                               const DominatorTree &DT) {
  if (!RN->isSubRegion())
    return isErrorBlock(*RN->getNodeAs<BasicBlock>(), R, LI, DT);
  for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
    if (isErrorBlock(*BB, R, LI, DT))
      return true;
  return false;
}

///}

/// Create a map to map from a given iteration to a subsequent iteration.
///
/// This map maps from SetSpace -> SetSpace where dimension @p Dim
/// is incremented by one and all other dimensions remain equal, e.g.,
///             [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
///
/// if @p Dim is 2 and @p SetSpace has 4 dimensions.
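///
/// The increment is encoded below as the equality constraint
/// 1 + in[Dim] - out[Dim] = 0, i.e., out[Dim] = in[Dim] + 1 (an illustrative
/// reading of the constraint construction).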
static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
  isl::space MapSpace = SetSpace.map_from_set();
  isl::map NextIterationMap = isl::map::universe(MapSpace);
  for (unsigned u = 0; u < NextIterationMap.dim(isl::dim::in); u++)
    if (u != Dim)
      NextIterationMap =
          NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u);
  isl::constraint C =
      isl::constraint::alloc_equality(isl::local_space(MapSpace));
  C = C.set_constant_si(1);
  C = C.set_coefficient_si(isl::dim::in, Dim, 1);
  C = C.set_coefficient_si(isl::dim::out, Dim, -1);
  NextIterationMap = NextIterationMap.add_constraint(C);
  return NextIterationMap;
}

/// Collect the bounded parts of @p S, i.e., the union of all basic sets of
/// @p S that are bounded.
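///
/// Example (illustrative): for S = { [i] : 0 <= i <= 9 or i >= 42 } the
/// result is { [i] : 0 <= i <= 9 }; the unbounded disjunct i >= 42 is
/// dropped.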
static isl::set collectBoundedParts(isl::set S) {
  isl::set BoundedParts = isl::set::empty(S.get_space());
  for (isl::basic_set BSet : S.get_basic_set_list())
    if (BSet.is_bounded())
      BoundedParts = BoundedParts.unite(isl::set(BSet));
  return BoundedParts;
}

/// Compute the (un)bounded parts of @p S with respect to dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded then a bounded subset,
///          both with regards to the dimension @p Dim.
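///
/// Example (illustrative): for S = { [i] : i >= 0 } and Dim = 0 the bounded
/// part is empty and the unbounded part is S itself; with an additional
/// constraint i < N, all of S would be returned as the bounded part (bounded
/// in terms of the parameter N).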
static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
                                                       unsigned Dim) {
  for (unsigned u = 0, e = S.n_dim(); u < e; u++)
    S = S.lower_bound_si(isl::dim::set, u, 0);

  unsigned NumDimsS = S.n_dim();
  isl::set OnlyDimS = S;

  // Remove dimensions that are greater than Dim as they are not interesting.
  assert(NumDimsS >= Dim + 1);
  OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Create artificial parametric upper bounds for dimensions smaller than Dim
  // as we are not interested in them.
  OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim);

  for (unsigned u = 0; u < Dim; u++) {
    isl::constraint C = isl::constraint::alloc_inequality(
        isl::local_space(OnlyDimS.get_space()));
    C = C.set_coefficient_si(isl::dim::param, u, 1);
    C = C.set_coefficient_si(isl::dim::set, u, -1);
    OnlyDimS = OnlyDimS.add_constraint(C);
  }

  // Collect all bounded parts of OnlyDimS.
  isl::set BoundedParts = collectBoundedParts(OnlyDimS);

  // Create the dimensions greater than Dim again.
  BoundedParts =
      BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Remove the artificial upper bound parameters again.
  BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim);

  isl::set UnboundedParts = S.subtract(BoundedParts);
  return std::make_pair(UnboundedParts, BoundedParts);
}

/// Create the conditions under which @p L @p Pred @p R is true.
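///
/// Example (illustrative): for Pred = ICMP_SLT, L = { [i] -> [(i)] } and
/// R = { [i] -> [(5)] } the result is { [i] : i < 5 }.
///
/// Note that the unsigned predicates are mapped to their signed counterparts
/// here; the callers establish the required non-negativity assumptions (see
/// buildUnsignedConditionSets and the NonNeg flag of getPwAff).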
static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
                                  isl::pw_aff R) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    return L.eq_set(R);
  case ICmpInst::ICMP_NE:
    return L.ne_set(R);
  case ICmpInst::ICMP_SLT:
    return L.lt_set(R);
  case ICmpInst::ICMP_SLE:
    return L.le_set(R);
  case ICmpInst::ICMP_SGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_SGE:
    return L.ge_set(R);
  case ICmpInst::ICMP_ULT:
    return L.lt_set(R);
  case ICmpInst::ICMP_UGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_ULE:
    return L.le_set(R);
  case ICmpInst::ICMP_UGE:
    return L.ge_set(R);
  default:
    llvm_unreachable("Non integer predicate not supported");
  }
}

isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL,
                                             Loop *NewL) {
  // If the loops are the same there is nothing to do.
  if (NewL == OldL)
    return Dom;

  int OldDepth = scop->getRelativeLoopDepth(OldL);
  int NewDepth = scop->getRelativeLoopDepth(NewL);
  // If both loops are non-affine loops there is nothing to do.
  if (OldDepth == -1 && NewDepth == -1)
    return Dom;

  // Distinguish three cases:
  //   1) The depth is the same but the loops are not.
  //      => One loop was left, one was entered.
  //   2) The depth increased from OldL to NewL.
  //      => One loop was entered, none was left.
  //   3) The depth decreased from OldL to NewL.
  //      => Loops were left; the difference of the depths defines how many.
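  //
  // Illustrative example for case 3): going from a loop at relative depth 2
  // to one at relative depth 0 projects out the two innermost dimensions,
  // e.g., [i0, i1, i2] -> [i0].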
  if (OldDepth == NewDepth) {
    assert(OldL->getParentLoop() == NewL->getParentLoop());
    Dom = Dom.project_out(isl::dim::set, NewDepth, 1);
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else if (OldDepth < NewDepth) {
    assert(OldDepth + 1 == NewDepth);
    auto &R = scop->getRegion();
    (void)R;
    assert(NewL->getParentLoop() == OldL ||
           ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else {
    assert(OldDepth > NewDepth);
    int Diff = OldDepth - NewDepth;
    int NumDim = Dom.n_dim();
    assert(NumDim >= Diff);
    Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff);
  }

  return Dom;
}

/// Compute the isl representation for the SCEV @p E in this BB.
///
/// @param BB               The BB for which isl representation is to be
/// computed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate that @p E has to be non-negative.
///
/// Note that this function will also adjust the invalid context accordingly.

__isl_give isl_pw_aff *
ScopBuilder::getPwAff(BasicBlock *BB,
                      DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                      const SCEV *E, bool NonNegative) {
  PWACtx PWAC = scop->getPwAff(E, BB, NonNegative, &RecordedAssumptions);
  InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
  return PWAC.first.release();
}

/// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands to ensure that if
/// the MSB (aka the sign bit) is set for an operand in an unsigned ICmpInst
/// it is modeled as wrapping around.
///
/// @param IsStrictUpperBound holds information on the predicate relation
/// between TestVal and UpperBound, i.e.,
/// TestVal < UpperBound  OR  TestVal <= UpperBound
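///
/// Example (illustrative): for `TestVal <u UpperBound` (IsStrictUpperBound
/// being true) the returned set is the conjunction
/// { 0 <= TestVal and TestVal < UpperBound }.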
__isl_give isl_set *ScopBuilder::buildUnsignedConditionSets(
    BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain,
    const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    bool IsStrictUpperBound) {
  // Do not take NonNeg assumption on TestVal
  // as it might have MSB (Sign bit) set.
  isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, SCEV_TestVal, false);
  // Take NonNeg assumption on UpperBound.
  isl_pw_aff *UpperBound =
      getPwAff(BB, InvalidDomainMap, SCEV_UpperBound, true);

  // 0 <= TestVal
  isl_set *First =
      isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
                            isl_pw_aff_get_domain_space(TestVal))),
                        isl_pw_aff_copy(TestVal));

  isl_set *Second;
  if (IsStrictUpperBound)
    // TestVal < UpperBound
    Second = isl_pw_aff_lt_set(TestVal, UpperBound);
  else
    // TestVal <= UpperBound
    Second = isl_pw_aff_le_set(TestVal, UpperBound);

  isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
  return ConsequenceCondSet;
}

bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  Value *Condition = getConditionFromTerminator(SI);
  assert(Condition && "No condition for switch");

  isl_pw_aff *LHS, *RHS;
  LHS = getPwAff(BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));

  unsigned NumSuccessors = SI->getNumSuccessors();
  ConditionSets.resize(NumSuccessors);
  for (auto &Case : SI->cases()) {
    unsigned Idx = Case.getSuccessorIndex();
    ConstantInt *CaseValue = Case.getCaseValue();

    RHS = getPwAff(BB, InvalidDomainMap, SE.getSCEV(CaseValue));
    isl_set *CaseConditionSet =
        buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS),
                          isl::manage(RHS))
            .release();
    ConditionSets[Idx] = isl_set_coalesce(
        isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
  }

  assert(ConditionSets[0] == nullptr && "Default condition set was set");
  isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
  for (unsigned u = 2; u < NumSuccessors; u++)
    ConditionSetUnion =
        isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
  ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion);

  isl_pw_aff_free(LHS);

  return true;
}

bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L,
    __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  isl_set *ConsequenceCondSet = nullptr;

  if (auto Load = dyn_cast<LoadInst>(Condition)) {
    const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
    const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
    bool NonNeg = false;
    isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, LHSSCEV, NonNeg);
    isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, RHSSCEV, NonNeg);
    ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS),
                                           isl::manage(RHS))
                             .release();
  } else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast<ConstantInt>(
        getUniqueNonErrorValue(PHI, &scop->getRegion(), LI, DT));

    if (Unique->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
    if (CCond->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    assert(Opcode == Instruction::And || Opcode == Instruction::Or);

    bool Valid = buildConditionSets(BB, BinOp->getOperand(0), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets) &&
                 buildConditionSets(BB, BinOp->getOperand(1), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets);
    if (!Valid) {
      while (!ConditionSets.empty())
        isl_set_free(ConditionSets.pop_back_val());
      return false;
    }

    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart1 = ConditionSets.pop_back_val();

    if (Opcode == Instruction::And)
      ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
    else
      ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
  } else {
    auto *ICond = dyn_cast<ICmpInst>(Condition);
    assert(ICond &&
           "Condition of exiting branch was neither constant nor ICmp!");

    Region &R = scop->getRegion();

    isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assume the sign bit of neither operand
    // to be set. Under this assumption the comparison is equivalent to a
    // signed comparison.
    bool NonNeg = ICond->isUnsigned();
    const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
               *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);

    LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, LI, DT);
    RightOperand = tryForwardThroughPHI(RightOperand, R, SE, LI, DT);

    switch (ICond->getPredicate()) {
    case ICmpInst::ICMP_ULT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_ULE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, false);
      break;
    case ICmpInst::ICMP_UGT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_UGE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, false);
      break;
    default:
      LHS = getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg);
      RHS = getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg);
      ConsequenceCondSet = buildConditionSet(ICond->getPredicate(),
                                             isl::manage(LHS), isl::manage(RHS))
                               .release();
      break;
    }
  }

  // If no terminator was given we are only looking for parameter constraints
  // under which @p Condition is true/false.
  if (!TI)
    ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
  assert(ConsequenceCondSet);
  ConsequenceCondSet = isl_set_coalesce(
      isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));

  isl_set *AlternativeCondSet = nullptr;
  bool TooComplex =
      isl_set_n_basic_set(ConsequenceCondSet) >= MaxDisjunctsInDomain;

  if (!TooComplex) {
    AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
                                          isl_set_copy(ConsequenceCondSet));
    TooComplex =
        isl_set_n_basic_set(AlternativeCondSet) >= MaxDisjunctsInDomain;
  }

  if (TooComplex) {
    scop->invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
                     TI ? TI->getParent() : nullptr /* BasicBlock */);
    isl_set_free(AlternativeCondSet);
    isl_set_free(ConsequenceCondSet);
    return false;
  }

  ConditionSets.push_back(ConsequenceCondSet);
  ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));

  return true;
}

bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
    return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap,
                              ConditionSets);

  assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");

  if (TI->getNumSuccessors() == 1) {
    ConditionSets.push_back(isl_set_copy(Domain));
    return true;
  }

  Value *Condition = getConditionFromTerminator(TI);
  assert(Condition && "No condition for Terminator");

  return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap,
                            ConditionSets);
}

bool ScopBuilder::propagateDomainConstraints(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Iterate over the region R and propagate the domain constraints from the
  // predecessors to the current node. In contrast to the
  // buildDomainsWithBranchConstraints function, this one will pull the domain
  // information from the predecessors instead of pushing it to the successors.
  // Additionally, we assume the domains to be already present in the domain
  // map here. However, we iterate again in reverse post order so we know all
  // predecessors have been visited before a block or non-affine subregion is
  // visited.

  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        if (!propagateDomainConstraints(SubRegion, InvalidDomainMap))
          return false;
        continue;
      }
    }

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = scop->getOrInitEmptyDomain(BB);
    assert(Domain);

    // Under the union of all predecessor conditions we can reach this block.
    isl::set PredDom = getPredecessorDomainConstraints(BB, Domain);
    Domain = Domain.intersect(PredDom).coalesce();
    Domain = Domain.align_params(scop->getParamSpace());

    Loop *BBLoop = getRegionNodeLoop(RN, LI);
    if (BBLoop && BBLoop->getHeader() == BB && scop->contains(BBLoop))
      if (!addLoopBoundsToHeaderDomain(BBLoop, InvalidDomainMap))
        return false;
  }

  return true;
}

void ScopBuilder::propagateDomainConstraintsToRegionExit(
    BasicBlock *BB, Loop *BBLoop,
    SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Check if the block @p BB is the entry of a region. If so we propagate its
  // domain to the exit block of the region. Otherwise we are done.
  auto *RI = scop->getRegion().getRegionInfo();
  auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
  auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
  if (!BBReg || BBReg->getEntry() != BB || !scop->contains(ExitBB))
    return;

  // Do not propagate the domain if there is a loop backedge inside the region
  // that would prevent the exit block from being executed.
  auto *L = BBLoop;
  while (L && scop->contains(L)) {
    SmallVector<BasicBlock *, 4> LatchBBs;
    L->getLoopLatches(LatchBBs);
    for (auto *LatchBB : LatchBBs)
      if (BB != LatchBB && BBReg->contains(LatchBB))
        return;
    L = L->getParentLoop();
  }

  isl::set Domain = scop->getOrInitEmptyDomain(BB);
  assert(Domain && "Cannot propagate a nullptr");

  Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, scop->getBoxedLoops());

  // Since the dimensions of @p BB and @p ExitBB might be different we have to
  // adjust the domain before we can propagate it.
  isl::set AdjustedDomain = adjustDomainDimensions(Domain, BBLoop, ExitBBLoop);
  isl::set &ExitDomain = scop->getOrInitEmptyDomain(ExitBB);

  // If the exit domain is not yet created we set it otherwise we "add" the
  // current domain.
  ExitDomain = ExitDomain ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;

  // Initialize the invalid domain.
  InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());

  FinishedExitBlocks.insert(ExitBB);
}

isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
                                                      isl::set Domain) {
  // If @p BB is the ScopEntry we are done
  if (scop->getRegion().getEntry() == BB)
    return isl::set::universe(Domain.get_space());

  // The region info of this function.
  auto &RI = *scop->getRegion().getRegionInfo();

  Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, scop->getBoxedLoops());

  // A domain to collect all predecessor domains, thus all conditions under
  // which the block is executed. To this end we start with the empty domain.
  isl::set PredDom = isl::set::empty(Domain.get_space());

  // Set of regions of which the entry block domain has been propagated to BB.
  // All predecessors inside any of the regions can be skipped.
  SmallSet<Region *, 8> PropagatedRegions;

  for (auto *PredBB : predecessors(BB)) {
    // Skip backedges.
    if (DT.dominates(BB, PredBB))
      continue;

    // If the predecessor is in a region we used for propagation we can skip it.
    auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
    if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(),
                    PredBBInRegion)) {
      continue;
    }

    // Check if there is a valid region we can use for propagation, thus look
    // for a region that contains the predecessor and has @p BB as exit block.
    auto *PredR = RI.getRegionFor(PredBB);
    while (PredR->getExit() != BB && !PredR->contains(BB))
      PredR = PredR->getParent();

    // If a valid region for propagation was found use the entry of that region
    // for propagation, otherwise the PredBB directly.
    if (PredR->getExit() == BB) {
      PredBB = PredR->getEntry();
      PropagatedRegions.insert(PredR);
    }

    isl::set PredBBDom = scop->getDomainConditions(PredBB);
    Loop *PredBBLoop =
        getFirstNonBoxedLoopFor(PredBB, LI, scop->getBoxedLoops());
    PredBBDom = adjustDomainDimensions(PredBBDom, PredBBLoop, BBLoop);
    PredDom = PredDom.unite(PredBBDom);
  }

  return PredDom;
}

bool ScopBuilder::addLoopBoundsToHeaderDomain(
    Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  int LoopDepth = scop->getRelativeLoopDepth(L);
  assert(LoopDepth >= 0 && "Loop in region should have at least depth one");

  BasicBlock *HeaderBB = L->getHeader();
  assert(scop->isDomainDefined(HeaderBB));
  isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(HeaderBB);

  isl::map NextIterationMap =
      createNextIterationMap(HeaderBBDom.get_space(), LoopDepth);

  isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());

  SmallVector<BasicBlock *, 4> LatchBlocks;
  L->getLoopLatches(LatchBlocks);

  for (BasicBlock *LatchBB : LatchBlocks) {
    // If the latch is only reachable via error statements we skip it.
    if (!scop->isDomainDefined(LatchBB))
      continue;

    isl::set LatchBBDom = scop->getDomainConditions(LatchBB);

    isl::set BackedgeCondition = nullptr;

    Instruction *TI = LatchBB->getTerminator();
    BranchInst *BI = dyn_cast<BranchInst>(TI);
    assert(BI && "Only branch instructions allowed in loop latches");

    if (BI->isUnconditional())
      BackedgeCondition = LatchBBDom;
    else {
      SmallVector<isl_set *, 8> ConditionSets;
      int idx = BI->getSuccessor(0) != HeaderBB;
      if (!buildConditionSets(LatchBB, TI, L, LatchBBDom.get(),
                              InvalidDomainMap, ConditionSets))
        return false;

      // Free the non back edge condition set as we do not need it.
      isl_set_free(ConditionSets[1 - idx]);

      BackedgeCondition = isl::manage(ConditionSets[idx]);
    }

    int LatchLoopDepth = scop->getRelativeLoopDepth(LI.getLoopFor(LatchBB));
    assert(LatchLoopDepth >= LoopDepth);
    BackedgeCondition = BackedgeCondition.project_out(
        isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
    UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
  }

  isl::map ForwardMap = isl::map::lex_le(HeaderBBDom.get_space());
  for (int i = 0; i < LoopDepth; i++)
    ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);

  isl::set UnionBackedgeConditionComplement =
      UnionBackedgeCondition.complement();
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
                                                      0);
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.apply(ForwardMap);
  HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
  HeaderBBDom = HeaderBBDom.apply(NextIterationMap);

  auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
  HeaderBBDom = Parts.second;

  // Check if there is a <nsw> tagged AddRec for this loop and if so do not
  // require a runtime check. The assumption is already implied by the <nsw>
  // tag.
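  // Illustratively, for a loop `for (int i = 0; i < n; ++i)` the induction
  // variable's AddRec carries <nsw>, so the iteration count cannot become
  // unbounded through wrapping and the no-infinite-loop assumption needs no
  // runtime check.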
  bool RequiresRTC = !scop->hasNSWAddRecForLoop(L);

  isl::set UnboundedCtx = Parts.first.params();
  recordAssumption(&RecordedAssumptions, INFINITELOOP, UnboundedCtx,
                   HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION,
                   nullptr, RequiresRTC);
  return true;
}

void ScopBuilder::buildInvariantEquivalenceClasses() {
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;

  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : RIL) {
    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());

    Type *Ty = LInst->getType();
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
    if (ClassRep) {
      scop->addInvariantLoadMapping(LInst, ClassRep);
      continue;
    }

    ClassRep = LInst;
    scop->addInvariantEquivClass(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
  }
}


bool ScopBuilder::buildDomains(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R);
  auto *EntryBB = R->getEntry();
  auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
  int LD = scop->getRelativeLoopDepth(L);
  auto *S =
      isl_set_universe(isl_space_set_alloc(scop->getIslCtx().get(), 0, LD + 1));

  InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
  isl::noexceptions::set Domain = isl::manage(S);
  scop->setDomain(EntryBB, Domain);

  if (IsOnlyNonAffineRegion)
    return !containsErrorBlock(R->getNode(), *R, LI, DT);

  if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap))
    return false;

  if (!propagateDomainConstraints(R, InvalidDomainMap))
    return false;

  // Error blocks and blocks dominated by them have been assumed to never be
  // executed. Representing them in the Scop does not add any value. In fact,
  // it is likely to cause issues during construction of the ScopStmts. The
  // contents of error blocks have not been verified to be expressible and
  // will cause problems when building up a ScopStmt for them.
  // Furthermore, basic blocks dominated by error blocks may reference
  // instructions in the error block which, if the error block is not modeled,
  // can themselves not be constructed properly. To this end we will replace
  // the domains of error blocks and those only reachable via error blocks
  // with an empty set. Additionally, we will record for each block under which
  // parameter combination it would be reached via an error block in its
  // InvalidDomain. This information is needed during load hoisting.
  if (!propagateInvalidStmtDomains(R, InvalidDomainMap))
    return false;

  return true;
}


bool ScopBuilder::buildDomainsWithBranchConstraints(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // To create the domain for each block in R we iterate over all blocks and
  // subregions in R and propagate the conditions under which the current region
  // element is executed. To this end we iterate in reverse post order over R as
  // it ensures that we first visit all predecessors of a region node (either a
  // basic block or a subregion) before we visit the region node itself.
  // Initially, only the domain for the SCoP region entry block is set and from
  // there we propagate the current domain to all successors, however we add the
  // condition that the successor is actually executed next.
  // As we are only interested in non-loop carried constraints here we can
  // simply skip loop back edges.

  SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        if (!buildDomainsWithBranchConstraints(SubRegion, InvalidDomainMap))
          return false;
        continue;
      }
    }

    if (containsErrorBlock(RN, scop->getRegion(), LI, DT))
      scop->notifyErrorBlock();

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    Instruction *TI = BB->getTerminator();

    if (isa<UnreachableInst>(TI))
      continue;

    if (!scop->isDomainDefined(BB))
      continue;
    isl::set Domain = scop->getDomainConditions(BB);

    scop->updateMaxLoopDepth(isl_set_n_dim(Domain.get()));

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    // Propagate the domain from BB directly to blocks that have a superset
    // domain, at the moment only region exit nodes of regions that start in BB.
    propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks,
                                           InvalidDomainMap);

    // If all successors of BB have had a domain set through the propagation
    // above, we do not need to build condition sets but can just skip this
    // block. However, it is important to note that this is a local property
    // with regards to the region @p R. To this end FinishedExitBlocks is a
    // local variable.
    auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
      return FinishedExitBlocks.count(SuccBB);
    };
    if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
      continue;

    // Build the condition sets for the successor nodes of the current region
    // node. If it is a non-affine subregion we will always execute the single
    // exit node, hence the single entry node domain is the condition set. For
    // basic blocks we use the helper function buildConditionSets.
    SmallVector<isl_set *, 8> ConditionSets;
    if (RN->isSubRegion())
      ConditionSets.push_back(Domain.copy());
    else if (!buildConditionSets(BB, TI, BBLoop, Domain.get(), InvalidDomainMap,
                                 ConditionSets))
      return false;

    // Now iterate over the successors and set their initial domain based on
    // their condition set. We skip back edges here and have to be careful when
    // we leave a loop not to keep constraints over a dimension that doesn't
    // exist anymore.
    assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
    for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
      isl::set CondSet = isl::manage(ConditionSets[u]);
      BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip blocks outside the region.
      if (!scop->contains(SuccBB))
        continue;

      // If we propagate the domain of some block to "SuccBB" we do not have to
      // adjust the domain.
      if (FinishedExitBlocks.count(SuccBB))
        continue;

      // Skip back edges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop =
          getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());

      CondSet = adjustDomainDimensions(CondSet, BBLoop, SuccBBLoop);

      // Set the domain for the successor or merge it with an existing domain in
      // case there are multiple paths (without loop back edges) to the
      // successor block.
      isl::set &SuccDomain = scop->getOrInitEmptyDomain(SuccBB);

      if (SuccDomain) {
        SuccDomain = SuccDomain.unite(CondSet).coalesce();
      } else {
        // Initialize the invalid domain.
        InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
        SuccDomain = CondSet;
      }

      SuccDomain = SuccDomain.detect_equalities();

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will clean up and bail.
      if (SuccDomain.n_basic_set() < MaxDisjunctsInDomain)
        continue;

      scop->invalidate(COMPLEXITY, DebugLoc());
      while (++u < ConditionSets.size())
        isl_set_free(ConditionSets[u]);
      return false;
    }
  }

  return true;
}

bool ScopBuilder::propagateInvalidStmtDomains(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {

    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        propagateInvalidStmtDomains(SubRegion, InvalidDomainMap);
        continue;
      }
    }

    bool ContainsErrorBlock = containsErrorBlock(RN, scop->getRegion(), LI, DT);
    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = scop->getOrInitEmptyDomain(BB);
    assert(Domain && "Cannot propagate a nullptr");

    isl::set InvalidDomain = InvalidDomainMap[BB];

    bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);

    if (!IsInvalidBlock) {
      InvalidDomain = InvalidDomain.intersect(Domain);
    } else {
      InvalidDomain = Domain;
      isl::set DomPar = Domain.params();
      recordAssumption(&RecordedAssumptions, ERRORBLOCK, DomPar,
                       BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
      Domain = isl::set::empty(Domain.get_space());
    }

    if (InvalidDomain.is_empty()) {
      InvalidDomainMap[BB] = InvalidDomain;
      continue;
    }

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    auto *TI = BB->getTerminator();
    unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
    for (unsigned u = 0; u < NumSuccs; u++) {
      auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip successors outside the SCoP.
      if (!scop->contains(SuccBB))
        continue;

      // Skip backedges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop =
          getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());

      auto AdjustedInvalidDomain =
          adjustDomainDimensions(InvalidDomain, BBLoop, SuccBBLoop);

      isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
      SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain);
      SuccInvalidDomain = SuccInvalidDomain.coalesce();

      InvalidDomainMap[SuccBB] = SuccInvalidDomain;

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will bail.
      if (SuccInvalidDomain.n_basic_set() < MaxDisjunctsInDomain)
        continue;

      InvalidDomainMap.erase(BB);
      scop->invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
      return false;
    }

    InvalidDomainMap[BB] = InvalidDomain;
  }

  return true;
}

void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                   Region *NonAffineSubRegion,
                                   bool IsExitBlock) {
  // PHI nodes that are in the exit block of the region, hence if IsExitBlock is
  // true, are not modeled as ordinary PHI nodes as they are not part of the
  // region. However, we model the operands in the predecessor blocks that are
  // part of the region as regular scalar accesses.

  // If we can synthesize a PHI we can skip it, however only if it is in
  // the region. If it is not it can only be in the exit block of the region.
  // In this case we model the operands but not the PHI itself.
  auto *Scope = LI.getLoopFor(PHI->getParent());
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
    return;

  // PHI nodes are modeled as if they had been demoted prior to the SCoP
  // detection. Hence, the PHI is a load of a new memory location in which the
  // incoming value was written at the end of the incoming basic block.
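  //
  // Illustrative sketch: for `%phi = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]` we
  // model a write of %a at the end of %bb1 and a write of %b at the end of
  // %bb2 into a virtual location for the PHI, plus a read of that location in
  // the PHI's own statement (see ensurePHIWrite and addPHIReadAccess below).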
  bool OnlyNonAffineSubRegionOperands = true;
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
    Value *Op = PHI->getIncomingValue(u);
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
    ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));

    // Do not build PHI dependences inside a non-affine subregion, but make
    // sure that the necessary scalar values are still made available.
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
      auto *OpInst = dyn_cast<Instruction>(Op);
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
        ensureValueRead(Op, OpStmt);
      continue;
    }

    OnlyNonAffineSubRegionOperands = false;
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
  }

  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
    addPHIReadAccess(PHIStmt, PHI);
  }
}

void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
                                         Instruction *Inst) {
  assert(!isa<PHINode>(Inst));

  // Pull-in required operands.
  for (Use &Op : Inst->operands())
    ensureValueRead(Op.get(), UserStmt);
}

// Create a sequence of two schedules. Either argument may be null and is
// interpreted as the empty schedule. Can also return null if both schedules are
// empty.
static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
  if (!Prev)
    return Succ;
  if (!Succ)
    return Prev;

  return Prev.sequence(Succ);
}

// Create an isl_multi_union_aff that defines an identity mapping from the
// elements of USet to their N-th dimension (counting from 1).
//
// # Example:
//
//            Domain: { A[i,j]; B[i,j,k] }
//                 N: 2
//
// Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
//
// @param USet   A union set describing the elements for which to generate a
//               mapping.
// @param N      The dimension to map to.
// @returns      A mapping from USet to its N-th dimension.
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) {
  assert(N >= 0);
  assert(USet);
  assert(!USet.is_empty());

  auto Result = isl::union_pw_multi_aff::empty(USet.get_space());

  for (isl::set S : USet.get_set_list()) {
    int Dim = S.dim(isl::dim::set);
    auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
                                                  N, Dim - N);
    if (N > 1)
      PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);

    Result = Result.add_pw_multi_aff(PMA);
  }

  return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
}

void ScopBuilder::buildSchedule() {
  Loop *L = getLoopSurroundingScop(*scop, LI);
  LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
  buildSchedule(scop->getRegion().getNode(), LoopStack);
  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
  scop->setScheduleTree(LoopStack[0].Schedule);
}

/// To generate a schedule for the elements in a Region we traverse the Region
/// in reverse-post-order and add the contained RegionNodes in traversal order
/// to the schedule of the loop that is currently at the top of the LoopStack.
/// For loop-free codes, this results in a correct sequential ordering.
///
/// Example:
///           bb1(0)
///         /     \.
///      bb2(1)   bb3(2)
///         \    /  \.
///          bb4(3)  bb5(4)
///             \   /
///              bb6(5)
///
/// Including loops requires additional processing. Whenever a loop header is
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing any other nodes. To implement this
/// loop-nodes-first-processing, the reverse post-order traversal is
/// insufficient. Hence, we additionally check if the traversal yields
/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
/// These region-nodes are then queued and only traversed after all nodes
/// within the current loop have been processed.
void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
  Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);

  ReversePostOrderTraversal<Region *> RTraversal(R);
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  std::deque<RegionNode *> DelayList;
  bool LastRNWaiting = false;

  // Iterate over the region @p R in reverse post-order but queue
  // sub-regions/blocks iff they are not part of the last encountered but not
  // completely traversed loop. The variable LastRNWaiting is a flag to indicate
  // that we queued the last sub-region/block from the reverse post-order
  // iterator. If it is set we have to explore the next sub-region/block from
  // the iterator (if any) to guarantee progress. If it is not set we first try
  // the next queued sub-region/blocks.
  while (!WorkList.empty() || !DelayList.empty()) {
    RegionNode *RN;

    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
      RN = WorkList.front();
      WorkList.pop_front();
      LastRNWaiting = false;
    } else {
      RN = DelayList.front();
      DelayList.pop_front();
    }

    Loop *L = getRegionNodeLoop(RN, LI);
    if (!scop->contains(L))
      L = OuterScopLoop;

    Loop *LastLoop = LoopStack.back().L;
    if (LastLoop != L) {
      if (LastLoop && !LastLoop->contains(L)) {
        LastRNWaiting = true;
        DelayList.push_back(RN);
        continue;
      }
      LoopStack.push_back({L, nullptr, 0});
    }
    buildSchedule(RN, LoopStack);
  }
}

void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
  if (RN->isSubRegion()) {
    auto *LocalRegion = RN->getNodeAs<Region>();
    if (!scop->isNonAffineSubRegion(LocalRegion)) {
      buildSchedule(LocalRegion, LoopStack);
      return;
    }
  }

  assert(LoopStack.rbegin() != LoopStack.rend());
  auto LoopData = LoopStack.rbegin();
  LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);

  for (auto *Stmt : scop->getStmtListFor(RN)) {
    isl::union_set UDomain{Stmt->getDomain()};
    auto StmtSchedule = isl::schedule::from_domain(UDomain);
    LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
  }

  // Check if we just processed the last node in this loop. If we did, finalize
  // the loop by:
  //
  //   - adding new schedule dimensions
  //   - folding the resulting schedule into the parent loop schedule
  //   - dropping the loop schedule from the LoopStack.
  //
  // Then continue to check surrounding loops, which might also have been
  // completed by this node.
  size_t Dimension = LoopStack.size();
  while (LoopData->L &&
         LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
    isl::schedule Schedule = LoopData->Schedule;
    auto NumBlocksProcessed = LoopData->NumBlocksProcessed;

    assert(std::next(LoopData) != LoopStack.rend());
    ++LoopData;
    --Dimension;

    if (Schedule) {
      isl::union_set Domain = Schedule.get_domain();
      isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
      Schedule = Schedule.insert_partial_schedule(MUPA);
      LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
    }

    LoopData->NumBlocksProcessed += NumBlocksProcessed;
  }
1298   // Now pop all loops processed above from the LoopStack.
1299   LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
1300 }
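
// A rough sketch of the finalization step above: for a statement S with
// domain { S[i, j] } in a two-deep loop nest, completing the innermost loop
// (Dimension == 2) inserts the partial schedule { S[i, j] -> [j] } computed
// by mapToDimension, and completing the outer loop prepends
// { S[i, j] -> [i] }, so the final schedule tree enumerates i, then j, then
// the statements sequenced within the loop body.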
1301 
1302 void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
1303   // Check for uses of this instruction outside the scop. Because we do not
1304   // iterate over such instructions and therefore did not "ensure" the existence
1305   // of a write, we must determine such use here.
1306   if (scop->isEscaping(Inst))
1307     ensureValueWrite(Inst);
1308 }
1309 
1310 /// Check that a value is a Fortran Array descriptor.
1311 ///
1312 /// We check if V has the following structure:
1313 /// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>,
1314 ///                                   [<num> x %struct.descriptor_dimension] }
1315 ///
1316 ///
1317 /// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> }
1318 ///
1319 /// 1. V's type name starts with "struct.array"
1320 /// 2. V's type has layout as shown.
1321 /// 3. Final member of V's type has name "struct.descriptor_dimension",
1322 /// 4. "struct.descriptor_dimension" has layout as shown.
1323 /// 5. Consistent use of i<zz> where <zz> is some fixed integer number.
1324 ///
1325 /// We are interested in such types since this is the code that dragonegg
1326 /// generates for Fortran array descriptors.
1327 ///
1328 /// @param V the Value to be checked.
1329 ///
1330 /// @returns True if V is a Fortran array descriptor, False otherwise.
1331 bool isFortranArrayDescriptor(Value *V) {
1332   PointerType *PTy = dyn_cast<PointerType>(V->getType());
1333 
1334   if (!PTy)
1335     return false;
1336 
1337   Type *Ty = PTy->getElementType();
1338   assert(Ty && "Ty expected to be initialized");
1339   auto *StructArrTy = dyn_cast<StructType>(Ty);
1340 
1341   if (!(StructArrTy && StructArrTy->hasName()))
1342     return false;
1343 
1344   if (!StructArrTy->getName().startswith("struct.array"))
1345     return false;
1346 
1347   if (StructArrTy->getNumElements() != 4)
1348     return false;
1349 
1350   const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements();
1351 
1352   // i8* match
1353   if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext()))
1354     return false;
1355 
1356   // Get a reference to the int type and check that all the members
1357   // share the same int type
1358   Type *IntTy = ArrMemberTys[1];
1359   if (ArrMemberTys[2] != IntTy)
1360     return false;
1361 
1362   // type: [<num> x %struct.descriptor_dimension]
1363   ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]);
1364   if (!DescriptorDimArrayTy)
1365     return false;
1366 
1367   // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
1368   StructType *DescriptorDimTy =
1369       dyn_cast<StructType>(DescriptorDimArrayTy->getElementType());
1370 
1371   if (!(DescriptorDimTy && DescriptorDimTy->hasName()))
1372     return false;
1373 
1374   if (DescriptorDimTy->getName() != "struct.descriptor_dimension")
1375     return false;
1376 
1377   if (DescriptorDimTy->getNumElements() != 3)
1378     return false;
1379 
1380   for (auto MemberTy : DescriptorDimTy->elements()) {
1381     if (MemberTy != IntTy)
1382       return false;
1383   }
1384 
1385   return true;
1386 }
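
// A concrete (hypothetical) descriptor that passes the checks above, with
// <zz> = 64 and <num> = 2:
//
//   %struct.descriptor_dimension = type { i64, i64, i64 }
//   %"struct.array2_real(kind=8)" = type { i8*, i64, i64,
//                                     [2 x %struct.descriptor_dimension] }
//
// A value of type %"struct.array2_real(kind=8)"* is accepted; mixing i32 and
// i64 between the members would fail the consistency check (5.).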
1387 
1388 Value *ScopBuilder::findFADAllocationVisible(MemAccInst Inst) {
1389   // match: 4.1 & 4.2 store/load
1390   if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
1391     return nullptr;
1392 
1393   // match: 4
1394   if (Inst.getAlignment() != 8)
1395     return nullptr;
1396 
1397   Value *Address = Inst.getPointerOperand();
1398 
1399   const BitCastInst *Bitcast = nullptr;
1400   // [match: 3]
1401   if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) {
1402     Value *TypedMem = Slot->getPointerOperand();
1403     // match: 2
1404     Bitcast = dyn_cast<BitCastInst>(TypedMem);
1405   } else {
1406     // match: 2
1407     Bitcast = dyn_cast<BitCastInst>(Address);
1408   }
1409 
1410   if (!Bitcast)
1411     return nullptr;
1412 
1413   auto *MallocMem = Bitcast->getOperand(0);
1414 
1415   // match: 1
1416   auto *MallocCall = dyn_cast<CallInst>(MallocMem);
1417   if (!MallocCall)
1418     return nullptr;
1419 
1420   Function *MallocFn = MallocCall->getCalledFunction();
1421   if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc"))
1422     return nullptr;
1423 
1424   // Find all uses of the malloc'd memory.
1425   // We are looking for a "store" into a struct with the type being the Fortran
1426   // descriptor type.
1427   for (auto user : MallocMem->users()) {
1428     // match: 5
1429     auto *MallocStore = dyn_cast<StoreInst>(user);
1430     if (!MallocStore)
1431       continue;
1432 
1433     auto *DescriptorGEP =
1434         dyn_cast<GEPOperator>(MallocStore->getPointerOperand());
1435     if (!DescriptorGEP)
1436       continue;
1437 
1438     // match: 5
1439     auto DescriptorType =
1440         dyn_cast<StructType>(DescriptorGEP->getSourceElementType());
1441     if (!(DescriptorType && DescriptorType->hasName()))
1442       continue;
1443 
1444     Value *Descriptor = dyn_cast<Value>(DescriptorGEP->getPointerOperand());
1445 
1446     if (!Descriptor)
1447       continue;
1448 
1449     if (!isFortranArrayDescriptor(Descriptor))
1450       continue;
1451 
1452     return Descriptor;
1453   }
1454 
1455   return nullptr;
1456 }
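
// Roughly, the IR shape this routine matches (a hedged sketch; all names are
// hypothetical):
//
//   %mem   = call i8* @malloc(i64 %size)                   ; match: 1
//   %typed = bitcast i8* %mem to double*                   ; match: 2
//   %slot  = getelementptr double, double* %typed, i64 %i  ; [match: 3]
//   store double %v, double* %slot, align 8                ; match: 4
//   ...
//   store i8* %mem, i8** %base                             ; match: 5
//
// where %base is a GEP into a struct accepted by isFortranArrayDescriptor.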
1457 
1458 Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) {
1459   // match: 3
1460   if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
1461     return nullptr;
1462 
1463   Value *Slot = Inst.getPointerOperand();
1464 
1465   LoadInst *MemLoad = nullptr;
1466   // [match: 2]
1467   if (auto *SlotGEP = dyn_cast<GetElementPtrInst>(Slot)) {
1468     // match: 1
1469     MemLoad = dyn_cast<LoadInst>(SlotGEP->getPointerOperand());
1470   } else {
1471     // match: 1
1472     MemLoad = dyn_cast<LoadInst>(Slot);
1473   }
1474 
1475   if (!MemLoad)
1476     return nullptr;
1477 
1478   auto *BitcastOperator =
1479       dyn_cast<BitCastOperator>(MemLoad->getPointerOperand());
1480   if (!BitcastOperator)
1481     return nullptr;
1482 
1483   Value *Descriptor = dyn_cast<Value>(BitcastOperator->getOperand(0));
1484   if (!Descriptor)
1485     return nullptr;
1486 
1487   if (!isFortranArrayDescriptor(Descriptor))
1488     return nullptr;
1489 
1490   return Descriptor;
1491 }
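
// Roughly, the IR shape matched here (a hedged sketch; all names are
// hypothetical). In contrast to findFADAllocationVisible, the array's base
// pointer is loaded back out of the descriptor:
//
//   %fld  = bitcast %"struct.array1_real(kind=8)"* %desc to double**
//   %base = load double*, double** %fld                    ; match: 1
//   %slot = getelementptr double, double* %base, i64 %i    ; [match: 2]
//   store double %v, double* %slot                         ; match: 3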
1492 
1493 void ScopBuilder::addRecordedAssumptions() {
1494   for (auto &AS : llvm::reverse(RecordedAssumptions)) {
1495 
1496     if (!AS.BB) {
1497       scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
1498                           nullptr /* BasicBlock */, AS.RequiresRTC);
1499       continue;
1500     }
1501 
1502     // If the domain was deleted the assumptions are void.
1503     isl_set *Dom = scop->getDomainConditions(AS.BB).release();
1504     if (!Dom)
1505       continue;
1506 
1507     // If a basic block was given use its domain to simplify the assumption.
1508     // In case of restrictions we know they only have to hold on the domain,
1509     // thus we can intersect them with the domain of the block. However, for
1510     // assumptions the domain has to imply them, thus:
1511     //                     _              _____
1512     //   Dom => S   <==>   A v B   <==>   A - B
1513     //
1514     // To avoid the complement we will register A - B as a restriction not an
1515     // assumption.
1516     isl_set *S = AS.Set.copy();
1517     if (AS.Sign == AS_RESTRICTION)
1518       S = isl_set_params(isl_set_intersect(S, Dom));
1519     else /* (AS.Sign == AS_ASSUMPTION) */
1520       S = isl_set_params(isl_set_subtract(Dom, S));
1521 
1522     scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB,
1523                         AS.RequiresRTC);
1524   }
1525 }
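
// A small worked example of the rewrite above (values made up): with block
// domain Dom = { S[i] : 0 <= i < n } and assumption set S = { S[i] : i <= 127 },
// the subtraction gives Dom - S = { S[i] : 128 <= i < n }, whose parameter
// projection { [n] : n >= 129 } is registered as a restriction, i.e. exactly
// those parameter values would violate the assumption.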
1526 
1527 void ScopBuilder::addUserAssumptions(
1528     AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
1529   for (auto &Assumption : AC.assumptions()) {
1530     auto *CI = dyn_cast_or_null<CallInst>(Assumption);
1531     if (!CI || CI->getNumArgOperands() != 1)
1532       continue;
1533 
1534     bool InScop = scop->contains(CI);
1535     if (!InScop && !scop->isDominatedBy(DT, CI->getParent()))
1536       continue;
1537 
1538     auto *L = LI.getLoopFor(CI->getParent());
1539     auto *Val = CI->getArgOperand(0);
1540     ParameterSetTy DetectedParams;
1541     auto &R = scop->getRegion();
1542     if (!isAffineConstraint(Val, &R, L, SE, DetectedParams)) {
1543       ORE.emit(
1544           OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
1545           << "Non-affine user assumption ignored.");
1546       continue;
1547     }
1548 
1549     // Collect all newly introduced parameters.
1550     ParameterSetTy NewParams;
1551     for (auto *Param : DetectedParams) {
1552       Param = extractConstantFactor(Param, SE).second;
1553       Param = scop->getRepresentingInvariantLoadSCEV(Param);
1554       if (scop->isParam(Param))
1555         continue;
1556       NewParams.insert(Param);
1557     }
1558 
1559     SmallVector<isl_set *, 2> ConditionSets;
1560     auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
1561     BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
1562     auto *Dom = InScop ? isl_set_copy(scop->getDomainConditions(BB).get())
1563                        : isl_set_copy(scop->getContext().get());
1564     assert(Dom && "Cannot propagate a nullptr.");
1565     bool Valid = buildConditionSets(BB, Val, TI, L, Dom, InvalidDomainMap,
1566                                     ConditionSets);
1567     isl_set_free(Dom);
1568 
1569     if (!Valid)
1570       continue;
1571 
1572     isl_set *AssumptionCtx = nullptr;
1573     if (InScop) {
1574       AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
1575       isl_set_free(ConditionSets[0]);
1576     } else {
1577       AssumptionCtx = isl_set_complement(ConditionSets[1]);
1578       AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
1579     }
1580 
1581     // Project out newly introduced parameters as they are not otherwise useful.
1582     if (!NewParams.empty()) {
1583       for (isl_size u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
1584         auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
1585         auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
1586         isl_id_free(Id);
1587 
1588         if (!NewParams.count(Param))
1589           continue;
1590 
1591         AssumptionCtx =
1592             isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
1593       }
1594     }
1595     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
1596              << "Use user assumption: " << stringFromIslObj(AssumptionCtx));
1597     isl::set newContext =
1598         scop->getContext().intersect(isl::manage(AssumptionCtx));
1599     scop->setContext(newContext);
1600   }
1601 }
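
// For example (a sketch, not from a specific test case), source code such as
//
//   __builtin_assume(n > 0);
//   for (int i = 0; i < n; i++) ...
//
// arrives here as a call to llvm.assume with an affine condition on the
// parameter n; intersecting the contexts then restricts the SCoP's parameter
// context by { [n] : n > 0 }.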
1602 
1603 bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
1604   Value *Val = Inst.getValueOperand();
1605   Type *ElementType = Val->getType();
1606   Value *Address = Inst.getPointerOperand();
1607   const SCEV *AccessFunction =
1608       SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1609   const SCEVUnknown *BasePointer =
1610       dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1611   enum MemoryAccess::AccessType AccType =
1612       isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1613 
1614   if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
1615     auto *Src = BitCast->getOperand(0);
1616     auto *SrcTy = Src->getType();
1617     auto *DstTy = BitCast->getType();
1618     // Do not try to delinearize non-sized (opaque) pointers.
1619     if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
1620         (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
1621       return false;
1622     }
1623     if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
1624         DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
1625             DL.getTypeAllocSize(DstTy->getPointerElementType()))
1626       Address = Src;
1627   }
1628 
1629   auto *GEP = dyn_cast<GetElementPtrInst>(Address);
1630   if (!GEP)
1631     return false;
1632 
1633   SmallVector<const SCEV *, 4> Subscripts;
1634   SmallVector<int, 4> Sizes;
1635   SE.getIndexExpressionsFromGEP(GEP, Subscripts, Sizes);
1636   auto *BasePtr = GEP->getOperand(0);
1637 
1638   if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
1639     BasePtr = BasePtrCast->getOperand(0);
1640 
1641   // Check for identical base pointers to ensure that we do not miss index
1642   // offsets that have been added before this GEP is applied.
1643   if (BasePtr != BasePointer->getValue())
1644     return false;
1645 
1646   std::vector<const SCEV *> SizesSCEV;
1647 
1648   const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1649 
1650   Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1651   for (auto *Subscript : Subscripts) {
1652     InvariantLoadsSetTy AccessILS;
1653     if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
1654                       &AccessILS))
1655       return false;
1656 
1657     for (LoadInst *LInst : AccessILS)
1658       if (!ScopRIL.count(LInst))
1659         return false;
1660   }
1661 
1662   if (Sizes.empty())
1663     return false;
1664 
1665   SizesSCEV.push_back(nullptr);
1666 
1667   for (auto V : Sizes)
1668     SizesSCEV.push_back(SE.getSCEV(
1669         ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
1670 
1671   addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1672                  true, Subscripts, SizesSCEV, Val);
1673   return true;
1674 }
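
// For illustration (a hedged sketch): an access into a fixed-size array like
//
//   float A[1024][64];
//   ... = A[i][j];
//
// appears as a GEP from which getIndexExpressionsFromGEP recovers the
// subscripts (i, j) and the inner dimension size 64, so the access is added
// as the two-dimensional MemRef_A[i][j] with sizes [*, 64] instead of the
// flattened MemRef_A[64 * i + j].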
1675 
1676 bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
1677   if (!PollyDelinearize)
1678     return false;
1679 
1680   Value *Address = Inst.getPointerOperand();
1681   Value *Val = Inst.getValueOperand();
1682   Type *ElementType = Val->getType();
1683   unsigned ElementSize = DL.getTypeAllocSize(ElementType);
1684   enum MemoryAccess::AccessType AccType =
1685       isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1686 
1687   const SCEV *AccessFunction =
1688       SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1689   const SCEVUnknown *BasePointer =
1690       dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1691 
1692   assert(BasePointer && "Could not find base pointer");
1693 
1694   auto &InsnToMemAcc = scop->getInsnToMemAccMap();
1695   auto AccItr = InsnToMemAcc.find(Inst);
1696   if (AccItr == InsnToMemAcc.end())
1697     return false;
1698 
1699   std::vector<const SCEV *> Sizes = {nullptr};
1700 
1701   Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
1702                AccItr->second.Shape->DelinearizedSizes.end());
1703 
1704   // In case only the element size is contained in the 'Sizes' array, the
1705   // access does not access a real multi-dimensional array. Hence, we allow
1706   // the normal single-dimensional access construction to handle this.
1707   if (Sizes.size() == 1)
1708     return false;
1709 
1710   // Remove the element size. This information is already provided by the
1711   // ElementSize parameter. In case the element size of this access and the
1712   // element size used for delinearization differ, the delinearization is
1713   // incorrect. Hence, we invalidate the scop.
1714   //
1715   // TODO: Handle delinearization with differing element sizes.
1716   auto DelinearizedSize =
1717       cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
1718   Sizes.pop_back();
1719   if (ElementSize != DelinearizedSize)
1720     scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());
1721 
1722   addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1723                  true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
1724   return true;
1725 }
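
// For illustration (a hedged sketch): for a parametrically sized array as in
//
//   void f(long n, long m, double A[][m]) { ... = A[i][j]; }
//
// delinearization recognizes the flat offset i * m + j and records the shape
// {m, sizeof(double)} in InsnToMemAcc. The code above strips the trailing
// element size, verifies it matches this access, and emits the
// two-dimensional MemRef_A[i][j] with sizes [*, m].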
1726 
1727 bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
1728   auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
1729 
1730   if (MemIntr == nullptr)
1731     return false;
1732 
1733   auto *L = LI.getLoopFor(Inst->getParent());
1734   auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
1735   assert(LengthVal);
1736 
1737   // Check if the length value is actually affine or if we overapproximate it.
1738   InvariantLoadsSetTy AccessILS;
1739   const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1740 
1741   Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1742   bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
1743                                      LengthVal, SE, &AccessILS);
1744   for (LoadInst *LInst : AccessILS)
1745     if (!ScopRIL.count(LInst))
1746       LengthIsAffine = false;
1747   if (!LengthIsAffine)
1748     LengthVal = nullptr;
1749 
1750   auto *DestPtrVal = MemIntr->getDest();
1751   assert(DestPtrVal);
1752 
1753   auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
1754   assert(DestAccFunc);
1755   // Ignore accesses to "NULL".
1756   // TODO: We could use this to optimize the region further, e.g., intersect
1757   //       the context with
1758   //          isl_set_complement(isl_set_params(getDomain()))
1759   //       as we know it would be undefined to execute this instruction anyway.
1760   if (DestAccFunc->isZero())
1761     return true;
1762 
1763   auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
1764   assert(DestPtrSCEV);
1765   DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
1766   addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
1767                  IntegerType::getInt8Ty(DestPtrVal->getContext()),
1768                  LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
1769                  Inst.getValueOperand());
1770 
1771   auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
1772   if (!MemTrans)
1773     return true;
1774 
1775   auto *SrcPtrVal = MemTrans->getSource();
1776   assert(SrcPtrVal);
1777 
1778   auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
1779   assert(SrcAccFunc);
1780   // Ignore accesses to "NULL".
1781   // TODO: See above TODO
1782   if (SrcAccFunc->isZero())
1783     return true;
1784 
1785   auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
1786   assert(SrcPtrSCEV);
1787   SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
1788   addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
1789                  IntegerType::getInt8Ty(SrcPtrVal->getContext()),
1790                  LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
1791                  Inst.getValueOperand());
1792 
1793   return true;
1794 }
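
// For illustration (a hedged sketch): a call such as memcpy(Dst, Src, n) is
// modeled as two byte-wise accesses, a MUST_WRITE of MemRef_Dst covering the
// offsets [0, n) and a READ of MemRef_Src covering [0, n), both with element
// type i8. If n is not affine, the length is dropped and the accessed ranges
// are overapproximated.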
1795 
1796 bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
1797   auto *CI = dyn_cast_or_null<CallInst>(Inst);
1798 
1799   if (CI == nullptr)
1800     return false;
1801 
1802   if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
1803     return true;
1804 
1805   bool ReadOnly = false;
1806   auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
1807   auto *CalledFunction = CI->getCalledFunction();
1808   switch (AA.getModRefBehavior(CalledFunction)) {
1809   case FMRB_UnknownModRefBehavior:
1810     llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
1811   case FMRB_DoesNotAccessMemory:
1812     return true;
1813   case FMRB_OnlyWritesMemory:
1814   case FMRB_OnlyWritesInaccessibleMem:
1815   case FMRB_OnlyWritesInaccessibleOrArgMem:
1816   case FMRB_OnlyAccessesInaccessibleMem:
1817   case FMRB_OnlyAccessesInaccessibleOrArgMem:
1818     return false;
1819   case FMRB_OnlyReadsMemory:
1820   case FMRB_OnlyReadsInaccessibleMem:
1821   case FMRB_OnlyReadsInaccessibleOrArgMem:
1822     GlobalReads.emplace_back(Stmt, CI);
1823     return true;
1824   case FMRB_OnlyReadsArgumentPointees:
1825     ReadOnly = true;
1826     LLVM_FALLTHROUGH;
1827   case FMRB_OnlyWritesArgumentPointees:
1828   case FMRB_OnlyAccessesArgumentPointees: {
1829     auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
1830     Loop *L = LI.getLoopFor(Inst->getParent());
1831     for (const auto &Arg : CI->arg_operands()) {
1832       if (!Arg->getType()->isPointerTy())
1833         continue;
1834 
1835       auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
1836       if (ArgSCEV->isZero())
1837         continue;
1838 
1839       auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
1840       addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
1841                      ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
1842     }
1843     return true;
1844   }
1845   }
1846 
1847   return true;
1848 }
1849 
1850 void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
1851   Value *Address = Inst.getPointerOperand();
1852   Value *Val = Inst.getValueOperand();
1853   Type *ElementType = Val->getType();
1854   enum MemoryAccess::AccessType AccType =
1855       isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1856 
1857   const SCEV *AccessFunction =
1858       SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1859   const SCEVUnknown *BasePointer =
1860       dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1861 
1862   assert(BasePointer && "Could not find base pointer");
1863   AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);
1864 
1865   // Check if the access depends on a loop contained in a non-affine subregion.
1866   bool isVariantInNonAffineLoop = false;
1867   SetVector<const Loop *> Loops;
1868   findLoops(AccessFunction, Loops);
1869   for (const Loop *L : Loops)
1870     if (Stmt->contains(L)) {
1871       isVariantInNonAffineLoop = true;
1872       break;
1873     }
1874 
1875   InvariantLoadsSetTy AccessILS;
1876 
1877   Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1878   bool IsAffine = !isVariantInNonAffineLoop &&
1879                   isAffineExpr(&scop->getRegion(), SurroundingLoop,
1880                                AccessFunction, SE, &AccessILS);
1881 
1882   const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1883   for (LoadInst *LInst : AccessILS)
1884     if (!ScopRIL.count(LInst))
1885       IsAffine = false;
1886 
1887   if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
1888     AccType = MemoryAccess::MAY_WRITE;
1889 
1890   addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1891                  IsAffine, {AccessFunction}, {nullptr}, Val);
1892 }
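
// For illustration (a hedged sketch): for a load of A[2 * i + 5] in loop i,
// subtracting the base pointer from the pointer SCEV leaves the affine byte
// offset {5 * sizeof(elt), +, 2 * sizeof(elt)}<%loop>, which becomes the
// single subscript of a one-dimensional access to MemRef_A. Had the offset
// depended on a loop inside a non-affine subregion, the access would have
// been added as non-affine, and a MUST_WRITE demoted to a MAY_WRITE.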
1893 
1894 void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
1895   if (buildAccessMemIntrinsic(Inst, Stmt))
1896     return;
1897 
1898   if (buildAccessCallInst(Inst, Stmt))
1899     return;
1900 
1901   if (buildAccessMultiDimFixed(Inst, Stmt))
1902     return;
1903 
1904   if (buildAccessMultiDimParam(Inst, Stmt))
1905     return;
1906 
1907   buildAccessSingleDim(Inst, Stmt);
1908 }
1909 
1910 void ScopBuilder::buildAccessFunctions() {
1911   for (auto &Stmt : *scop) {
1912     if (Stmt.isBlockStmt()) {
1913       buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
1914       continue;
1915     }
1916 
1917     Region *R = Stmt.getRegion();
1918     for (BasicBlock *BB : R->blocks())
1919       buildAccessFunctions(&Stmt, *BB, R);
1920   }
1921 
1922   // Build write accesses for values that are used after the SCoP.
1923   // The instructions defining them might be synthesizable and therefore not
1924   // contained in any statement, hence we iterate over the original instructions
1925   // to identify all escaping values.
1926   for (BasicBlock *BB : scop->getRegion().blocks()) {
1927     for (Instruction &Inst : *BB)
1928       buildEscapingDependences(&Inst);
1929   }
1930 }
1931 
1932 bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
1933   return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
1934          !canSynthesize(Inst, *scop, &SE, L);
1935 }
1936 
1937 /// Generate a name for a statement.
1938 ///
1939 /// @param BB     The basic block the statement will represent.
1940 /// @param BBIdx  The index of the @p BB relative to other BBs/regions.
1941 /// @param Count  The index of the created statement in @p BB.
1942 /// @param IsMain Whether this is the main statement for @p BB. If true,
1943 ///               no suffix will be added.
1944 /// @param IsLast Uses a special indicator for the last statement of a BB.
1945 static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
1946                                 bool IsMain, bool IsLast = false) {
1947   std::string Suffix;
1948   if (!IsMain) {
1949     if (UseInstructionNames)
1950       Suffix = '_';
1951     if (IsLast)
1952       Suffix += "last";
1953     else if (Count < 26)
1954       Suffix += 'a' + Count;
1955     else
1956       Suffix += std::to_string(Count);
1957   }
1958   return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
1959 }
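
// For illustration (a hedged sketch): with UseInstructionNames set and a
// block named %for.body, the main statement is named "Stmt_for_body", a
// second statement with Count == 1 becomes "Stmt_for_body_b", and the
// epilogue becomes "Stmt_for_body_last". Without instruction names, the
// block index is used instead, e.g. "Stmt5" and "Stmt5b".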
1960 
1961 /// Generate a name for a statement that represents a non-affine subregion.
1962 ///
1963 /// @param R    The region the statement will represent.
1964 /// @param RIdx The index of the @p R relative to other BBs/regions.
1965 static std::string makeStmtName(Region *R, long RIdx) {
1966   return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
1967                               UseInstructionNames);
1968 }
1969 
1970 void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
1971   Loop *SurroundingLoop = LI.getLoopFor(BB);
1972 
1973   int Count = 0;
1974   long BBIdx = scop->getNextStmtIdx();
1975   std::vector<Instruction *> Instructions;
1976   for (Instruction &Inst : *BB) {
1977     if (shouldModelInst(&Inst, SurroundingLoop))
1978       Instructions.push_back(&Inst);
1979     if (Inst.getMetadata("polly_split_after") ||
1980         (SplitOnStore && isa<StoreInst>(Inst))) {
1981       std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1982       scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1983       Count++;
1984       Instructions.clear();
1985     }
1986   }
1987 
1988   std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1989   scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1990 }
1991 
1992 /// Is @p Inst an ordered instruction?
1993 ///
1994 /// An unordered instruction is an instruction such that a sequence of
1995 /// unordered instructions can be permuted without changing semantics. Any
1996 /// instruction for which this is not always the case is ordered.
1997 static bool isOrderedInstruction(Instruction *Inst) {
1998   return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
1999 }
2000 
2001 /// Join instructions to the same statement if one uses the scalar result of the
2002 /// other.
2003 static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
2004                             ArrayRef<Instruction *> ModeledInsts) {
2005   for (Instruction *Inst : ModeledInsts) {
2006     if (isa<PHINode>(Inst))
2007       continue;
2008 
2009     for (Use &Op : Inst->operands()) {
2010       Instruction *OpInst = dyn_cast<Instruction>(Op.get());
2011       if (!OpInst)
2012         continue;
2013 
2014       // Check if OpInst is in the BB and is a modeled instruction.
2015       auto OpVal = UnionFind.findValue(OpInst);
2016       if (OpVal == UnionFind.end())
2017         continue;
2018 
2019       UnionFind.unionSets(Inst, OpInst);
2020     }
2021   }
2022 }
2023 
2024 /// Ensure that the order of ordered instructions does not change.
2025 ///
2026 /// If we encounter an ordered instruction enclosed in instructions belonging to
2027 /// a different statement (which might as well contain ordered instructions, but
2028 /// this is not tested here), join them.
2029 static void
2030 joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
2031                         ArrayRef<Instruction *> ModeledInsts) {
2032   SetVector<Instruction *> SeenLeaders;
2033   for (Instruction *Inst : ModeledInsts) {
2034     if (!isOrderedInstruction(Inst))
2035       continue;
2036 
2037     Instruction *Leader = UnionFind.getLeaderValue(Inst);
2038     // Since previous iterations might have merged sets, some items in
2039     // SeenLeaders are not leaders anymore. However, the new leader of
2040     // previously merged instructions must be one of the former leaders of
2041     // these merged instructions.
2042     bool Inserted = SeenLeaders.insert(Leader);
2043     if (Inserted)
2044       continue;
2045 
2046     // Merge statements to close holes. Say, we have already seen statements A
2047     // and B, in this order. Then we see an instruction of A again and we would
2048     // see the pattern "A B A". This function joins all statements until the
2049     // only seen occurrence of A.
2050     for (Instruction *Prev : reverse(SeenLeaders)) {
2051       // We are backtracking from the last element until we see Inst's leader
2052       // in SeenLeaders and merge all into one set. Although leaders of
2053       // instructions change during the execution of this loop, it's irrelevant
2054       // as we are just searching for the element that we already confirmed is
2055       // in the list.
2056       if (Prev == Leader)
2057         break;
2058       UnionFind.unionSets(Prev, Leader);
2059     }
2060   }
2061 }
2062 
2063 /// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
2064 /// the incoming values from this block are executed after the PHI READ.
2065 ///
2066 /// Otherwise it could overwrite the incoming value from before the BB with the
2067 /// value for the next execution. This can happen if the PHI WRITE is added to
2068 /// the statement with the instruction that defines the incoming value (instead
2069 /// of the last statement of the same BB). To ensure that the PHI READ and WRITE
2070 /// are in order, we put both into the statement. PHI WRITEs are always executed
2071 /// after PHI READs when they are in the same statement.
2072 ///
2073 /// TODO: This is an overpessimization. We only have to ensure that the PHI
2074 /// WRITE is not put into a statement containing the PHI itself. That could also
2075 /// be done by
2076 /// - having all (strongly connected) PHIs in a single statement,
2077 /// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
2078 ///   has a chance of being lifted before a PHI by being in a statement with a
2079 ///   PHI that comes before in the basic block), or
2080 /// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
2081 static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
2082                             ArrayRef<Instruction *> ModeledInsts) {
2083   for (Instruction *Inst : ModeledInsts) {
2084     PHINode *PHI = dyn_cast<PHINode>(Inst);
2085     if (!PHI)
2086       continue;
2087 
2088     int Idx = PHI->getBasicBlockIndex(PHI->getParent());
2089     if (Idx < 0)
2090       continue;
2091 
2092     Instruction *IncomingVal =
2093         dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
2094     if (!IncomingVal)
2095       continue;
2096 
2097     UnionFind.unionSets(PHI, IncomingVal);
2098   }
2099 }
2100 
2101 void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
2102   Loop *L = LI.getLoopFor(BB);
2103 
2104   // Extracting out modeled instructions saves us from checking
2105   // shouldModelInst() repeatedly.
2106   SmallVector<Instruction *, 32> ModeledInsts;
2107   EquivalenceClasses<Instruction *> UnionFind;
2108   Instruction *MainInst = nullptr, *MainLeader = nullptr;
2109   for (Instruction &Inst : *BB) {
2110     if (!shouldModelInst(&Inst, L))
2111       continue;
2112     ModeledInsts.push_back(&Inst);
2113     UnionFind.insert(&Inst);
2114 
2115     // When a BB is split into multiple statements, the main statement is the
2116     // one containing the 'main' instruction. We select the first instruction
2117     // that is unlikely to be removed (because it has side-effects) as the main
2118     // one. It is used to ensure that at least one statement from the bb has the
2119     // same name as with -polly-stmt-granularity=bb.
2120     if (!MainInst && (isa<StoreInst>(Inst) ||
2121                       (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
2122       MainInst = &Inst;
2123   }
2124 
2125   joinOperandTree(UnionFind, ModeledInsts);
2126   joinOrderedInstructions(UnionFind, ModeledInsts);
2127   joinOrderedPHIs(UnionFind, ModeledInsts);
2128 
2129   // The list of instructions for each statement (a statement is represented by
2130   // its leader instruction).
2131   MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
2132 
2133   // The order of statements must be preserved w.r.t. their ordered
2134   // instructions. Without this explicit scan, we would also use non-ordered
2135   // instructions (whose order is arbitrary) to determine statement order.
2136   for (Instruction *Inst : ModeledInsts) {
2137     if (!isOrderedInstruction(Inst))
2138       continue;
2139 
2140     auto LeaderIt = UnionFind.findLeader(Inst);
2141     if (LeaderIt == UnionFind.member_end())
2142       continue;
2143 
2144     // Insert element for the leader instruction.
2145     (void)LeaderToInstList[*LeaderIt];
2146   }
2147 
2148   // Collect the instructions of all leaders. UnionFind's member iterators
2149   // are unfortunately not in any specific order.
2150   for (Instruction *Inst : ModeledInsts) {
2151     auto LeaderIt = UnionFind.findLeader(Inst);
2152     if (LeaderIt == UnionFind.member_end())
2153       continue;
2154 
2155     if (Inst == MainInst)
2156       MainLeader = *LeaderIt;
2157     std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
2158     InstList.push_back(Inst);
2159   }
2160 
2161   // Finally build the statements.
2162   int Count = 0;
2163   long BBIdx = scop->getNextStmtIdx();
2164   for (auto &Instructions : LeaderToInstList) {
2165     std::vector<Instruction *> &InstList = Instructions.second;
2166 
2167     // If there is no main instruction, make the first statement the main.
2168     bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);
2169 
2170     std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
2171     scop->addScopStmt(BB, Name, L, std::move(InstList));
2172     Count += 1;
2173   }
2174 
2175   // Unconditionally add an epilogue (last statement). It contains no
2176   // instructions, but holds the PHI write accesses for successor basic blocks,
2177   // if the incoming value is not defined in another statement of the same BB.
2178   // The epilogue becomes the main statement only if there is no other
2179   // statement that could become main.
2180   // The epilogue will be removed if no PHIWrite is added to it.
2181   std::string EpilogueName = makeStmtName(BB, BBIdx, Count, Count == 0, true);
2182   scop->addScopStmt(BB, EpilogueName, L, {});
2183 }
2184 
2185 void ScopBuilder::buildStmts(Region &SR) {
2186   if (scop->isNonAffineSubRegion(&SR)) {
2187     std::vector<Instruction *> Instructions;
2188     Loop *SurroundingLoop =
2189         getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
2190     for (Instruction &Inst : *SR.getEntry())
2191       if (shouldModelInst(&Inst, SurroundingLoop))
2192         Instructions.push_back(&Inst);
2193     long RIdx = scop->getNextStmtIdx();
2194     std::string Name = makeStmtName(&SR, RIdx);
2195     scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
2196     return;
2197   }
2198 
2199   for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
2200     if (I->isSubRegion())
2201       buildStmts(*I->getNodeAs<Region>());
2202     else {
2203       BasicBlock *BB = I->getNodeAs<BasicBlock>();
2204       switch (StmtGranularity) {
2205       case GranularityChoice::BasicBlocks:
2206         buildSequentialBlockStmts(BB);
2207         break;
2208       case GranularityChoice::ScalarIndependence:
2209         buildEqivClassBlockStmts(BB);
2210         break;
2211       case GranularityChoice::Stores:
2212         buildSequentialBlockStmts(BB, true);
2213         break;
2214       }
2215     }
2216 }
2217 
2218 void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
2219                                        Region *NonAffineSubRegion) {
2220   assert(
2221       Stmt &&
2222       "The exit BB is the only one that cannot be represented by a statement");
2223   assert(Stmt->represents(&BB));
2224 
2225   // We do not build access functions for error blocks, as they may contain
2226   // instructions we can not model.
2227   if (isErrorBlock(BB, scop->getRegion(), LI, DT))
2228     return;
2229 
2230   auto BuildAccessesForInst = [this, Stmt,
2231                                NonAffineSubRegion](Instruction *Inst) {
2232     PHINode *PHI = dyn_cast<PHINode>(Inst);
2233     if (PHI)
2234       buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);
2235 
2236     if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
2237       assert(Stmt && "Cannot build access function in non-existing statement");
2238       buildMemoryAccess(MemInst, Stmt);
2239     }
2240 
2241     // PHI nodes have already been modeled above and terminators that are
2242     // not part of a non-affine subregion are fully modeled and regenerated
2243     // from the polyhedral domains. Hence, they do not need to be modeled as
2244     // explicit data dependences.
2245     if (!PHI)
2246       buildScalarDependences(Stmt, Inst);
2247   };
2248 
2249   const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
2250   bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
2251   if (IsEntryBlock) {
2252     for (Instruction *Inst : Stmt->getInstructions())
2253       BuildAccessesForInst(Inst);
2254     if (Stmt->isRegionStmt())
2255       BuildAccessesForInst(BB.getTerminator());
2256   } else {
2257     for (Instruction &Inst : BB) {
2258       if (isIgnoredIntrinsic(&Inst))
2259         continue;
2260 
2261       // Invariant loads already have been processed.
2262       if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
2263         continue;
2264 
2265       BuildAccessesForInst(&Inst);
2266     }
2267   }
2268 }
2269 
2270 MemoryAccess *ScopBuilder::addMemoryAccess(
2271     ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
2272     Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
2273     ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
2274     MemoryKind Kind) {
2275   bool isKnownMustAccess = false;
2276 
2277   // Accesses in single-basic block statements are always executed.
2278   if (Stmt->isBlockStmt())
2279     isKnownMustAccess = true;
2280 
2281   if (Stmt->isRegionStmt()) {
2282     // Accesses that dominate the exit block of a non-affine region are always
2283     // executed. In non-affine regions there may exist accesses that do not
2284     // dominate the exit. MemoryKind::Values will always dominate the exit,
2285     // whereas MemoryKind::PHIs do so only if there is at most one PHI_WRITE in
2286     // the non-affine region.
2287     if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
2288       isKnownMustAccess = true;
2289   }
2290 
2291   // Non-affine PHI writes do not "happen" at a particular instruction, but
2292   // after exiting the statement. Therefore they are guaranteed to execute and
2293   // overwrite the old value.
2294   if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
2295     isKnownMustAccess = true;
2296 
2297   if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
2298     AccType = MemoryAccess::MAY_WRITE;
2299 
2300   auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
2301                                   Affine, Subscripts, Sizes, AccessValue, Kind);
2302 
2303   scop->addAccessFunction(Access);
2304   Stmt->addAccess(Access);
2305   return Access;
2306 }
2307 
2308 void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
2309                                  MemoryAccess::AccessType AccType,
2310                                  Value *BaseAddress, Type *ElementType,
2311                                  bool IsAffine,
2312                                  ArrayRef<const SCEV *> Subscripts,
2313                                  ArrayRef<const SCEV *> Sizes,
2314                                  Value *AccessValue) {
2315   ArrayBasePointers.insert(BaseAddress);
2316   auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress,
2317                                     ElementType, IsAffine, AccessValue,
2318                                     Subscripts, Sizes, MemoryKind::Array);
2319 
2320   if (!DetectFortranArrays)
2321     return;
2322 
2323   if (Value *FAD = findFADAllocationInvisible(MemAccInst))
2324     MemAccess->setFortranArrayDescriptor(FAD);
2325   else if (Value *FAD = findFADAllocationVisible(MemAccInst))
2326     MemAccess->setFortranArrayDescriptor(FAD);
2327 }
2328 
2329 /// Check if @p Expr is divisible by @p Size.
2330 static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
2331   assert(Size != 0);
2332   if (Size == 1)
2333     return true;
2334 
2335   // Only one factor needs to be divisible.
2336   if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
2337     for (auto *FactorExpr : MulExpr->operands())
2338       if (isDivisible(FactorExpr, Size, SE))
2339         return true;
2340     return false;
2341   }
2342 
2343   // For other n-ary expressions (Add, AddRec, Max,...) all operands need
2344   // to be divisible.
2345   if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
2346     for (auto *OpExpr : NAryExpr->operands())
2347       if (!isDivisible(OpExpr, Size, SE))
2348         return false;
2349     return true;
2350   }
2351 
2352   auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
2353   auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
2354   auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
2355   return MulSCEV == Expr;
2356 }
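
// Two small worked examples: for Expr = 4 * i + 8 and Size = 4, the add
// requires both operands to be divisible; 4 * i qualifies because its factor
// 4 does, and 8 is a multiple of 4, so the result is true. For Size = 8,
// neither factor of 4 * i is provably divisible by 8, so the result is false.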
2357 
2358 void ScopBuilder::foldSizeConstantsToRight() {
2359   isl::union_set Accessed = scop->getAccesses().range();
2360 
2361   for (auto Array : scop->arrays()) {
2362     if (Array->getNumberOfDimensions() <= 1)
2363       continue;
2364 
2365     isl::space Space = Array->getSpace();
2366     Space = Space.align_params(Accessed.get_space());
2367 
2368     if (!Accessed.contains(Space))
2369       continue;
2370 
2371     isl::set Elements = Accessed.extract_set(Space);
2372     isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());
2373 
2374     std::vector<int> Int;
2375     int Dims = Elements.dim(isl::dim::set);
2376     for (int i = 0; i < Dims; i++) {
2377       isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
2378       DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
2379       DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);
2380 
2381       isl::basic_set DimHull = DimOnly.affine_hull();
2382 
2383       if (i == Dims - 1) {
2384         Int.push_back(1);
2385         Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
2386         continue;
2387       }
2388 
2389       if (DimHull.dim(isl::dim::div) == 1) {
2390         isl::aff Diff = DimHull.get_div(0);
2391         isl::val Val = Diff.get_denominator_val();
2392 
2393         int ValInt = 1;
2394         if (Val.is_int()) {
2395           auto ValAPInt = APIntFromVal(Val);
2396           if (ValAPInt.isSignedIntN(32))
2397             ValInt = ValAPInt.getSExtValue();
2398         }
2400 
2401         Int.push_back(ValInt);
2402         isl::constraint C = isl::constraint::alloc_equality(
2403             isl::local_space(Transform.get_space()));
2404         C = C.set_coefficient_si(isl::dim::out, i, ValInt);
2405         C = C.set_coefficient_si(isl::dim::in, i, -1);
2406         Transform = Transform.add_constraint(C);
2407         continue;
2408       }
2409 
2410       isl::basic_set ZeroSet = isl::basic_set(DimHull);
2411       ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);
2412 
2413       int ValInt = 1;
2414       if (ZeroSet.is_equal(DimHull)) {
2415         ValInt = 0;
2416       }
2417 
2418       Int.push_back(ValInt);
2419       Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
2420     }
2421 
2422     isl::set MappedElements = isl::map(Transform).domain();
2423     if (!Elements.is_subset(MappedElements))
2424       continue;
2425 
2426     bool CanFold = true;
2427     if (Int[0] <= 1)
2428       CanFold = false;
2429 
2430     unsigned NumDims = Array->getNumberOfDimensions();
2431     for (unsigned i = 1; i < NumDims - 1; i++)
2432       if (Int[0] != Int[i] && Int[i])
2433         CanFold = false;
2434 
2435     if (!CanFold)
2436       continue;
2437 
2438     for (auto &Access : scop->access_functions())
2439       if (Access->getScopArrayInfo() == Array)
2440         Access->setAccessRelation(
2441             Access->getAccessRelation().apply_range(Transform));
2442 
2443     std::vector<const SCEV *> Sizes;
2444     for (unsigned i = 0; i < NumDims; i++) {
2445       auto Size = Array->getDimensionSize(i);
2446 
2447       if (i == NumDims - 1)
2448         Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0]));
2449       Sizes.push_back(Size);
2450     }
2451 
2452     Array->updateSizes(Sizes, false /* CheckConsistency */);
2453   }
2454 }
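
// For illustration (a hedged sketch): if a two-dimensional array A[*][m] is
// only ever accessed at even indices in its outer dimension, the affine hull
// of the accessed elements exposes the constant stride 2. The transform above
// then divides the outer subscripts by 2 and multiplies the innermost
// dimension size by 2, so later passes see a densely accessed array.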
2455 
2456 void ScopBuilder::markFortranArrays() {
2457   for (ScopStmt &Stmt : *scop) {
2458     for (MemoryAccess *MemAcc : Stmt) {
2459       Value *FAD = MemAcc->getFortranArrayDescriptor();
2460       if (!FAD)
2461         continue;
2462 
2463       // TODO: const_cast-ing to edit
2464       ScopArrayInfo *SAI =
2465           const_cast<ScopArrayInfo *>(MemAcc->getLatestScopArrayInfo());
2466       assert(SAI && "memory access into a Fortran array does not "
2467                     "have an associated ScopArrayInfo");
2468       SAI->applyAndSetFAD(FAD);
2469     }
2470   }
2471 }
2472 
2473 void ScopBuilder::finalizeAccesses() {
2474   updateAccessDimensionality();
2475   foldSizeConstantsToRight();
2476   foldAccessRelations();
2477   assumeNoOutOfBounds();
2478   markFortranArrays();
2479 }
2480 
2481 void ScopBuilder::updateAccessDimensionality() {
2482   // Check all array accesses for each base pointer and find a (virtual) element
2483   // size for the base pointer that divides all access functions.
2484   for (ScopStmt &Stmt : *scop)
2485     for (MemoryAccess *Access : Stmt) {
2486       if (!Access->isArrayKind())
2487         continue;
2488       ScopArrayInfo *Array =
2489           const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
2490 
2491       if (Array->getNumberOfDimensions() != 1)
2492         continue;
2493       unsigned DivisibleSize = Array->getElemSizeInBytes();
2494       const SCEV *Subscript = Access->getSubscript(0);
2495       while (!isDivisible(Subscript, DivisibleSize, SE))
2496         DivisibleSize /= 2;
2497       auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8);
2498       Array->updateElementType(Ty);
2499     }
2500 
2501   for (auto &Stmt : *scop)
2502     for (auto &Access : Stmt)
2503       Access->updateDimensionality();
2504 }
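
// For illustration (a hedged sketch): an array of i64 (element size 8) that
// is only ever subscripted with byte offsets of the form 4 * i fails the
// divisibility test for 8 but passes for 4, so DivisibleSize shrinks to 4 and
// the array's virtual element type becomes i32 before the access
// dimensionalities are recomputed.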
2505 
2506 void ScopBuilder::foldAccessRelations() {
2507   for (auto &Stmt : *scop)
2508     for (auto &Access : Stmt)
2509       Access->foldAccessRelation();
2510 }
2511 
2512 void ScopBuilder::assumeNoOutOfBounds() {
2513   if (PollyIgnoreInbounds)
2514     return;
2515   for (auto &Stmt : *scop)
2516     for (auto &Access : Stmt) {
2517       isl::set Outside = Access->assumeNoOutOfBound();
2518       const auto &Loc = Access->getAccessInstruction()
2519                             ? Access->getAccessInstruction()->getDebugLoc()
2520                             : DebugLoc();
2521       recordAssumption(&RecordedAssumptions, INBOUNDS, Outside, Loc,
2522                        AS_ASSUMPTION);
2523     }
2524 }
2525 
2526 void ScopBuilder::ensureValueWrite(Instruction *Inst) {
2527   // Find the statement that defines the value of Inst. That statement has to
2528   // write the value to make it available to those statements that read it.
2529   ScopStmt *Stmt = scop->getStmtFor(Inst);
2530 
2531   // It is possible that the value is synthesizable within a loop (such that it
2532   // is not part of any statement), but not after the loop (where you need the
2533   // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
2534   // avoid this. In case the IR has no such PHI, use the last statement (where
2535   // the value is synthesizable) to write the value.
2536   if (!Stmt)
2537     Stmt = scop->getLastStmtFor(Inst->getParent());
2538 
2539   // Inst not defined within this SCoP.
2540   if (!Stmt)
2541     return;
2542 
2543   // Do not process further if the instruction is already written.
2544   if (Stmt->lookupValueWriteOf(Inst))
2545     return;
2546 
2547   addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
2548                   true, Inst, ArrayRef<const SCEV *>(),
2549                   ArrayRef<const SCEV *>(), MemoryKind::Value);
2550 }
2551 
2552 void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
2553   // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
2554   // to be able to replace this one. Currently, there is a split responsibility.
2555   // In a first step, the MemoryAccess is created, but without the
2556   // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
2557   // AccessRelation is created. At least for scalar accesses, there is no new
2558   // information available at ScopStmt::buildAccessRelations(), so we could
2559   // create the AccessRelation right away. This is what
2560   // ScopStmt::ensureValueRead(Value*) does.
2561 
2562   auto *Scope = UserStmt->getSurroundingLoop();
2563   auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
2564   switch (VUse.getKind()) {
2565   case VirtualUse::Constant:
2566   case VirtualUse::Block:
2567   case VirtualUse::Synthesizable:
2568   case VirtualUse::Hoisted:
2569   case VirtualUse::Intra:
2570     // Uses of these kinds do not need a MemoryAccess.
2571     break;
2572 
2573   case VirtualUse::ReadOnly:
2574     // Add MemoryAccess for invariant values only if requested.
2575     if (!ModelReadOnlyScalars)
2576       break;
2577 
2578     LLVM_FALLTHROUGH;
2579   case VirtualUse::Inter:
2580 
2581     // Do not create another MemoryAccess for reloading the value if one already
2582     // exists.
2583     if (UserStmt->lookupValueReadOf(V))
2584       break;
2585 
2586     addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
2587                     true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2588                     MemoryKind::Value);
2589 
2590     // Inter-statement uses need to write the value in their defining statement.
2591     if (VUse.isInter())
2592       ensureValueWrite(cast<Instruction>(V));
2593     break;
2594   }
2595 }
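
// For illustration (a hedged sketch): if %v is defined in Stmt_A and used
// non-synthesizably in Stmt_B, ensureValueRead adds a MemoryKind::Value READ
// of %v to Stmt_B and, via ensureValueWrite, the matching MUST_WRITE to
// Stmt_A. A read-only scalar defined outside the SCoP gets only the READ
// side, and only if ModelReadOnlyScalars is set.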
2596 
2597 void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
2598                                  BasicBlock *IncomingBlock,
2599                                  Value *IncomingValue, bool IsExitBlock) {
2600   // As the incoming block might turn out to be an error statement, ensure we
2601   // will create an exit PHI SAI object. It is needed during code generation
2602   // and would be created later anyway.
2603   if (IsExitBlock)
2604     scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
2605                                    MemoryKind::ExitPHI);
2606 
2607   // This is possible if PHI is in the SCoP's entry block. The incoming blocks
2608   // from outside the SCoP's region have no statement representation.
2609   if (!IncomingStmt)
2610     return;
2611 
2612   // Take care that the incoming value is available in the incoming block.
2613   // This must be done before the check for multiple PHI writes because each of
2614   // the multiple exiting edges of a subregion can be the effective written
2615   // value of the subregion. As such, all of them must be made available in the
2616   // subregion statement.
2617   ensureValueRead(IncomingValue, IncomingStmt);
2618 
2619   // Do not add more than one MemoryAccess per PHINode and ScopStmt.
2620   if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
2621     assert(Acc->getAccessInstruction() == PHI);
2622     Acc->addIncoming(IncomingBlock, IncomingValue);
2623     return;
2624   }
2625 
2626   MemoryAccess *Acc = addMemoryAccess(
2627       IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
2628       PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2629       IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
2630   assert(Acc);
2631   Acc->addIncoming(IncomingBlock, IncomingValue);
2632 }
2633 
2634 void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
2635   addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
2636                   PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2637                   MemoryKind::PHI);
2638 }
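// Sketch of the PHI modeling (illustrative only): for a loop-carried PHI
//
//   %sum = phi double [ 0.0, %entry ], [ %sum.next, %body ]
//
// each incoming edge gets a MemoryKind::PHI MUST_WRITE in the statement of
// its incoming block (ensurePHIWrite collects the incoming block/value
// pairs on a single access per statement), while the statement containing
// the PHI gets one MemoryKind::PHI READ (addPHIReadAccess). Code generation
// may later materialize these as stores/loads through a PHI-specific
// location unless they can be optimized away.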
2639 
2640 void ScopBuilder::buildDomain(ScopStmt &Stmt) {
2641   isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);
2642 
2643   Stmt.Domain = scop->getDomainConditions(&Stmt);
2644   Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
2645 }
2646 
2647 void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
2648   isl::set Domain = Stmt.getDomain();
2649   BasicBlock *BB = Stmt.getEntryBlock();
2650 
2651   Loop *L = LI.getLoopFor(BB);
2652 
2653   while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
2654     L = L->getParentLoop();
2655 
2656   SmallVector<llvm::Loop *, 8> Loops;
2657 
2658   while (L && Stmt.getParent()->getRegion().contains(L)) {
2659     Loops.push_back(L);
2660     L = L->getParentLoop();
2661   }
2662 
2663   Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
2664 }
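// For example (illustrative), for a statement S in the nest
//
//   for (i ...)       // L1
//     for (j ...)     // L2
//       S: A[i][j] = ...;
//
// NestLoops becomes [L1, L2], outermost first. For region statements, loops
// fully contained in the non-affine region are skipped first, as they do
// not contribute dimensions to the statement's domain.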
2665 
2666 /// Return the reduction type for a given binary operator.
2667 static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
2668                                                     const Instruction *Load) {
2669   if (!BinOp)
2670     return MemoryAccess::RT_NONE;
2671   switch (BinOp->getOpcode()) {
2672   case Instruction::FAdd:
2673     if (!BinOp->isFast())
2674       return MemoryAccess::RT_NONE;
2675     LLVM_FALLTHROUGH;
2676   case Instruction::Add:
2677     return MemoryAccess::RT_ADD;
2678   case Instruction::Or:
2679     return MemoryAccess::RT_BOR;
2680   case Instruction::Xor:
2681     return MemoryAccess::RT_BXOR;
2682   case Instruction::And:
2683     return MemoryAccess::RT_BAND;
2684   case Instruction::FMul:
2685     if (!BinOp->isFast())
2686       return MemoryAccess::RT_NONE;
2687     LLVM_FALLTHROUGH;
2688   case Instruction::Mul:
2689     if (DisableMultiplicativeReductions)
2690       return MemoryAccess::RT_NONE;
2691     return MemoryAccess::RT_MUL;
2692   default:
2693     return MemoryAccess::RT_NONE;
2694   }
2695 }
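// A few illustrative mappings (assuming default options):
//
//   sum  += A[i];   // Add  -> RT_ADD
//   prod *= A[i];   // Mul  -> RT_MUL, or RT_NONE when multiplicative
//                   //         reductions are disabled
//   fsum += B[i];   // FAdd -> RT_ADD only with fast-math flags set,
//                   //         since FP reassociation changes results
//   bits |= A[i];   // Or   -> RT_BOR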
2696 
2697 void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
2698   SmallVector<MemoryAccess *, 2> Loads;
2699   SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
2700 
2701   // First collect candidate load-store reduction chains by iterating over all
2702   // stores and collecting possible reduction loads.
2703   for (MemoryAccess *StoreMA : Stmt) {
2704     if (StoreMA->isRead())
2705       continue;
2706 
2707     Loads.clear();
2708     collectCandidateReductionLoads(StoreMA, Loads);
2709     for (MemoryAccess *LoadMA : Loads)
2710       Candidates.push_back(std::make_pair(LoadMA, StoreMA));
2711   }
2712 
2713   // Then check each possible candidate pair.
2714   for (const auto &CandidatePair : Candidates) {
2715     bool Valid = true;
2716     isl::map LoadAccs = CandidatePair.first->getAccessRelation();
2717     isl::map StoreAccs = CandidatePair.second->getAccessRelation();
2718 
2719     // Skip those with obviously unequal base addresses.
2720     if (!LoadAccs.has_equal_space(StoreAccs)) {
2721       continue;
2722     }
2723 
2724     // Then check the remaining candidates for overlap with other memory accesses.
2725     isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
2726     AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
2727     isl::set AllAccs = AllAccsRel.range();
2728 
2729     for (MemoryAccess *MA : Stmt) {
2730       if (MA == CandidatePair.first || MA == CandidatePair.second)
2731         continue;
2732 
2733       isl::map AccRel =
2734           MA->getAccessRelation().intersect_domain(Stmt.getDomain());
2735       isl::set Accs = AccRel.range();
2736 
2737       if (AllAccs.has_equal_space(Accs)) {
2738         isl::set OverlapAccs = Accs.intersect(AllAccs);
2739         Valid = Valid && OverlapAccs.is_empty();
2740       }
2741     }
2742 
2743     if (!Valid)
2744       continue;
2745 
2746     const LoadInst *Load =
2747         dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
2748     MemoryAccess::ReductionType RT =
2749         getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
2750 
2751     // If no overlapping access was found, we mark the load and the store as
2752     // reduction-like.
2753     CandidatePair.first->markAsReductionLike(RT);
2754     CandidatePair.second->markAsReductionLike(RT);
2755   }
2756 }
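// Illustrative example of the check above: in
//
//   for (i = 0; i < N; i++)
//     A[0] = A[0] + B[i];
//
// the load/store pair on A[0] is a candidate. It is marked reduction-like
// because no other access in the statement touches A[0] (the B[i] load
// lives in a different array space and is filtered out). If another access
// of the statement could overlap A[0], the intersection computed above
// would be non-empty and the candidate would be dropped.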
2757 
2758 void ScopBuilder::verifyInvariantLoads() {
2759   auto &RIL = scop->getRequiredInvariantLoads();
2760   for (LoadInst *LI : RIL) {
2761     assert(LI && scop->contains(LI));
2762     // If there exists a statement in the scop which has a memory access for
2763     // @p LI, then mark this scop as infeasible for optimization.
2764     for (ScopStmt &Stmt : *scop)
2765       if (Stmt.getArrayAccessOrNULLFor(LI)) {
2766         scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
2767         return;
2768       }
2769   }
2770 }
2771 
2772 void ScopBuilder::hoistInvariantLoads() {
2773   if (!PollyInvariantLoadHoisting)
2774     return;
2775 
2776   isl::union_map Writes = scop->getWrites();
2777   for (ScopStmt &Stmt : *scop) {
2778     InvariantAccessesTy InvariantAccesses;
2779 
2780     for (MemoryAccess *Access : Stmt)
2781       if (isl::set NHCtx = getNonHoistableCtx(Access, Writes))
2782         InvariantAccesses.push_back({Access, NHCtx});
2783 
2784     // Transfer the memory access from the statement to the SCoP.
2785     for (auto InvMA : InvariantAccesses)
2786       Stmt.removeMemoryAccess(InvMA.MA);
2787     addInvariantLoads(Stmt, InvariantAccesses);
2788   }
2789 }
2790 
2791 /// Check if an access range is too complex.
2792 ///
2793 /// An access range is too complex if it contains either many disjuncts or
2794 /// very complex expressions. As a simple heuristic, we assume a set to
2795 /// be too complex if the sum of existentially quantified dimensions and
2796 /// set dimensions is larger than a threshold. This reliably detects both
2797 /// sets with many disjuncts as well as sets with many divisions as they
2798 /// arise in h264.
2799 ///
2800 /// @param AccessRange The range to check for complexity.
2801 ///
2802 /// @returns True if the access range is too complex.
2803 static bool isAccessRangeTooComplex(isl::set AccessRange) {
2804   int NumTotalDims = 0;
2805 
2806   for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
2807     NumTotalDims += BSet.dim(isl::dim::div);
2808     NumTotalDims += BSet.dim(isl::dim::set);
2809   }
2810 
2811   if (NumTotalDims > MaxDimensionsInAccessRange)
2812     return true;
2813 
2814   return false;
2815 }
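// For instance (illustrative), a range such as
//
//   { A[o] : exists e0, e1, e2 : o = 128e0 + 16e1 + 2e2 and ... }
//
// contributes one set dimension plus three existentially quantified (div)
// dimensions; once the sum over all basic sets exceeds
// MaxDimensionsInAccessRange, the access is rejected before the expensive
// hoisting computations run on it.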
2816 
2817 bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
2818                                                isl::union_map Writes) {
2819   if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) {
2820     return getNonHoistableCtx(BasePtrMA, Writes).is_null();
2821   }
2822 
2823   Value *BaseAddr = MA->getOriginalBaseAddr();
2824   if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
2825     if (!isa<LoadInst>(BasePtrInst))
2826       return scop->contains(BasePtrInst);
2827 
2828   return false;
2829 }
2830 
2831 void ScopBuilder::addUserContext() {
2832   if (UserContextStr.empty())
2833     return;
2834 
2835   isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str());
2836   isl::space Space = scop->getParamSpace();
2837   if (Space.dim(isl::dim::param) != UserContext.dim(isl::dim::param)) {
2838     std::string SpaceStr = Space.to_str();
2839     errs() << "Error: the context provided in -polly-context has not the same "
2840            << "number of dimensions than the computed context. Due to this "
2841            << "mismatch, the -polly-context option is ignored. Please provide "
2842            << "the context in the parameter space: " << SpaceStr << ".\n";
2843     return;
2844   }
2845 
2846   for (unsigned i = 0; i < Space.dim(isl::dim::param); i++) {
2847     std::string NameContext =
2848         scop->getContext().get_dim_name(isl::dim::param, i);
2849     std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);
2850 
2851     if (NameContext != NameUserContext) {
2852       std::string SpaceStr = Space.to_str();
2853       errs() << "Error: the name of dimension " << i
2854              << " provided in -polly-context "
2855              << "is '" << NameUserContext << "', but the name in the computed "
2856              << "context is '" << NameContext
2857              << "'. Due to this name mismatch, "
2858              << "the -polly-context option is ignored. Please provide "
2859              << "the context in the parameter space: " << SpaceStr << ".\n";
2860       return;
2861     }
2862 
2863     UserContext = UserContext.set_dim_id(isl::dim::param, i,
2864                                          Space.get_dim_id(isl::dim::param, i));
2865   }
2866   isl::set newContext = scop->getContext().intersect(UserContext);
2867   scop->setContext(newContext);
2868 }
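// Example usage (a sketch; the parameter names must match the SCoP's
// computed context, here assumed to be a single parameter N):
//
//   opt ... -polly-context='[N] -> { : N >= 1024 }' ...
//
// The user constraint is intersected with the computed context, so later
// transformations may assume N >= 1024.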
2869 
2870 isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,
2871                                          isl::union_map Writes) {
2872   // TODO: Loads that are not loop carried, hence are in a statement with
2873   //       zero iterators, are by construction invariant, though we
2874   //       currently "hoist" them anyway. This is necessary because we allow
2875   //       them to be treated as parameters (e.g., in conditions) and our code
2876   //       generation would otherwise use the old value.
2877 
2878   auto &Stmt = *Access->getStatement();
2879   BasicBlock *BB = Stmt.getEntryBlock();
2880 
2881   if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
2882       Access->isMemoryIntrinsic())
2883     return nullptr;
2884 
2885   // Skip accesses that have an invariant base pointer which is defined but
2886   // not loaded inside the SCoP. This can happen, e.g., if a readnone call
2887   // returns a pointer that is used as a base address. However, as we want
2888   // to hoist indirect pointers, we allow the base pointer to be defined in
2889   // the region if it is also a memory access. Each ScopArrayInfo object
2890   // that has a base pointer origin has a base pointer that is loaded and
2891   // invariant, thus it will be hoisted too. However, if there is no base
2892   // pointer origin, we check that the base pointer is defined outside the
2893   // region.
2894   auto *LI = cast<LoadInst>(Access->getAccessInstruction());
2895   if (hasNonHoistableBasePtrInScop(Access, Writes))
2896     return nullptr;
2897 
2898   isl::map AccessRelation = Access->getAccessRelation();
2899   assert(!AccessRelation.is_empty());
2900 
2901   if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
2902     return nullptr;
2903 
2904   AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
2905   isl::set SafeToLoad;
2906 
2907   auto &DL = scop->getFunction().getParent()->getDataLayout();
2908   if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(),
2909                                   LI->getAlign(), DL)) {
2910     SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
2911   } else if (BB != LI->getParent()) {
2912     // Skip accesses in non-affine subregions as they might not be executed
2913     // under the same condition as the entry of the non-affine subregion.
2914     return nullptr;
2915   } else {
2916     SafeToLoad = AccessRelation.range();
2917   }
2918 
2919   if (isAccessRangeTooComplex(AccessRelation.range()))
2920     return nullptr;
2921 
2922   isl::union_map Written = Writes.intersect_range(SafeToLoad);
2923   isl::set WrittenCtx = Written.params();
2924   bool IsWritten = !WrittenCtx.is_empty();
2925 
2926   if (!IsWritten)
2927     return WrittenCtx;
2928 
2929   WrittenCtx = WrittenCtx.remove_divs();
2930   bool TooComplex = WrittenCtx.n_basic_set() >= MaxDisjunctsInDomain;
2931   if (TooComplex || !isRequiredInvariantLoad(LI))
2932     return nullptr;
2933 
2934   scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(),
2935                       AS_RESTRICTION, LI->getParent());
2936   return WrittenCtx;
2937 }
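// Illustrative example: in
//
//   int bound = *p;              // load not written anywhere in the SCoP
//   for (i = 0; i < bound; i++)
//     A[i] = ...;
//
// the load of *p uses no statement iterators, so if nothing in the SCoP
// may write to *p the function returns an empty (always hoistable)
// non-hoistable context. If some write might touch *p, the returned set
// describes the parameter values under which that happens and a matching
// restriction assumption is recorded.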
2938 
2939 static bool isAParameter(llvm::Value *maybeParam, const Function &F) {
2940   for (const llvm::Argument &Arg : F.args())
2941     if (&Arg == maybeParam)
2942       return true;
2943 
2944   return false;
2945 }
2946 
2947 bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA,
2948                                      bool StmtInvalidCtxIsEmpty,
2949                                      bool MAInvalidCtxIsEmpty,
2950                                      bool NonHoistableCtxIsEmpty) {
2951   LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
2952   const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
2953   if (PollyAllowDereferenceOfAllFunctionParams &&
2954       isAParameter(LInst->getPointerOperand(), scop->getFunction()))
2955     return true;
2956 
2957   // TODO: We can provide more information for better but more expensive
2958   //       results.
2959   if (!isDereferenceableAndAlignedPointer(
2960           LInst->getPointerOperand(), LInst->getType(), LInst->getAlign(), DL))
2961     return false;
2962 
2963   // If the location might be overwritten we do not hoist it unconditionally.
2964   //
2965   // TODO: This is probably too conservative.
2966   if (!NonHoistableCtxIsEmpty)
2967     return false;
2968 
2969   // If a dereferenceable load is in a statement that is modeled precisely we
2970   // can hoist it.
2971   if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
2972     return true;
2973 
2974   // Even if the statement is not modeled precisely we can hoist the load if it
2975   // does not involve any parameters that might have been specialized by the
2976   // statement domain.
2977   for (const SCEV *Subscript : MA->subscripts())
2978     if (!isa<SCEVConstant>(Subscript))
2979       return false;
2980   return true;
2981 }
2982 
2983 void ScopBuilder::addInvariantLoads(ScopStmt &Stmt,
2984                                     InvariantAccessesTy &InvMAs) {
2985   if (InvMAs.empty())
2986     return;
2987 
2988   isl::set StmtInvalidCtx = Stmt.getInvalidContext();
2989   bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
2990 
2991   // Get the context under which the statement is executed but remove the error
2992   // context under which this statement is reached.
2993   isl::set DomainCtx = Stmt.getDomain().params();
2994   DomainCtx = DomainCtx.subtract(StmtInvalidCtx);
2995 
2996   if (DomainCtx.n_basic_set() >= MaxDisjunctsInDomain) {
2997     auto *AccInst = InvMAs.front().MA->getAccessInstruction();
2998     scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
2999     return;
3000   }
3001 
3002   // Project out all parameters that relate to loads in the statement. Otherwise
3003   // we could have cyclic dependences on the constraints under which the
3004   // hoisted loads are executed and we could not determine an order in which to
3005   // pre-load them. This happens because not only lower bounds are part of the
3006   // domain but also upper bounds.
3007   for (auto &InvMA : InvMAs) {
3008     auto *MA = InvMA.MA;
3009     Instruction *AccInst = MA->getAccessInstruction();
3010     if (SE.isSCEVable(AccInst->getType())) {
3011       SetVector<Value *> Values;
3012       for (const SCEV *Parameter : scop->parameters()) {
3013         Values.clear();
3014         findValues(Parameter, SE, Values);
3015         if (!Values.count(AccInst))
3016           continue;
3017 
3018         if (isl::id ParamId = scop->getIdForParam(Parameter)) {
3019           int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
3020           if (Dim >= 0)
3021             DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
3022         }
3023       }
3024     }
3025   }
3026 
3027   for (auto &InvMA : InvMAs) {
3028     auto *MA = InvMA.MA;
3029     isl::set NHCtx = InvMA.NonHoistableCtx;
3030 
3031     // Check for another invariant access that accesses the same location as
3032     // MA and if found consolidate them. Otherwise create a new equivalence
3033     // class at the end of InvariantEquivClasses.
3034     LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3035     Type *Ty = LInst->getType();
3036     const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
3037 
3038     isl::set MAInvalidCtx = MA->getInvalidContext();
3039     bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
3040     bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
3041 
3042     isl::set MACtx;
3043     // Check if we know that this pointer can be speculatively accessed.
3044     if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
3045                            NonHoistableCtxIsEmpty)) {
3046       MACtx = isl::set::universe(DomainCtx.get_space());
3047     } else {
3048       MACtx = DomainCtx;
3049       MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
3050       MACtx = MACtx.gist_params(scop->getContext());
3051     }
3052 
3053     bool Consolidated = false;
3054     for (auto &IAClass : scop->invariantEquivClasses()) {
3055       if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3056         continue;
3057 
3058       // If the pointer and the type are equal, check if the access function
3059       // w.r.t. the domain is equal too. It can happen that the domain fixes
3060       // parameter values and these can be different for distinct parts of the
3061       // SCoP. If this happens, we cannot consolidate the loads but need to
3062       // create a new invariant load equivalence class.
3063       auto &MAs = IAClass.InvariantAccesses;
3064       if (!MAs.empty()) {
3065         auto *LastMA = MAs.front();
3066 
3067         isl::set AR = MA->getAccessRelation().range();
3068         isl::set LastAR = LastMA->getAccessRelation().range();
3069         bool SameAR = AR.is_equal(LastAR);
3070 
3071         if (!SameAR)
3072           continue;
3073       }
3074 
3075       // Add MA to the list of accesses that are in this class.
3076       MAs.push_front(MA);
3077 
3078       Consolidated = true;
3079 
3080       // Unify the execution context of the class and this statement.
3081       isl::set IAClassDomainCtx = IAClass.ExecutionContext;
3082       if (IAClassDomainCtx)
3083         IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
3084       else
3085         IAClassDomainCtx = MACtx;
3086       IAClass.ExecutionContext = IAClassDomainCtx;
3087       break;
3088     }
3089 
3090     if (Consolidated)
3091       continue;
3092 
3093     MACtx = MACtx.coalesce();
3094 
3095     // If we did not consolidate MA, thus did not find an equivalence class
3096     // for it, we create a new one.
3097     scop->addInvariantEquivClass(
3098         InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
3099   }
3100 }
3101 
3102 void ScopBuilder::collectCandidateReductionLoads(
3103     MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
3104   ScopStmt *Stmt = StoreMA->getStatement();
3105 
3106   auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
3107   if (!Store)
3108     return;
3109 
3110   // Skip if there is not exactly one binary operator between the load and the store
3111   auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
3112   if (!BinOp)
3113     return;
3114 
3115   // Skip if the binary operator has multiple uses
3116   if (BinOp->getNumUses() != 1)
3117     return;
3118 
3119   // Skip if the opcode of the binary operator is not commutative/associative
3120   if (!BinOp->isCommutative() || !BinOp->isAssociative())
3121     return;
3122 
3123   // Skip if the binary operator is not in the same basic block as the store
3124   if (BinOp->getParent() != Store->getParent())
3125     return;
3126 
3127   // Skip if it is a multiplicative reduction and we disabled them
3128   if (DisableMultiplicativeReductions &&
3129       (BinOp->getOpcode() == Instruction::Mul ||
3130        BinOp->getOpcode() == Instruction::FMul))
3131     return;
3132 
3133   // Check the binary operator operands for a candidate load
3134   auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
3135   auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
3136   if (!PossibleLoad0 && !PossibleLoad1)
3137     return;
3138 
3139   // A load is only a candidate if it cannot escape (thus has only this use)
3140   if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
3141     if (PossibleLoad0->getParent() == Store->getParent())
3142       Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
3143   if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
3144     if (PossibleLoad1->getParent() == Store->getParent())
3145       Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
3146 }
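// The shape being matched above, sketched in pre-opaque-pointer IR:
//
//   %val  = load double, double* %addr     ; candidate load, single use
//   %next = fadd fast double %val, %x      ; single-use commutative binop
//   store double %next, double* %addr      ; the store behind StoreMA
//
// Both binop operands are inspected because either may be the reloaded
// accumulator; loads with additional uses are rejected since the value
// would escape the load-binop-store chain.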
3147 
3148 /// Find the canonical ScopArrayInfo object for a set of hoisted invariant
3149 /// loads. The canonical array is the one that corresponds to the first
3150 /// load in the list of accesses which is used as base pointer of a scop
3151 /// array.
3152 static const ScopArrayInfo *findCanonicalArray(Scop &S,
3153                                                MemoryAccessList &Accesses) {
3154   for (MemoryAccess *Access : Accesses) {
3155     const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
3156         Access->getAccessInstruction(), MemoryKind::Array);
3157     if (CanonicalArray)
3158       return CanonicalArray;
3159   }
3160   return nullptr;
3161 }
3162 
3163 /// Check if @p Array serves as base array in an invariant load.
3164 static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) {
3165   for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
3166     for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
3167       if (Access2->getScopArrayInfo() == Array)
3168         return true;
3169   return false;
3170 }
3171 
3172 /// Replace the base pointer arrays in all memory accesses referencing @p Old,
3173 /// with a reference to @p New.
3174 static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
3175                                  const ScopArrayInfo *New) {
3176   for (ScopStmt &Stmt : S)
3177     for (MemoryAccess *Access : Stmt) {
3178       if (Access->getLatestScopArrayInfo() != Old)
3179         continue;
3180 
3181       isl::id Id = New->getBasePtrId();
3182       isl::map Map = Access->getAccessRelation();
3183       Map = Map.set_tuple_id(isl::dim::out, Id);
3184       Access->setAccessRelation(Map);
3185     }
3186 }
3187 
3188 void ScopBuilder::canonicalizeDynamicBasePtrs() {
3189   for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
3190     MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
3191 
3192     const ScopArrayInfo *CanonicalBasePtrSAI =
3193         findCanonicalArray(*scop, BasePtrAccesses);
3194 
3195     if (!CanonicalBasePtrSAI)
3196       continue;
3197 
3198     for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
3199       const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
3200           BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
3201       if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
3202           !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
3203         continue;
3204 
3205       // We currently do not canonicalize arrays for which some accesses are
3206       // hoisted as invariant loads. If we did, we would need to update the
3207       // access functions of the invariant loads as well. However, as this is
3208       // not a very common situation, we leave this for now to avoid further
3209       // complexity increases.
3210       if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI))
3211         continue;
3212 
3213       replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI);
3214     }
3215   }
3216 }
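// Illustrative situation: with an indirect base pointer such as
//
//   float *base = *pp;    // hoisted invariant load of the base pointer
//   ... base[i] ...       // array accesses through the loaded pointer
//
// several members of one invariant equivalence class may have produced
// distinct ScopArrayInfo objects for what is dynamically the same array.
// The loop above picks one canonical array per class and redirects all
// compatible accesses to it.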
3217 
3218 void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
3219   for (MemoryAccess *Access : Stmt.MemAccs) {
3220     Type *ElementType = Access->getElementType();
3221 
3222     MemoryKind Ty;
3223     if (Access->isPHIKind())
3224       Ty = MemoryKind::PHI;
3225     else if (Access->isExitPHIKind())
3226       Ty = MemoryKind::ExitPHI;
3227     else if (Access->isValueKind())
3228       Ty = MemoryKind::Value;
3229     else
3230       Ty = MemoryKind::Array;
3231 
3232     // Create isl::pw_aff for SCEVs which describe sizes. Collect all
3233     // assumptions which are taken. isl::pw_aff objects are cached internally
3234     // and they are used later by scop.
3235     for (const SCEV *Size : Access->Sizes) {
3236       if (!Size)
3237         continue;
3238       scop->getPwAff(Size, nullptr, false, &RecordedAssumptions);
3239     }
3240     auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
3241                                                ElementType, Access->Sizes, Ty);
3242 
3243     // Create isl::pw_aff for SCEVs which describe subscripts. Collect all
3244     // assumptions which are taken. isl::pw_aff objects are cached internally
3245     // and they are used later by scop.
3246     for (const SCEV *Subscript : Access->subscripts()) {
3247       if (!Access->isAffine() || !Subscript)
3248         continue;
3249       scop->getPwAff(Subscript, Stmt.getEntryBlock(), false,
3250                      &RecordedAssumptions);
3251     }
3252     Access->buildAccessRelation(SAI);
3253     scop->addAccessData(Access);
3254   }
3255 }
3256 
3257 /// Add the minimal/maximal access in @p Set to @p User.
3258 ///
3259 /// @return True if more accesses should be added, false if we reached the
3260 ///         maximal number of run-time checks to be generated.
3261 static bool buildMinMaxAccess(isl::set Set,
3262                               Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
3263   isl::pw_multi_aff MinPMA, MaxPMA;
3264   isl::pw_aff LastDimAff;
3265   isl::aff OneAff;
3266   unsigned Pos;
3267 
3268   Set = Set.remove_divs();
3269   polly::simplify(Set);
3270 
3271   if (Set.n_basic_set() > RunTimeChecksMaxAccessDisjuncts)
3272     Set = Set.simple_hull();
3273 
3274   // Restrict the number of parameters involved in the access as the lexmin/
3275   // lexmax computation will take too long if this number is high.
3276   //
3277   // Experiments with a simple test case using an i7 4800MQ:
3278   //
3279   //  #Parameters involved | Time (in sec)
3280   //            6          |     0.01
3281   //            7          |     0.04
3282   //            8          |     0.12
3283   //            9          |     0.40
3284   //           10          |     1.54
3285   //           11          |     6.78
3286   //           12          |    30.38
3287   //
3288   if (isl_set_n_param(Set.get()) >
3289       static_cast<isl_size>(RunTimeChecksMaxParameters)) {
3290     unsigned InvolvedParams = 0;
3291     for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
3292       if (Set.involves_dims(isl::dim::param, u, 1))
3293         InvolvedParams++;
3294 
3295     if (InvolvedParams > RunTimeChecksMaxParameters)
3296       return false;
3297   }
3298 
3299   MinPMA = Set.lexmin_pw_multi_aff();
3300   MaxPMA = Set.lexmax_pw_multi_aff();
3301 
3302   MinPMA = MinPMA.coalesce();
3303   MaxPMA = MaxPMA.coalesce();
3304 
3305   // Adjust the last dimension of the maximal access by one as we want to
3306   // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
3307   // we test during code generation might now point after the end of the
3308   // allocated array but we will never dereference it anyway.
3309   assert((!MaxPMA || MaxPMA.dim(isl::dim::out)) &&
3310          "Assumed at least one output dimension");
3311 
3312   Pos = MaxPMA.dim(isl::dim::out) - 1;
3313   LastDimAff = MaxPMA.get_pw_aff(Pos);
3314   OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
3315   OneAff = OneAff.add_constant_si(1);
3316   LastDimAff = LastDimAff.add(OneAff);
3317   MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);
3318 
3319   if (!MinPMA || !MaxPMA)
3320     return false;
3321 
3322   MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));
3323 
3324   return true;
3325 }
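// A minimal sketch of the result: for accesses described by
//
//   [N] -> { S[i] -> A[i] : 0 <= i < N }
//
// the computed pair is roughly MinPMA = [N] -> { [] -> A[0] } and
// MaxPMA = [N] -> { [] -> A[N] }, the maximum having been bumped from
// A[N - 1] by one so that [Min, Max) covers the accessed region as a
// half-open interval.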
3326 
3327 /// Wrapper function to calculate minimal/maximal accesses to each array.
3328 bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup,
3329                                         Scop::MinMaxVectorTy &MinMaxAccesses) {
3330   MinMaxAccesses.reserve(AliasGroup.size());
3331 
3332   isl::union_set Domains = scop->getDomains();
3333   isl::union_map Accesses = isl::union_map::empty(scop->getParamSpace());
3334 
3335   for (MemoryAccess *MA : AliasGroup)
3336     Accesses = Accesses.add_map(MA->getAccessRelation());
3337 
3338   Accesses = Accesses.intersect_domain(Domains);
3339   isl::union_set Locations = Accesses.range();
3340 
3341   bool LimitReached = false;
3342   for (isl::set Set : Locations.get_set_list()) {
3343     LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop);
3344     if (LimitReached)
3345       break;
3346   }
3347 
3348   return !LimitReached;
3349 }
3350 
3351 static isl::set getAccessDomain(MemoryAccess *MA) {
3352   isl::set Domain = MA->getStatement()->getDomain();
3353   Domain = Domain.project_out(isl::dim::set, 0, Domain.n_dim());
3354   return Domain.reset_tuple_id();
3355 }
3356 
3357 bool ScopBuilder::buildAliasChecks() {
3358   if (!PollyUseRuntimeAliasChecks)
3359     return true;
3360 
3361   if (buildAliasGroups()) {
3362     // Aliasing assumptions do not go through addAssumption but we still want to
3363     // collect statistics so we do it here explicitly.
3364     if (scop->getAliasGroups().size())
3365       Scop::incrementNumberOfAliasingAssumptions(1);
3366     return true;
3367   }
3368 
3369   // If a problem occurs while building the alias groups we need to delete
3370   // this SCoP and pretend it wasn't valid in the first place. To this end
3371   // we make the assumed context infeasible.
3372   scop->invalidate(ALIASING, DebugLoc());
3373 
3374   LLVM_DEBUG(
3375       dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr()
3376              << " could not be created as the number of parameters involved "
3377                 "is too high. The SCoP will be "
3378                 "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
3379                 "the maximal number of parameters but be advised that the "
3380                 "compile time might increase exponentially.\n\n");
3381   return false;
3382 }
3383 
3384 std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
3385 ScopBuilder::buildAliasGroupsForAccesses() {
3386   AliasSetTracker AST(AA);
3387 
3388   DenseMap<Value *, MemoryAccess *> PtrToAcc;
3389   DenseSet<const ScopArrayInfo *> HasWriteAccess;
3390   for (ScopStmt &Stmt : *scop) {
3391 
3392     isl::set StmtDomain = Stmt.getDomain();
3393     bool StmtDomainEmpty = StmtDomain.is_empty();
3394 
3395     // Statements with an empty domain will never be executed.
3396     if (StmtDomainEmpty)
3397       continue;
3398 
3399     for (MemoryAccess *MA : Stmt) {
3400       if (MA->isScalarKind())
3401         continue;
3402       if (!MA->isRead())
3403         HasWriteAccess.insert(MA->getScopArrayInfo());
3404       MemAccInst Acc(MA->getAccessInstruction());
3405       if (MA->isRead() && isa<MemTransferInst>(Acc))
3406         PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
3407       else
3408         PtrToAcc[Acc.getPointerOperand()] = MA;
3409       AST.add(Acc);
3410     }
3411   }
3412 
3413   AliasGroupVectorTy AliasGroups;
3414   for (AliasSet &AS : AST) {
3415     if (AS.isMustAlias() || AS.isForwardingAliasSet())
3416       continue;
3417     AliasGroupTy AG;
3418     for (auto &PR : AS)
3419       AG.push_back(PtrToAcc[PR.getValue()]);
3420     if (AG.size() < 2)
3421       continue;
3422     AliasGroups.push_back(std::move(AG));
3423   }
3424 
3425   return std::make_tuple(AliasGroups, HasWriteAccess);
3426 }
3427 
3428 bool ScopBuilder::buildAliasGroups() {
3429   // To create sound alias checks we perform the following steps:
3430   //   o) We partition each group into read-only and non-read-only accesses.
3431   //   o) For each group with more than one base pointer we then compute minimal
3432   //      and maximal accesses to each array of a group in the read-only and
3433   //      non-read-only partitions separately.
3434   AliasGroupVectorTy AliasGroups;
3435   DenseSet<const ScopArrayInfo *> HasWriteAccess;
3436 
3437   std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses();
3438 
3439   splitAliasGroupsByDomain(AliasGroups);
3440 
3441   for (AliasGroupTy &AG : AliasGroups) {
3442     if (!scop->hasFeasibleRuntimeContext())
3443       return false;
3444 
3445     {
3446       IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut);
3447       bool Valid = buildAliasGroup(AG, HasWriteAccess);
3448       if (!Valid)
3449         return false;
3450     }
3451     if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) {
3452       scop->invalidate(COMPLEXITY, DebugLoc());
3453       return false;
3454     }
3455   }
3456 
3457   return true;
3458 }
3459 
3460 bool ScopBuilder::buildAliasGroup(
3461     AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
3462   AliasGroupTy ReadOnlyAccesses;
3463   AliasGroupTy ReadWriteAccesses;
3464   SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
3465   SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
3466 
3467   if (AliasGroup.size() < 2)
3468     return true;
3469 
3470   for (MemoryAccess *Access : AliasGroup) {
3471     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
3472                                         Access->getAccessInstruction())
3473              << "Possibly aliasing pointer, use restrict keyword.");
3474     const ScopArrayInfo *Array = Access->getScopArrayInfo();
3475     if (HasWriteAccess.count(Array)) {
3476       ReadWriteArrays.insert(Array);
3477       ReadWriteAccesses.push_back(Access);
3478     } else {
3479       ReadOnlyArrays.insert(Array);
3480       ReadOnlyAccesses.push_back(Access);
3481     }
3482   }
3483 
3484   // If there are no read-only pointers and fewer than two read-write pointers,
3485   // no alias check is needed.
3486   if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
3487     return true;
3488 
3489   // If there is no read-write pointer, no alias check is needed.
3490   if (ReadWriteArrays.empty())
3491     return true;
3492 
3493   // For non-affine accesses, no alias check can be generated as we cannot
3494   // compute a sufficiently tight lower and upper bound: bail out.
3495   for (MemoryAccess *MA : AliasGroup) {
3496     if (!MA->isAffine()) {
3497       scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
3498                        MA->getAccessInstruction()->getParent());
3499       return false;
3500     }
3501   }
3502 
3503   // Ensure that for all memory accesses for which we generate alias checks,
3504   // their base pointers are available.
3505   for (MemoryAccess *MA : AliasGroup) {
3506     if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
3507       scop->addRequiredInvariantLoad(
3508           cast<LoadInst>(BasePtrMA->getAccessInstruction()));
3509   }
3510 
3513   Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
3514   Scop::MinMaxVectorTy MinMaxAccessesReadOnly;
3515 
3516   bool Valid;
3517 
3518   Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite);
3519 
3520   if (!Valid)
3521     return false;
3522 
3523   // Bail out if the number of values we need to compare is too large.
3524   // This is important as the number of comparisons grows quadratically with
3525   // the number of values we need to compare.
3526   if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
3527       RunTimeChecksMaxArraysPerGroup)
3528     return false;
3529 
3530   Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);
3531 
3532   scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
3533   if (!Valid)
3534     return false;
3535 
3536   return true;
3537 }
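// Conceptually (a sketch of the later-generated runtime check, not its
// exact form), for a read-write array A and another array B of the same
// group, code generation emits a bound comparison along the lines of
//
//   if (&A[maxA] <= &B[minB] || &B[maxB] <= &A[minA])
//     /* no overlap: the optimized SCoP version may run */
//
// built from the min/max pairs collected here. Read-only arrays are only
// checked against read-write arrays, never against each other.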
3538 
3539 void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
3540   for (unsigned u = 0; u < AliasGroups.size(); u++) {
3541     AliasGroupTy NewAG;
3542     AliasGroupTy &AG = AliasGroups[u];
3543     AliasGroupTy::iterator AGI = AG.begin();
3544     isl::set AGDomain = getAccessDomain(*AGI);
3545     while (AGI != AG.end()) {
3546       MemoryAccess *MA = *AGI;
3547       isl::set MADomain = getAccessDomain(MA);
3548       if (AGDomain.is_disjoint(MADomain)) {
3549         NewAG.push_back(MA);
3550         AGI = AG.erase(AGI);
3551       } else {
3552         AGDomain = AGDomain.unite(MADomain);
3553         AGI++;
3554       }
3555     }
3556     if (NewAG.size() > 1)
3557       AliasGroups.push_back(std::move(NewAG));
3558   }
3559 }
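// Illustrative effect: two accesses whose statements execute under
// disjoint parameter conditions, e.g. one only for N > 100 and the other
// only for N <= 100, can never both execute in the same run. They are
// therefore moved into separate alias groups and no runtime check is
// generated between them.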
3560 
3561 #ifndef NDEBUG
3562 static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
3563   auto PhysUse = VirtualUse::create(S, Op, &LI, false);
3564   auto VirtUse = VirtualUse::create(S, Op, &LI, true);
3565   assert(PhysUse.getKind() == VirtUse.getKind());
3566 }
3567 
3568 /// Check the consistency of every statement's MemoryAccesses.
3569 ///
3570 /// The check is carried out by expecting the "physical" kind of use (derived
3571 /// from the BasicBlock the instruction resides in) to be the same as the
3572 /// "virtual" kind of use (derived from a statement's MemoryAccess).
3573 ///
3574 /// The "physical" uses are taken by ensureValueRead to determine whether to
3575 /// create MemoryAccesses. When done, the kind of scalar access should be the
3576 /// same no matter which way it was derived.
3577 ///
3578 /// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
3579 /// can intentionally influence the kind of uses (no longer corresponding to the
3580 /// "physical" ones, hence called "virtual"). The CodeGenerator therefore has
3581 /// to pick up the virtual uses. But here in the ScopBuilder, this has not
3582 /// happened yet, so virtual and physical uses are equivalent.
3583 static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
3584   for (auto *BB : S->getRegion().blocks()) {
3585     for (auto &Inst : *BB) {
3586       auto *Stmt = S->getStmtFor(&Inst);
3587       if (!Stmt)
3588         continue;
3589 
3590       if (isIgnoredIntrinsic(&Inst))
3591         continue;
3592 
3593       // Branch conditions are encoded in the statement domains.
3594       if (Inst.isTerminator() && Stmt->isBlockStmt())
3595         continue;
3596 
3597       // Verify all uses.
3598       for (auto &Op : Inst.operands())
3599         verifyUse(S, Op, LI);
3600 
3601       // Stores do not produce values used by other statements.
3602       if (isa<StoreInst>(Inst))
3603         continue;
3604 
3605       // For every value defined in the block, also check that a use of that
3606       // value in the same statement would not be an inter-statement use. It can
3607       // still be synthesizable or load-hoisted, but these kinds of
3608       // instructions are not directly copied during code generation.
3609       auto VirtDef =
3610           VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
3611       assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
3612              VirtDef.getKind() == VirtualUse::Intra ||
3613              VirtDef.getKind() == VirtualUse::Hoisted);
3614     }
3615   }
3616 
3617   if (S->hasSingleExitEdge())
3618     return;
3619 
3620   // PHINodes in the SCoP region's exit block are also uses to be checked.
3621   if (!S->getRegion().isTopLevelRegion()) {
3622     for (auto &Inst : *S->getRegion().getExit()) {
3623       if (!isa<PHINode>(Inst))
3624         break;
3625 
3626       for (auto &Op : Inst.operands())
3627         verifyUse(S, Op, LI);
3628     }
3629   }
3630 }
3631 #endif
3632 
3633 void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
3634   scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE,
3635                       SD.getNextID()));
3636 
3637   buildStmts(R);
3638 
3639   // Create all invariant load instructions first. These are categorized as
3640   // 'synthesizable' and therefore are not part of any ScopStmt but need to be
3641   // created somewhere.
3642   const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
3643   for (BasicBlock *BB : scop->getRegion().blocks()) {
3644     if (isErrorBlock(*BB, scop->getRegion(), LI, DT))
3645       continue;
3646 
3647     for (Instruction &Inst : *BB) {
3648       LoadInst *Load = dyn_cast<LoadInst>(&Inst);
3649       if (!Load)
3650         continue;
3651 
3652       if (!RIL.count(Load))
3653         continue;
3654 
3655       // Invariant loads require a MemoryAccess to be created in some statement.
3656       // It is not important to which statement the MemoryAccess is added
3657       // because it will later be removed from the ScopStmt again. We choose the
3658       // first statement of the basic block the LoadInst is in.
3659       ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
3660       assert(!List.empty());
3661       ScopStmt *RILStmt = List.front();
3662       buildMemoryAccess(Load, RILStmt);
3663     }
3664   }
3665   buildAccessFunctions();
3666 
3667   // In case the region does not have an exiting block we will later (during
3668   // code generation) split the exit block. This will move potential PHI nodes
3669   // from the current exit block into the new region exiting block. Hence, PHI
3670   // nodes that are at this point not part of the region will become part of it.
3671   // To handle these PHI nodes later we will now model their operands as scalar
3672   // accesses. Note that we do not model anything in the exit block if we have
3673   // an exiting block in the region, as there will not be any splitting later.
3674   if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
3675     for (Instruction &Inst : *R.getExit()) {
3676       PHINode *PHI = dyn_cast<PHINode>(&Inst);
3677       if (!PHI)
3678         break;
3679 
3680       buildPHIAccesses(nullptr, PHI, nullptr, true);
3681     }
3682   }
3683 
3684   // Create memory accesses for global reads since all arrays are now known.
3685   auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
3686   for (auto GlobalReadPair : GlobalReads) {
3687     ScopStmt *GlobalReadStmt = GlobalReadPair.first;
3688     Instruction *GlobalRead = GlobalReadPair.second;
3689     for (auto *BP : ArrayBasePointers)
3690       addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
3691                      BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
3692   }
3693 
3694   buildInvariantEquivalenceClasses();
3695 
3696   // A map from basic blocks to their invalid domains.
3697   DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
3698 
3699   if (!buildDomains(&R, InvalidDomainMap)) {
3700     LLVM_DEBUG(
3701         dbgs() << "Bailing-out because buildDomains encountered problems\n");
3702     return;
3703   }
3704 
3705   addUserAssumptions(AC, InvalidDomainMap);
3706 
3707   // Initialize the invalid domain.
3708   for (ScopStmt &Stmt : scop->Stmts)
3709     if (Stmt.isBlockStmt())
3710       Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
3711     else
3712       Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
3713           Stmt.getRegion()->getNode())]);
3714 
3715   // Remove empty statements.
3716   // Exit early in case there are no executable statements left in this scop.
3717   scop->removeStmtNotInDomainMap();
3718   scop->simplifySCoP(false);
3719   if (scop->isEmpty()) {
3720     LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
3721     return;
3722   }
3723 
3724   // The ScopStmts now have enough information to initialize themselves.
3725   for (ScopStmt &Stmt : *scop) {
3726     collectSurroundingLoops(Stmt);
3727 
3728     buildDomain(Stmt);
3729     buildAccessRelations(Stmt);
3730 
3731     if (DetectReductions)
3732       checkForReductions(Stmt);
3733   }
3734 
3735   // Check early for a feasible runtime context.
3736   if (!scop->hasFeasibleRuntimeContext()) {
3737     LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
3738     return;
3739   }
3740 
3741   // Check early for profitability. Afterwards it cannot change anymore,
3742   // only the runtime context could become infeasible.
3743   if (!scop->isProfitable(UnprofitableScalarAccs)) {
3744     scop->invalidate(PROFITABLE, DebugLoc());
3745     LLVM_DEBUG(
3746         dbgs() << "Bailing-out because SCoP is not considered profitable\n");
3747     return;
3748   }
3749 
3750   buildSchedule();
3751 
3752   finalizeAccesses();
3753 
3754   scop->realignParams();
3755   addUserContext();
3756 
3757   // After the context was fully constructed, thus all our knowledge about
3758   // the parameters is in there, we add all recorded assumptions to the
3759   // assumed/invalid context.
3760   addRecordedAssumptions();
3761 
3762   scop->simplifyContexts();
3763   if (!buildAliasChecks()) {
3764     LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
3765     return;
3766   }
3767 
3768   hoistInvariantLoads();
3769   canonicalizeDynamicBasePtrs();
3770   verifyInvariantLoads();
3771   scop->simplifySCoP(true);
3772 
3773   // Check late for a feasible runtime context because profitability did not
3774   // change.
3775   if (!scop->hasFeasibleRuntimeContext()) {
3776     LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
3777     return;
3778   }
3779 
3780 #ifndef NDEBUG
3781   verifyUses(scop.get(), LI, DT);
3782 #endif
3783 }
3784 
3785 ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
3786                          const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
3787                          ScopDetection &SD, ScalarEvolution &SE,
3788                          OptimizationRemarkEmitter &ORE)
3789     : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
3790   DebugLoc Beg, End;
3791   auto P = getBBPairForRegion(R);
3792   getDebugLocations(P, Beg, End);
3793 
3794   std::string Msg = "SCoP begins here.";
3795   ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
3796            << Msg);
3797 
3798   buildScop(*R, AC);
3799 
3800   LLVM_DEBUG(dbgs() << *scop);
3801 
3802   if (!scop->hasFeasibleRuntimeContext()) {
3803     InfeasibleScops++;
3804     Msg = "SCoP ends here but was dismissed.";
3805     LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n");
3806     RecordedAssumptions.clear();
3807     scop.reset();
3808   } else {
3809     Msg = "SCoP ends here.";
3810     ++ScopFound;
3811     if (scop->getMaxLoopDepth() > 0)
3812       ++RichScopFound;
3813   }
3814 
3815   if (R->isTopLevelRegion())
3816     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
3817              << Msg);
3818   else
3819     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
3820              << Msg);
3821 }
3822