//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into forms suitable for efficient execution
// on the target.
//
// This pass performs strength reduction on array references inside loops that
// have the loop induction variable as one or more of their components; it
// rewrites expressions to take advantage of scaled-index addressing modes
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
// Terminology note: this code has a lot of handling for "post-increment" or
// "post-inc" users. This is not talking about post-increment addressing modes;
// it is instead talking about code like this:
//
//   %i = phi [ 0, %entry ], [ %i.next, %latch ]
//   ...
//   %i.next = add %i, 1
//   %c = icmp eq %i.next, %n
//
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>;
// however, it's useful to think about these as the same register, with some
// uses using the value of the register before the add and some using it after.
// In this example, the icmp is a post-increment user, since it uses %i.next,
// which is the value of the induction variable after the increment. The other
// common case of post-increment users is users outside the loop.
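//
// (The conversion between these pre- and post-increment forms is handled by
// the utilities declared in ScalarEvolutionNormalization.h, included below.)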
//
// TODO: More sophistication in the way Formulae are generated and filtered.
//
// TODO: Handle multiple loops at a time.
//
// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
//       of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
//       smaller encoding (on x86 at least).
//
// TODO: When a negated register is used by an add (such as in a list of
//       multiple base registers, or as the increment expression in an addrec),
//       we may not actually need both reg and (-1 * reg) in registers; the
//       negation can be implemented by using a sub instead of an add. The
//       lack of support for taking this into consideration when making
//       register pressure decisions is partly worked around by the "Special"
//       use kind.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <limits>
#include <map>
#include <numeric>
#include <optional>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "loop-reduce"

/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
/// worst cases before LSR burns too much compile time and stack space.
static const unsigned MaxIVUsers = 200;

/// Limit the size of expression that SCEV-based salvaging will attempt to
/// translate into a DIExpression.
/// Choose a maximum size such that debuginfo is not excessively increased and
/// the salvaging is not too expensive for the compiler.
static const unsigned MaxSCEVSalvageExpressionSize = 64;

// Clean up congruent phis after LSR phi expansion.
static cl::opt<bool> EnablePhiElim(
  "enable-lsr-phielim", cl::Hidden, cl::init(true),
  cl::desc("Enable LSR phi elimination"));

// This flag adds the instruction count to the solution cost comparison.
static cl::opt<bool> InsnsCost(
  "lsr-insns-cost", cl::Hidden, cl::init(true),
  cl::desc("Add instruction count to the LSR cost model"));

// Flag to choose how to narrow a complex LSR solution.
static cl::opt<bool> LSRExpNarrow(
  "lsr-exp-narrow", cl::Hidden, cl::init(false),
  cl::desc("Narrow LSR complex solution using"
           " the expected number of registers"));

// Flag to narrow search space by filtering non-optimal formulae with
// the same ScaledReg and Scale.
static cl::opt<bool> FilterSameScaledReg(
    "lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true),
    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

static cl::opt<TTI::AddressingModeKind> PreferredAddresingMode(
  "lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None),
   cl::desc("A flag that overrides the target's preferred addressing mode."),
   cl::values(clEnumValN(TTI::AMK_None,
                         "none",
                         "Don't prefer any addressing mode"),
              clEnumValN(TTI::AMK_PreIndexed,
                         "preindexed",
                         "Prefer pre-indexed addressing mode"),
              clEnumValN(TTI::AMK_PostIndexed,
                         "postindexed",
                         "Prefer post-indexed addressing mode")));

static cl::opt<unsigned> ComplexityLimit(
  "lsr-complexity-limit", cl::Hidden,
  cl::init(std::numeric_limits<uint16_t>::max()),
  cl::desc("LSR search space complexity limit"));

static cl::opt<unsigned> SetupCostDepthLimit(
    "lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
    cl::desc("The limit on recursion depth for LSR's setup cost"));

static cl::opt<bool> AllowTerminatingConditionFoldingAfterLSR(
    "lsr-term-fold", cl::Hidden, cl::init(false),
    cl::desc("Attempt to replace the primary IV with another IV."));

static cl::opt<bool> AllowDropSolutionIfLessProfitable(
    "lsr-drop-solution", cl::Hidden, cl::init(false),
    cl::desc("Attempt to drop the solution if it is less profitable"));

STATISTIC(NumTermFold,
          "Number of terminating condition folds recognized and performed");

#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
  "stress-ivchain", cl::Hidden, cl::init(false),
  cl::desc("Stress test LSR IV chains"));
#else
static bool StressIVChain = false;
#endif

namespace {

struct MemAccessTy {
  /// Used in situations where the accessed memory type is unknown.
  static const unsigned UnknownAddressSpace =
      std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;
  unsigned AddrSpace = UnknownAddressSpace;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

  bool operator==(MemAccessTy Other) const {
    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
  }

  bool operator!=(MemAccessTy Other) const { return !(*this == Other); }

  static MemAccessTy getUnknown(LLVMContext &Ctx,
                                unsigned AS = UnknownAddressSpace) {
    return MemAccessTy(Type::getVoidTy(Ctx), AS);
  }

  Type *getType() { return MemTy; }
};

/// This class holds data which is used to order reuse candidates.
class RegSortData {
public:
  /// This represents the set of LSRUse indices which reference
  /// a particular register.
  SmallBitVector UsedByIndices;

  void print(raw_ostream &OS) const;
  void dump() const;
};

} // end anonymous namespace

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void RegSortData::print(raw_ostream &OS) const {
  OS << "[NumUses=" << UsedByIndices.count() << ']';
}

LLVM_DUMP_METHOD void RegSortData::dump() const {
  print(errs()); errs() << '\n';
}
#endif

namespace {

/// Map register candidates to information about how they are used.
class RegUseTracker {
  using RegUsesTy = DenseMap<const SCEV *, RegSortData>;

  RegUsesTy RegUsesMap;
  SmallVector<const SCEV *, 16> RegSequence;

public:
  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

  const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;

  void clear();

  using iterator = SmallVectorImpl<const SCEV *>::iterator;
  using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;

  iterator begin() { return RegSequence.begin(); }
  iterator end()   { return RegSequence.end(); }
  const_iterator begin() const { return RegSequence.begin(); }
  const_iterator end() const   { return RegSequence.end(); }
};

} // end anonymous namespace

void
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
    RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &RSD = Pair.first->second;
  if (Pair.second)
    RegSequence.push_back(Reg);
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);
}

void
RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);
  RSD.UsedByIndices.reset(LUIdx);
}

void
RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  // Update RegUses. The data structure is not optimized for this purpose;
  // we must iterate through it and update each of the bit vectors.
  for (auto &Pair : RegUsesMap) {
    SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
        LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
  }
}

bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
    return false;
  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
  int i = UsedByIndices.find_first();
  if (i == -1) return false;
  if ((size_t)i != LUIdx) return true;
  return UsedByIndices.find_next(i) != -1;
}

const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;
}

void RegUseTracker::clear() {
  RegUsesMap.clear();
  RegSequence.clear();
}

namespace {

/// This class holds information that describes a formula for computing a
/// value that satisfies a use. It may include broken-out immediates and
/// scaled registers.
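///
/// As an illustrative example, an address expression such as
///   @gv + 4 + reg1 + 2 * {0,+,1}<%L>
/// could be held as a Formula with BaseGV = @gv, BaseOffset = 4,
/// BaseRegs = { reg1 }, Scale = 2, and ScaledReg = {0,+,1}<%L>.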
struct Formula {
  /// Global base address used for complex addressing.
  GlobalValue *BaseGV = nullptr;

  /// Base offset for complex addressing.
  int64_t BaseOffset = 0;

  /// Whether any complex addressing has a base register.
  bool HasBaseReg = false;

  /// The scale of any complex addressing.
  int64_t Scale = 0;

  /// The list of "base" registers for this use. When this is non-empty, the
  /// canonical representation of a formula is
  /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
  /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
  /// 3. The reg containing the recurrent expr related to the current loop
  /// should be put in the ScaledReg.
  /// #1 enforces that the scaled register is always used when at least two
  /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
  /// #2 enforces that 1 * reg is reg.
  /// #3 ensures invariant regs with respect to the current loop can be
  /// combined together in LSR codegen.
  /// This invariant can be temporarily broken while building a formula.
  /// However, every formula inserted into the LSRInstance must be in canonical
  /// form.
  SmallVector<const SCEV *, 4> BaseRegs;

  /// The 'scaled' register for this use. This should be non-null when Scale is
  /// not zero.
  const SCEV *ScaledReg = nullptr;

  /// An additional constant offset which is added near the use. This requires
  /// a temporary register, but the offset itself can live in an add immediate
  /// field rather than a register.
  int64_t UnfoldedOffset = 0;

  Formula() = default;

  void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);

  bool isCanonical(const Loop &L) const;

  void canonicalize(const Loop &L);

  bool unscale();

  bool hasZeroEnd() const;

  size_t getNumRegs() const;
  Type *getType() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;

  void print(raw_ostream &OS) const;
  void dump() const;
};

} // end anonymous namespace

/// Recursion helper for initialMatch.
static void DoInitialMatch(const SCEV *S, Loop *L,
                           SmallVectorImpl<const SCEV *> &Good,
                           SmallVectorImpl<const SCEV *> &Bad,
                           ScalarEvolution &SE) {
  // Collect expressions which properly dominate the loop header.
  if (SE.properlyDominates(S, L->getHeader())) {
    Good.push_back(S);
    return;
  }

  // Look at add operands.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    for (const SCEV *S : Add->operands())
      DoInitialMatch(S, L, Good, Bad, SE);
    return;
  }

  // Look at addrec operands.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
    if (!AR->getStart()->isZero() && AR->isAffine()) {
      DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
      DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
                                      AR->getStepRecurrence(SE),
                                      // FIXME: AR->getNoWrapFlags()
                                      AR->getLoop(), SCEV::FlagAnyWrap),
                     L, Good, Bad, SE);
      return;
    }

  // Handle a multiplication by -1 (negation) if it didn't fold.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
    if (Mul->getOperand(0)->isAllOnesValue()) {
      SmallVector<const SCEV *, 4> Ops(drop_begin(Mul->operands()));
      const SCEV *NewMul = SE.getMulExpr(Ops);

      SmallVector<const SCEV *, 4> MyGood;
      SmallVector<const SCEV *, 4> MyBad;
      DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
      const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
        SE.getEffectiveSCEVType(NewMul->getType())));
      for (const SCEV *S : MyGood)
        Good.push_back(SE.getMulExpr(NegOne, S));
      for (const SCEV *S : MyBad)
        Bad.push_back(SE.getMulExpr(NegOne, S));
      return;
    }

  // Ok, we can't do anything interesting. Just stuff the whole thing into a
  // register and hope for the best.
  Bad.push_back(S);
}

/// Incorporate loop-variant parts of S into this Formula, attempting to keep
/// all loop-invariant and loop-computable values in a single base register.
void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
  SmallVector<const SCEV *, 4> Good;
  SmallVector<const SCEV *, 4> Bad;
  DoInitialMatch(S, L, Good, Bad, SE);
  if (!Good.empty()) {
    const SCEV *Sum = SE.getAddExpr(Good);
    if (!Sum->isZero())
      BaseRegs.push_back(Sum);
    HasBaseReg = true;
  }
  if (!Bad.empty()) {
    const SCEV *Sum = SE.getAddExpr(Bad);
    if (!Sum->isZero())
      BaseRegs.push_back(Sum);
    HasBaseReg = true;
  }
  canonicalize(*L);
}

static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L) {
  return SCEVExprContains(S, [&L](const SCEV *S) {
    return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
  });
}

/// Check whether or not this formula satisfies the canonical
/// representation.
/// \see Formula::BaseRegs.
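///
/// For example, with respect to loop L, a formula with
/// ScaledReg = {0,+,1}<L>, Scale = 1 and BaseRegs = { reg1 } is canonical,
/// whereas one with no ScaledReg and BaseRegs = { reg1, {0,+,1}<L> } is not.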
bool Formula::isCanonical(const Loop &L) const {
  if (!ScaledReg)
    return BaseRegs.size() <= 1;

  if (Scale != 1)
    return true;

  if (Scale == 1 && BaseRegs.empty())
    return false;

  if (containsAddRecDependentOnLoop(ScaledReg, L))
    return true;

  // If ScaledReg is not a recurrent expr, or it is but its loop is not the
  // current loop, while BaseRegs contains a recurrent expr reg related to the
  // current loop, we want to swap the reg in BaseRegs with ScaledReg.
  return none_of(BaseRegs, [&L](const SCEV *S) {
    return containsAddRecDependentOnLoop(S, L);
  });
}

/// Helper method to morph a formula into its canonical representation.
/// \see Formula::BaseRegs.
/// Every formula having more than one base register must use the ScaledReg
/// field. Otherwise, we would have to do special cases everywhere in LSR
/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
/// On the other hand, 1*reg should be canonicalized into reg.
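///
/// For example, a formula with BaseRegs = { reg1, reg2 } and no ScaledReg is
/// morphed into BaseRegs = { reg1 }, Scale = 1, ScaledReg = reg2; if reg1 is
/// the operand that depends on the current loop (and reg2 is not), the two
/// are then swapped.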
void Formula::canonicalize(const Loop &L) {
  if (isCanonical(L))
    return;

  if (BaseRegs.empty()) {
    // No base reg? Use the scaled reg (which must have scale = 1) as a base
    // reg instead.
    assert(ScaledReg && "Expected 1*reg => reg");
    assert(Scale == 1 && "Expected 1*reg => reg");
    BaseRegs.push_back(ScaledReg);
    Scale = 0;
    ScaledReg = nullptr;
    return;
  }

  // Keep the invariant sum in BaseRegs and one of the variant sums in
  // ScaledReg.
  if (!ScaledReg) {
    ScaledReg = BaseRegs.pop_back_val();
    Scale = 1;
  }

  // If ScaledReg is an invariant with respect to L, find the reg from
  // BaseRegs containing the recurrent expr related to loop L. Swap the
  // reg with ScaledReg.
  if (!containsAddRecDependentOnLoop(ScaledReg, L)) {
    auto I = find_if(BaseRegs, [&L](const SCEV *S) {
      return containsAddRecDependentOnLoop(S, L);
    });
    if (I != BaseRegs.end())
      std::swap(ScaledReg, *I);
  }
  assert(isCanonical(L) && "Failed to canonicalize?");
}

/// Get rid of the scale in the formula.
/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
/// \return true if it was possible to get rid of the scale, false otherwise.
/// \note After this operation the formula may not be in the canonical form.
bool Formula::unscale() {
  if (Scale != 1)
    return false;
  Scale = 0;
  BaseRegs.push_back(ScaledReg);
  ScaledReg = nullptr;
  return true;
}

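/// Check whether this formula is just a single, offset-free base register,
/// i.e. exactly one base register with no scaled register and no immediate
/// offsets.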
bool Formula::hasZeroEnd() const {
  if (UnfoldedOffset || BaseOffset)
    return false;
  if (BaseRegs.size() != 1 || ScaledReg)
    return false;
  return true;
}

/// Return the total number of register operands used by this formula. This
/// does not include register uses implied by non-constant addrec strides.
size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();
}

/// Return the type of this formula, if it has one, or null otherwise. This
/// type is meaningless except for the bit size.
Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         BaseGV ? BaseGV->getType() :
         nullptr;
}

/// Delete the given base reg from the BaseRegs list.
void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())
    std::swap(S, BaseRegs.back());
  BaseRegs.pop_back();
}

/// Test if this formula references the given register.
bool Formula::referencesReg(const SCEV *S) const {
  return S == ScaledReg || is_contained(BaseRegs, S);
}

/// Test whether this formula uses registers which are used by uses other than
/// the use with the given index.
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
  if (ScaledReg)
    if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
      return true;
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
      return true;
  return false;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::print(raw_ostream &OS) const {
  bool First = true;
  if (BaseGV) {
    if (!First) OS << " + "; else First = false;
    BaseGV->printAsOperand(OS, /*PrintType=*/false);
  }
  if (BaseOffset != 0) {
    if (!First) OS << " + "; else First = false;
    OS << BaseOffset;
  }
  for (const SCEV *BaseReg : BaseRegs) {
    if (!First) OS << " + "; else First = false;
    OS << "reg(" << *BaseReg << ')';
  }
  if (HasBaseReg && BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: !HasBaseReg**";
  }
  if (Scale != 0) {
    if (!First) OS << " + "; else First = false;
    OS << Scale << "*reg(";
    if (ScaledReg)
      OS << *ScaledReg;
    else
      OS << "<unknown>";
    OS << ')';
  }
  if (UnfoldedOffset != 0) {
    if (!First) OS << " + ";
    OS << "imm(" << UnfoldedOffset << ')';
  }
}

LLVM_DUMP_METHOD void Formula::dump() const {
  print(errs()); errs() << '\n';
}
#endif

/// Return true if the given addrec can be sign-extended without changing its
/// value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
  Type *WideTy =
    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
  return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
}

/// Return true if the given add can be sign-extended without changing its
/// value.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
  Type *WideTy =
    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
  return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}

/// Return true if the given mul can be sign-extended without changing its
/// value.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
  Type *WideTy =
    IntegerType::get(SE.getContext(),
                     SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
  return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
}

/// Return an expression for LHS /s RHS, if it can be determined and if the
/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
/// is true, expressions like (X * Y) /s Y are simplified to X, ignoring that
/// the multiplication may overflow, which is useful when the result will be
/// used in a context where the most significant bits are ignored.
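///
/// For example, {0,+,4} /s 2 becomes {0,+,2}, and (8 + (4 * %x)) /s 4 becomes
/// (2 + %x), while (1 + %x) /s 2 yields null because the remainder of the
/// constant operand is not known to be zero.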
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
                                ScalarEvolution &SE,
                                bool IgnoreSignificantBits = false) {
  // Handle the trivial case, which works for any SCEV type.
  if (LHS == RHS)
    return SE.getConstant(LHS->getType(), 1);

  // Handle a few RHS special cases.
  const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
  if (RC) {
    const APInt &RA = RC->getAPInt();
    // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
    // some folding.
    if (RA.isAllOnes()) {
      if (LHS->getType()->isPointerTy())
        return nullptr;
      return SE.getMulExpr(LHS, RC);
    }
    // Handle x /s 1 as x.
    if (RA == 1)
      return LHS;
  }

  // Check for a division of a constant by a constant.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
    if (!RC)
      return nullptr;
    const APInt &LA = C->getAPInt();
    const APInt &RA = RC->getAPInt();
    if (LA.srem(RA) != 0)
      return nullptr;
    return SE.getConstant(LA.sdiv(RA));
  }

  // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
      const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
                                      IgnoreSignificantBits);
      if (!Step) return nullptr;
      const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
                                       IgnoreSignificantBits);
      if (!Start) return nullptr;
      // FlagNW is independent of the start value, step direction, and is
      // preserved with smaller magnitude steps.
      // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
      return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
    }
    return nullptr;
  }

  // Distribute the sdiv over add operands, if the add doesn't overflow.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
    if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
      SmallVector<const SCEV *, 8> Ops;
      for (const SCEV *S : Add->operands()) {
        const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits);
        if (!Op) return nullptr;
        Ops.push_back(Op);
      }
      return SE.getAddExpr(Ops);
    }
    return nullptr;
  }

  // Check for a multiply operand that we can pull RHS out of.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
    if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
      // Handle special case C1*X*Y /s C2*X*Y.
      if (const SCEVMulExpr *MulRHS = dyn_cast<SCEVMulExpr>(RHS)) {
        if (IgnoreSignificantBits || isMulSExtable(MulRHS, SE)) {
          const SCEVConstant *LC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
          const SCEVConstant *RC =
              dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
          if (LC && RC) {
            SmallVector<const SCEV *, 4> LOps(drop_begin(Mul->operands()));
            SmallVector<const SCEV *, 4> ROps(drop_begin(MulRHS->operands()));
            if (LOps == ROps)
              return getExactSDiv(LC, RC, SE, IgnoreSignificantBits);
          }
        }
      }

      SmallVector<const SCEV *, 4> Ops;
      bool Found = false;
      for (const SCEV *S : Mul->operands()) {
        if (!Found)
          if (const SCEV *Q = getExactSDiv(S, RHS, SE,
                                           IgnoreSignificantBits)) {
            S = Q;
            Found = true;
          }
        Ops.push_back(S);
      }
      return Found ? SE.getMulExpr(Ops) : nullptr;
    }
    return nullptr;
  }

  // Otherwise we don't know.
  return nullptr;
}

/// If S involves the addition of a constant integer value, return that integer
/// value, and mutate S to point to a new SCEV with that value excluded.
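/// For example, given S = (42 + %x), this returns 42 and rewrites S to %x.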
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
    if (C->getAPInt().getMinSignedBits() <= 64) {
      S = SE.getConstant(C->getType(), 0);
      return C->getValue()->getSExtValue();
    }
  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(Add->operands());
    int64_t Result = ExtractImmediate(NewOps.front(), SE);
    if (Result != 0)
      S = SE.getAddExpr(NewOps);
    return Result;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(AR->operands());
    int64_t Result = ExtractImmediate(NewOps.front(), SE);
    if (Result != 0)
      S = SE.getAddRecExpr(NewOps, AR->getLoop(),
                           // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
                           SCEV::FlagAnyWrap);
    return Result;
  }
  return 0;
}

/// If S involves the addition of a GlobalValue address, return that symbol,
/// and mutate S to point to a new SCEV with that value excluded.
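/// For example, given S = (@gv + %x), this returns @gv and rewrites S to %x.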
ExtractSymbol(const SCEV * & S,ScalarEvolution & SE)82609467b48Spatrick static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
82709467b48Spatrick   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
82809467b48Spatrick     if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
82909467b48Spatrick       S = SE.getConstant(GV->getType(), 0);
83009467b48Spatrick       return GV;
83109467b48Spatrick     }
83209467b48Spatrick   } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
83373471bf0Spatrick     SmallVector<const SCEV *, 8> NewOps(Add->operands());
83409467b48Spatrick     GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
83509467b48Spatrick     if (Result)
83609467b48Spatrick       S = SE.getAddExpr(NewOps);
83709467b48Spatrick     return Result;
83809467b48Spatrick   } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
83973471bf0Spatrick     SmallVector<const SCEV *, 8> NewOps(AR->operands());
84009467b48Spatrick     GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
84109467b48Spatrick     if (Result)
84209467b48Spatrick       S = SE.getAddRecExpr(NewOps, AR->getLoop(),
84309467b48Spatrick                            // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
84409467b48Spatrick                            SCEV::FlagAnyWrap);
84509467b48Spatrick     return Result;
84609467b48Spatrick   }
84709467b48Spatrick   return nullptr;
84809467b48Spatrick }
84909467b48Spatrick 
85009467b48Spatrick /// Returns true if the specified instruction is using the specified value as an
85109467b48Spatrick /// address.
isAddressUse(const TargetTransformInfo & TTI,Instruction * Inst,Value * OperandVal)85209467b48Spatrick static bool isAddressUse(const TargetTransformInfo &TTI,
85309467b48Spatrick                          Instruction *Inst, Value *OperandVal) {
85409467b48Spatrick   bool isAddress = isa<LoadInst>(Inst);
85509467b48Spatrick   if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
85609467b48Spatrick     if (SI->getPointerOperand() == OperandVal)
85709467b48Spatrick       isAddress = true;
85809467b48Spatrick   } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
85909467b48Spatrick     // Addressing modes can also be folded into prefetches and a variety
86009467b48Spatrick     // of intrinsics.
86109467b48Spatrick     switch (II->getIntrinsicID()) {
86209467b48Spatrick     case Intrinsic::memset:
86309467b48Spatrick     case Intrinsic::prefetch:
864097a140dSpatrick     case Intrinsic::masked_load:
86509467b48Spatrick       if (II->getArgOperand(0) == OperandVal)
86609467b48Spatrick         isAddress = true;
86709467b48Spatrick       break;
868097a140dSpatrick     case Intrinsic::masked_store:
869097a140dSpatrick       if (II->getArgOperand(1) == OperandVal)
870097a140dSpatrick         isAddress = true;
871097a140dSpatrick       break;
87209467b48Spatrick     case Intrinsic::memmove:
87309467b48Spatrick     case Intrinsic::memcpy:
87409467b48Spatrick       if (II->getArgOperand(0) == OperandVal ||
87509467b48Spatrick           II->getArgOperand(1) == OperandVal)
87609467b48Spatrick         isAddress = true;
87709467b48Spatrick       break;
87809467b48Spatrick     default: {
87909467b48Spatrick       MemIntrinsicInfo IntrInfo;
88009467b48Spatrick       if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {
88109467b48Spatrick         if (IntrInfo.PtrVal == OperandVal)
88209467b48Spatrick           isAddress = true;
88309467b48Spatrick       }
88409467b48Spatrick     }
88509467b48Spatrick     }
88609467b48Spatrick   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
88709467b48Spatrick     if (RMW->getPointerOperand() == OperandVal)
88809467b48Spatrick       isAddress = true;
88909467b48Spatrick   } else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
89009467b48Spatrick     if (CmpX->getPointerOperand() == OperandVal)
89109467b48Spatrick       isAddress = true;
89209467b48Spatrick   }
89309467b48Spatrick   return isAddress;
89409467b48Spatrick }
89509467b48Spatrick 
89609467b48Spatrick /// Return the type of the memory being accessed.
getAccessType(const TargetTransformInfo & TTI,Instruction * Inst,Value * OperandVal)89709467b48Spatrick static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
89809467b48Spatrick                                  Instruction *Inst, Value *OperandVal) {
89909467b48Spatrick   MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace);
90009467b48Spatrick   if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
90109467b48Spatrick     AccessTy.MemTy = SI->getOperand(0)->getType();
90209467b48Spatrick     AccessTy.AddrSpace = SI->getPointerAddressSpace();
90309467b48Spatrick   } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
90409467b48Spatrick     AccessTy.AddrSpace = LI->getPointerAddressSpace();
90509467b48Spatrick   } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
90609467b48Spatrick     AccessTy.AddrSpace = RMW->getPointerAddressSpace();
90709467b48Spatrick   } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
90809467b48Spatrick     AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
90909467b48Spatrick   } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
91009467b48Spatrick     switch (II->getIntrinsicID()) {
91109467b48Spatrick     case Intrinsic::prefetch:
91209467b48Spatrick     case Intrinsic::memset:
91309467b48Spatrick       AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
91409467b48Spatrick       AccessTy.MemTy = OperandVal->getType();
91509467b48Spatrick       break;
91609467b48Spatrick     case Intrinsic::memmove:
91709467b48Spatrick     case Intrinsic::memcpy:
91809467b48Spatrick       AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace();
91909467b48Spatrick       AccessTy.MemTy = OperandVal->getType();
92009467b48Spatrick       break;
921097a140dSpatrick     case Intrinsic::masked_load:
922097a140dSpatrick       AccessTy.AddrSpace =
923097a140dSpatrick           II->getArgOperand(0)->getType()->getPointerAddressSpace();
924097a140dSpatrick       break;
925097a140dSpatrick     case Intrinsic::masked_store:
926097a140dSpatrick       AccessTy.MemTy = II->getOperand(0)->getType();
927097a140dSpatrick       AccessTy.AddrSpace =
928097a140dSpatrick           II->getArgOperand(1)->getType()->getPointerAddressSpace();
929097a140dSpatrick       break;
93009467b48Spatrick     default: {
93109467b48Spatrick       MemIntrinsicInfo IntrInfo;
93209467b48Spatrick       if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {
93309467b48Spatrick         AccessTy.AddrSpace
93409467b48Spatrick           = IntrInfo.PtrVal->getType()->getPointerAddressSpace();
93509467b48Spatrick       }
93609467b48Spatrick 
93709467b48Spatrick       break;
93809467b48Spatrick     }
93909467b48Spatrick     }
94009467b48Spatrick   }
94109467b48Spatrick 
94209467b48Spatrick   // All pointers have the same requirements, so canonicalize them to an
94309467b48Spatrick   // arbitrary pointer type to minimize variation.
94409467b48Spatrick   if (PointerType *PTy = dyn_cast<PointerType>(AccessTy.MemTy))
94509467b48Spatrick     AccessTy.MemTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
94609467b48Spatrick                                       PTy->getAddressSpace());
94709467b48Spatrick 
94809467b48Spatrick   return AccessTy;
94909467b48Spatrick }
95009467b48Spatrick 
95109467b48Spatrick /// Return true if this AddRec is already a phi in its loop.
isExistingPhi(const SCEVAddRecExpr * AR,ScalarEvolution & SE)95209467b48Spatrick static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
95309467b48Spatrick   for (PHINode &PN : AR->getLoop()->getHeader()->phis()) {
95409467b48Spatrick     if (SE.isSCEVable(PN.getType()) &&
95509467b48Spatrick         (SE.getEffectiveSCEVType(PN.getType()) ==
95609467b48Spatrick          SE.getEffectiveSCEVType(AR->getType())) &&
95709467b48Spatrick         SE.getSCEV(&PN) == AR)
95809467b48Spatrick       return true;
95909467b48Spatrick   }
96009467b48Spatrick   return false;
96109467b48Spatrick }
96209467b48Spatrick 
96309467b48Spatrick /// Check if expanding this expression is likely to incur significant cost. This
96409467b48Spatrick /// is tricky because SCEV doesn't track which expressions are actually computed
96509467b48Spatrick /// by the current IR.
96609467b48Spatrick ///
96709467b48Spatrick /// We currently allow expansion of IV increments that involve adds,
96809467b48Spatrick /// multiplication by constants, and AddRecs from existing phis.
96909467b48Spatrick ///
97009467b48Spatrick /// TODO: Allow UDivExpr if we can find an existing IV increment that is an
97109467b48Spatrick /// obvious multiple of the UDivExpr.
isHighCostExpansion(const SCEV * S,SmallPtrSetImpl<const SCEV * > & Processed,ScalarEvolution & SE)97209467b48Spatrick static bool isHighCostExpansion(const SCEV *S,
97309467b48Spatrick                                 SmallPtrSetImpl<const SCEV*> &Processed,
97409467b48Spatrick                                 ScalarEvolution &SE) {
97509467b48Spatrick   // Zero/One operand expressions
97609467b48Spatrick   switch (S->getSCEVType()) {
97709467b48Spatrick   case scUnknown:
97809467b48Spatrick   case scConstant:
97909467b48Spatrick     return false;
98009467b48Spatrick   case scTruncate:
98109467b48Spatrick     return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
98209467b48Spatrick                                Processed, SE);
98309467b48Spatrick   case scZeroExtend:
98409467b48Spatrick     return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
98509467b48Spatrick                                Processed, SE);
98609467b48Spatrick   case scSignExtend:
98709467b48Spatrick     return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
98809467b48Spatrick                                Processed, SE);
98973471bf0Spatrick   default:
99073471bf0Spatrick     break;
99109467b48Spatrick   }
99209467b48Spatrick 
99309467b48Spatrick   if (!Processed.insert(S).second)
99409467b48Spatrick     return false;
99509467b48Spatrick 
99609467b48Spatrick   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
99709467b48Spatrick     for (const SCEV *S : Add->operands()) {
99809467b48Spatrick       if (isHighCostExpansion(S, Processed, SE))
99909467b48Spatrick         return true;
100009467b48Spatrick     }
100109467b48Spatrick     return false;
100209467b48Spatrick   }
100309467b48Spatrick 
100409467b48Spatrick   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
100509467b48Spatrick     if (Mul->getNumOperands() == 2) {
100609467b48Spatrick       // Multiplication by a constant is ok
100709467b48Spatrick       if (isa<SCEVConstant>(Mul->getOperand(0)))
100809467b48Spatrick         return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
100909467b48Spatrick 
101009467b48Spatrick       // If we have the value of one operand, check if an existing
101109467b48Spatrick       // multiplication already generates this expression.
101209467b48Spatrick       if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
101309467b48Spatrick         Value *UVal = U->getValue();
101409467b48Spatrick         for (User *UR : UVal->users()) {
101509467b48Spatrick           // If U is a constant, it may be used by a ConstantExpr.
101609467b48Spatrick           Instruction *UI = dyn_cast<Instruction>(UR);
101709467b48Spatrick           if (UI && UI->getOpcode() == Instruction::Mul &&
101809467b48Spatrick               SE.isSCEVable(UI->getType())) {
101909467b48Spatrick             return SE.getSCEV(UI) == Mul;
102009467b48Spatrick           }
102109467b48Spatrick         }
102209467b48Spatrick       }
102309467b48Spatrick     }
102409467b48Spatrick   }
102509467b48Spatrick 
102609467b48Spatrick   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
102709467b48Spatrick     if (isExistingPhi(AR, SE))
102809467b48Spatrick       return false;
102909467b48Spatrick   }
103009467b48Spatrick 
103109467b48Spatrick   // For now, consider any other type of expression (div/mul/min/max) high cost.
103209467b48Spatrick   return true;
103309467b48Spatrick }
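// A worked illustration of the policy above (SCEV notation, hypothetical
// loop): (4 * {0,+,4}<%loop>) is cheap, being a constant multiple of an
// addrec that is assumed to already exist as a header phi, while
// ({0,+,1}<%loop> /u %n) is reported as high cost because nothing proves
// the IR already computes the udiv.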
103409467b48Spatrick 
103509467b48Spatrick namespace {
103609467b48Spatrick 
103709467b48Spatrick class LSRUse;
103809467b48Spatrick 
103909467b48Spatrick } // end anonymous namespace
104009467b48Spatrick 
104109467b48Spatrick /// Check if the addressing mode defined by \p F is completely
104209467b48Spatrick /// folded in \p LU at isel time.
104309467b48Spatrick /// This includes address-mode folding and special icmp tricks.
104409467b48Spatrick /// This function returns true if \p LU can accommodate what \p F
104509467b48Spatrick /// defines and up to 1 base + 1 scaled + offset.
104609467b48Spatrick /// In other words, if \p F has several base registers, this function may
104709467b48Spatrick /// still return true. Therefore, users still need to account for
104809467b48Spatrick /// additional base registers and/or unfolded offsets to derive an
104909467b48Spatrick /// accurate cost model.
105009467b48Spatrick static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
105109467b48Spatrick                                  const LSRUse &LU, const Formula &F);
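// As a target-dependent illustration: on x86, a formula such as
// BaseReg + 4*ScaledReg + 8 for an Address use maps onto a single memory
// operand, e.g.
//
//   movl 8(%rdi,%rsi,4), %eax
//
// so isAMCompletelyFolded would return true for it.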
105209467b48Spatrick 
105309467b48Spatrick // Get the cost of the scaling factor used in F for LU.
105473471bf0Spatrick static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
105509467b48Spatrick                                             const LSRUse &LU, const Formula &F,
105609467b48Spatrick                                             const Loop &L);
105709467b48Spatrick 
105809467b48Spatrick namespace {
105909467b48Spatrick 
106009467b48Spatrick /// This class is used to measure and compare candidate formulae.
106109467b48Spatrick class Cost {
106209467b48Spatrick   const Loop *L = nullptr;
106309467b48Spatrick   ScalarEvolution *SE = nullptr;
106409467b48Spatrick   const TargetTransformInfo *TTI = nullptr;
106509467b48Spatrick   TargetTransformInfo::LSRCost C;
106673471bf0Spatrick   TTI::AddressingModeKind AMK = TTI::AMK_None;
106709467b48Spatrick 
106809467b48Spatrick public:
106909467b48Spatrick   Cost() = delete;
107073471bf0Spatrick   Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
107173471bf0Spatrick        TTI::AddressingModeKind AMK) :
107273471bf0Spatrick     L(L), SE(&SE), TTI(&TTI), AMK(AMK) {
107309467b48Spatrick     C.Insns = 0;
107409467b48Spatrick     C.NumRegs = 0;
107509467b48Spatrick     C.AddRecCost = 0;
107609467b48Spatrick     C.NumIVMuls = 0;
107709467b48Spatrick     C.NumBaseAdds = 0;
107809467b48Spatrick     C.ImmCost = 0;
107909467b48Spatrick     C.SetupCost = 0;
108009467b48Spatrick     C.ScaleCost = 0;
108109467b48Spatrick   }
108209467b48Spatrick 
1083*d415bd75Srobert   bool isLess(const Cost &Other) const;
108409467b48Spatrick 
108509467b48Spatrick   void Lose();
108609467b48Spatrick 
108709467b48Spatrick #ifndef NDEBUG
108809467b48Spatrick   // Once any of the metrics loses, they must all remain losers.
108909467b48Spatrick   bool isValid() {
109009467b48Spatrick     return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
109109467b48Spatrick              | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
109209467b48Spatrick       || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
109309467b48Spatrick            & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);
109409467b48Spatrick   }
109509467b48Spatrick #endif
109609467b48Spatrick 
109709467b48Spatrick   bool isLoser() {
109809467b48Spatrick     assert(isValid() && "invalid cost");
109909467b48Spatrick     return C.NumRegs == ~0u;
110009467b48Spatrick   }
110109467b48Spatrick 
110209467b48Spatrick   void RateFormula(const Formula &F,
110309467b48Spatrick                    SmallPtrSetImpl<const SCEV *> &Regs,
110409467b48Spatrick                    const DenseSet<const SCEV *> &VisitedRegs,
110509467b48Spatrick                    const LSRUse &LU,
110609467b48Spatrick                    SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);
110709467b48Spatrick 
110809467b48Spatrick   void print(raw_ostream &OS) const;
110909467b48Spatrick   void dump() const;
111009467b48Spatrick 
111109467b48Spatrick private:
111209467b48Spatrick   void RateRegister(const Formula &F, const SCEV *Reg,
111309467b48Spatrick                     SmallPtrSetImpl<const SCEV *> &Regs);
111409467b48Spatrick   void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
111509467b48Spatrick                            SmallPtrSetImpl<const SCEV *> &Regs,
111609467b48Spatrick                            SmallPtrSetImpl<const SCEV *> *LoserRegs);
111709467b48Spatrick };
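// A simplified sketch of how the solver is expected to use this class; the
// real driver (SolveRecurse, later in this file) threads more state through:
//
//   SmallPtrSet<const SCEV *, 16> Regs;
//   Cost NewCost(L, SE, TTI, AMK);
//   NewCost.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs);
//   if (!NewCost.isLoser() && NewCost.isLess(SolutionCost))
//     ; // F remains a candidate for this use.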
111809467b48Spatrick 
111909467b48Spatrick /// An operand value in an instruction which is to be replaced with some
112009467b48Spatrick /// equivalent, possibly strength-reduced, replacement.
112109467b48Spatrick struct LSRFixup {
112209467b48Spatrick   /// The instruction which will be updated.
112309467b48Spatrick   Instruction *UserInst = nullptr;
112409467b48Spatrick 
112509467b48Spatrick   /// The operand of the instruction which will be replaced. The operand may be
112609467b48Spatrick   /// used more than once; every instance will be replaced.
112709467b48Spatrick   Value *OperandValToReplace = nullptr;
112809467b48Spatrick 
112909467b48Spatrick   /// If this user is to use the post-incremented value of an induction
113009467b48Spatrick   /// variable, this set is non-empty and holds the loops associated with the
113109467b48Spatrick   /// induction variable.
113209467b48Spatrick   PostIncLoopSet PostIncLoops;
113309467b48Spatrick 
113409467b48Spatrick   /// A constant offset to be added to the LSRUse expression.  This allows
113509467b48Spatrick   /// multiple fixups to share the same LSRUse with different offsets, for
113609467b48Spatrick   /// example in an unrolled loop.
113709467b48Spatrick   int64_t Offset = 0;
113809467b48Spatrick 
113909467b48Spatrick   LSRFixup() = default;
114009467b48Spatrick 
114109467b48Spatrick   bool isUseFullyOutsideLoop(const Loop *L) const;
114209467b48Spatrick 
114309467b48Spatrick   void print(raw_ostream &OS) const;
114409467b48Spatrick   void dump() const;
114509467b48Spatrick };
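// For example (hypothetical IR, loop unrolled by two): loads of a[i] and
// a[i+1] can share one LSRUse for the address expression, carried by two
// fixups whose Offsets differ by the element size:
//
//   %v0 = load i32, ptr %p0   ; fixup with Offset = 0
//   %v1 = load i32, ptr %p1   ; fixup with Offset = 4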
114609467b48Spatrick 
114709467b48Spatrick /// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted
114809467b48Spatrick /// SmallVectors of const SCEV*.
114909467b48Spatrick struct UniquifierDenseMapInfo {
115009467b48Spatrick   static SmallVector<const SCEV *, 4> getEmptyKey() {
115109467b48Spatrick     SmallVector<const SCEV *, 4>  V;
115209467b48Spatrick     V.push_back(reinterpret_cast<const SCEV *>(-1));
115309467b48Spatrick     return V;
115409467b48Spatrick   }
115509467b48Spatrick 
115609467b48Spatrick   static SmallVector<const SCEV *, 4> getTombstoneKey() {
115709467b48Spatrick     SmallVector<const SCEV *, 4> V;
115809467b48Spatrick     V.push_back(reinterpret_cast<const SCEV *>(-2));
115909467b48Spatrick     return V;
116009467b48Spatrick   }
116109467b48Spatrick 
116209467b48Spatrick   static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
116309467b48Spatrick     return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
116409467b48Spatrick   }
116509467b48Spatrick 
116609467b48Spatrick   static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
116709467b48Spatrick                       const SmallVector<const SCEV *, 4> &RHS) {
116809467b48Spatrick     return LHS == RHS;
116909467b48Spatrick   }
117009467b48Spatrick };
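// The -1/-2 pointer payloads above follow the usual DenseMapInfo convention:
// they are sentinel keys that cannot collide with any real sorted register
// list, which is what lets LSRUse below key a DenseSet directly on
// SmallVector<const SCEV *, 4>.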
117109467b48Spatrick 
117209467b48Spatrick /// This class holds the state that LSR keeps for each use in IVUsers, as well
117309467b48Spatrick /// as uses invented by LSR itself. It includes information about what kinds of
117409467b48Spatrick /// things can be folded into the user, information about the user itself, and
117509467b48Spatrick /// information about how the use may be satisfied.  TODO: Represent multiple
117609467b48Spatrick /// users of the same expression in common?
117709467b48Spatrick class LSRUse {
117809467b48Spatrick   DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
117909467b48Spatrick 
118009467b48Spatrick public:
118109467b48Spatrick   /// An enum for a kind of use, indicating what types of scaled and immediate
118209467b48Spatrick   /// operands it might support.
118309467b48Spatrick   enum KindType {
118409467b48Spatrick     Basic,   ///< A normal use, with no folding.
118509467b48Spatrick     Special, ///< A special case of basic, allowing -1 scales.
118609467b48Spatrick     Address, ///< An address use; folding according to TargetLowering
118709467b48Spatrick     ICmpZero ///< An equality icmp with both operands folded into one.
118809467b48Spatrick     // TODO: Add a generic icmp too?
118909467b48Spatrick   };
119009467b48Spatrick 
119109467b48Spatrick   using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;
119209467b48Spatrick 
119309467b48Spatrick   KindType Kind;
119409467b48Spatrick   MemAccessTy AccessTy;
119509467b48Spatrick 
119609467b48Spatrick   /// The list of operands which are to be replaced.
119709467b48Spatrick   SmallVector<LSRFixup, 8> Fixups;
119809467b48Spatrick 
119909467b48Spatrick   /// Keep track of the min and max offsets of the fixups.
120009467b48Spatrick   int64_t MinOffset = std::numeric_limits<int64_t>::max();
120109467b48Spatrick   int64_t MaxOffset = std::numeric_limits<int64_t>::min();
120209467b48Spatrick 
120309467b48Spatrick   /// This records whether all of the fixups using this LSRUse are outside of
120409467b48Spatrick   /// the loop, in which case some special-case heuristics may be used.
120509467b48Spatrick   bool AllFixupsOutsideLoop = true;
120609467b48Spatrick 
120709467b48Spatrick   /// RigidFormula is set to true to guarantee that this use will be associated
120809467b48Spatrick   /// with a single formula--the one that initially matched. Some SCEV
120909467b48Spatrick   /// expressions cannot be expanded. This allows LSR to consider the registers
121009467b48Spatrick   /// used by those expressions without the need to expand them later after
121109467b48Spatrick   /// changing the formula.
121209467b48Spatrick   bool RigidFormula = false;
121309467b48Spatrick 
121409467b48Spatrick   /// This records the widest use type for any fixup using this
121509467b48Spatrick   /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
121609467b48Spatrick   /// fixup widths to be equivalent, because the narrower one may be relying on
121709467b48Spatrick   /// the implicit truncation to truncate away bogus bits.
121809467b48Spatrick   Type *WidestFixupType = nullptr;
121909467b48Spatrick 
122009467b48Spatrick   /// A list of ways to build a value that can satisfy this user.  After the
122109467b48Spatrick   /// list is populated, one of these is selected heuristically and used to
122209467b48Spatrick   /// formulate a replacement for OperandValToReplace in UserInst.
122309467b48Spatrick   SmallVector<Formula, 12> Formulae;
122409467b48Spatrick 
122509467b48Spatrick   /// The set of register candidates used by all formulae in this LSRUse.
122609467b48Spatrick   SmallPtrSet<const SCEV *, 4> Regs;
122709467b48Spatrick 
122809467b48Spatrick   LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}
122909467b48Spatrick 
123009467b48Spatrick   LSRFixup &getNewFixup() {
123109467b48Spatrick     Fixups.push_back(LSRFixup());
123209467b48Spatrick     return Fixups.back();
123309467b48Spatrick   }
123409467b48Spatrick 
123509467b48Spatrick   void pushFixup(LSRFixup &f) {
123609467b48Spatrick     Fixups.push_back(f);
123709467b48Spatrick     if (f.Offset > MaxOffset)
123809467b48Spatrick       MaxOffset = f.Offset;
123909467b48Spatrick     if (f.Offset < MinOffset)
124009467b48Spatrick       MinOffset = f.Offset;
124109467b48Spatrick   }
124209467b48Spatrick 
124309467b48Spatrick   bool HasFormulaWithSameRegs(const Formula &F) const;
124409467b48Spatrick   float getNotSelectedProbability(const SCEV *Reg) const;
124509467b48Spatrick   bool InsertFormula(const Formula &F, const Loop &L);
124609467b48Spatrick   void DeleteFormula(Formula &F);
124709467b48Spatrick   void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
124809467b48Spatrick 
124909467b48Spatrick   void print(raw_ostream &OS) const;
125009467b48Spatrick   void dump() const;
125109467b48Spatrick };
125209467b48Spatrick 
125309467b48Spatrick } // end anonymous namespace
125409467b48Spatrick 
125509467b48Spatrick static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
125609467b48Spatrick                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
125709467b48Spatrick                                  GlobalValue *BaseGV, int64_t BaseOffset,
125809467b48Spatrick                                  bool HasBaseReg, int64_t Scale,
125909467b48Spatrick                                  Instruction *Fixup = nullptr);
126009467b48Spatrick 
126109467b48Spatrick static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {
126209467b48Spatrick   if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
126309467b48Spatrick     return 1;
126409467b48Spatrick   if (Depth == 0)
126509467b48Spatrick     return 0;
126609467b48Spatrick   if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
126709467b48Spatrick     return getSetupCost(S->getStart(), Depth - 1);
126873471bf0Spatrick   if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
126909467b48Spatrick     return getSetupCost(S->getOperand(), Depth - 1);
127009467b48Spatrick   if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
1271*d415bd75Srobert     return std::accumulate(S->operands().begin(), S->operands().end(), 0,
127209467b48Spatrick                            [&](unsigned i, const SCEV *Reg) {
127309467b48Spatrick                              return i + getSetupCost(Reg, Depth - 1);
127409467b48Spatrick                            });
127509467b48Spatrick   if (auto S = dyn_cast<SCEVUDivExpr>(Reg))
127609467b48Spatrick     return getSetupCost(S->getLHS(), Depth - 1) +
127709467b48Spatrick            getSetupCost(S->getRHS(), Depth - 1);
127809467b48Spatrick   return 0;
127909467b48Spatrick }
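// Hand-evaluating the recursion on a small example: for Reg = {(%a + %b),+,4}
// the addrec case recurses into the start value, whose SCEVAddExpr (NAry)
// case sums its operands' costs, giving 1 + 1 = 2: two values that must be
// materialized in the preheader. (This assumes the depth limit is not hit.)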
128009467b48Spatrick 
128109467b48Spatrick /// Tally up interesting quantities from the given register.
128209467b48Spatrick void Cost::RateRegister(const Formula &F, const SCEV *Reg,
128309467b48Spatrick                         SmallPtrSetImpl<const SCEV *> &Regs) {
128409467b48Spatrick   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
128509467b48Spatrick     // If this is an addrec for another loop, it should be an invariant
128609467b48Spatrick     // with respect to L since L is the innermost loop (at least
128709467b48Spatrick     // for now LSR only handles innermost loops).
128809467b48Spatrick     if (AR->getLoop() != L) {
128909467b48Spatrick       // If the AddRec exists, consider its register free and leave it alone.
129073471bf0Spatrick       if (isExistingPhi(AR, *SE) && AMK != TTI::AMK_PostIndexed)
129109467b48Spatrick         return;
129209467b48Spatrick 
129309467b48Spatrick       // It is bad to allow LSR for the current loop to add induction
129409467b48Spatrick       // variables for its sibling loops.
129509467b48Spatrick       if (!AR->getLoop()->contains(L)) {
129609467b48Spatrick         Lose();
129709467b48Spatrick         return;
129809467b48Spatrick       }
129909467b48Spatrick 
130009467b48Spatrick       // Otherwise, it will be an invariant with respect to Loop L.
130109467b48Spatrick       ++C.NumRegs;
130209467b48Spatrick       return;
130309467b48Spatrick     }
130409467b48Spatrick 
130509467b48Spatrick     unsigned LoopCost = 1;
130609467b48Spatrick     if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
130709467b48Spatrick         TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
130809467b48Spatrick 
130909467b48Spatrick       // If the step size matches the base offset, we could use pre-indexed
131009467b48Spatrick       // addressing.
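      // (For instance, on a target with pre-indexed loads such as AArch64,
      // {%base,+,8} feeding a load whose formula has BaseOffset == 8 can
      // become "ldr x0, [x1, #8]!", so the addrec costs no extra loop
      // instruction.)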
131173471bf0Spatrick       if (AMK == TTI::AMK_PreIndexed) {
131209467b48Spatrick         if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
131309467b48Spatrick           if (Step->getAPInt() == F.BaseOffset)
131409467b48Spatrick             LoopCost = 0;
131573471bf0Spatrick       } else if (AMK == TTI::AMK_PostIndexed) {
131609467b48Spatrick         const SCEV *LoopStep = AR->getStepRecurrence(*SE);
131709467b48Spatrick         if (isa<SCEVConstant>(LoopStep)) {
131809467b48Spatrick           const SCEV *LoopStart = AR->getStart();
131909467b48Spatrick           if (!isa<SCEVConstant>(LoopStart) &&
132009467b48Spatrick               SE->isLoopInvariant(LoopStart, L))
132109467b48Spatrick             LoopCost = 0;
132209467b48Spatrick         }
132309467b48Spatrick       }
132409467b48Spatrick     }
132509467b48Spatrick     C.AddRecCost += LoopCost;
132609467b48Spatrick 
132709467b48Spatrick     // Add the step value register, if it needs one.
132809467b48Spatrick     // TODO: The non-affine case isn't precisely modeled here.
132909467b48Spatrick     if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
133009467b48Spatrick       if (!Regs.count(AR->getOperand(1))) {
133109467b48Spatrick         RateRegister(F, AR->getOperand(1), Regs);
133209467b48Spatrick         if (isLoser())
133309467b48Spatrick           return;
133409467b48Spatrick       }
133509467b48Spatrick     }
133609467b48Spatrick   }
133709467b48Spatrick   ++C.NumRegs;
133809467b48Spatrick 
133909467b48Spatrick   // Rough heuristic; favor registers which don't require extra setup
134009467b48Spatrick   // instructions in the preheader.
134109467b48Spatrick   C.SetupCost += getSetupCost(Reg, SetupCostDepthLimit);
134209467b48Spatrick   // Ensure we don't, even with the recursion limit, produce invalid costs.
134309467b48Spatrick   C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);
134409467b48Spatrick 
134509467b48Spatrick   C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
134609467b48Spatrick                SE->hasComputableLoopEvolution(Reg, L);
134709467b48Spatrick }
134809467b48Spatrick 
134909467b48Spatrick /// Record this register in the set. If we haven't seen it before, rate
135009467b48Spatrick /// it. Optional LoserRegs provides a way to declare any formula that refers to
135109467b48Spatrick /// one of those regs an instant loser.
135209467b48Spatrick void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
135309467b48Spatrick                                SmallPtrSetImpl<const SCEV *> &Regs,
135409467b48Spatrick                                SmallPtrSetImpl<const SCEV *> *LoserRegs) {
135509467b48Spatrick   if (LoserRegs && LoserRegs->count(Reg)) {
135609467b48Spatrick     Lose();
135709467b48Spatrick     return;
135809467b48Spatrick   }
135909467b48Spatrick   if (Regs.insert(Reg).second) {
136009467b48Spatrick     RateRegister(F, Reg, Regs);
136109467b48Spatrick     if (LoserRegs && isLoser())
136209467b48Spatrick       LoserRegs->insert(Reg);
136309467b48Spatrick   }
136409467b48Spatrick }
136509467b48Spatrick 
136609467b48Spatrick void Cost::RateFormula(const Formula &F,
136709467b48Spatrick                        SmallPtrSetImpl<const SCEV *> &Regs,
136809467b48Spatrick                        const DenseSet<const SCEV *> &VisitedRegs,
136909467b48Spatrick                        const LSRUse &LU,
137009467b48Spatrick                        SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1371*d415bd75Srobert   if (isLoser())
1372*d415bd75Srobert     return;
137309467b48Spatrick   assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
137409467b48Spatrick   // Tally up the registers.
137509467b48Spatrick   unsigned PrevAddRecCost = C.AddRecCost;
137609467b48Spatrick   unsigned PrevNumRegs = C.NumRegs;
137709467b48Spatrick   unsigned PrevNumBaseAdds = C.NumBaseAdds;
137809467b48Spatrick   if (const SCEV *ScaledReg = F.ScaledReg) {
137909467b48Spatrick     if (VisitedRegs.count(ScaledReg)) {
138009467b48Spatrick       Lose();
138109467b48Spatrick       return;
138209467b48Spatrick     }
138309467b48Spatrick     RatePrimaryRegister(F, ScaledReg, Regs, LoserRegs);
138409467b48Spatrick     if (isLoser())
138509467b48Spatrick       return;
138609467b48Spatrick   }
138709467b48Spatrick   for (const SCEV *BaseReg : F.BaseRegs) {
138809467b48Spatrick     if (VisitedRegs.count(BaseReg)) {
138909467b48Spatrick       Lose();
139009467b48Spatrick       return;
139109467b48Spatrick     }
139209467b48Spatrick     RatePrimaryRegister(F, BaseReg, Regs, LoserRegs);
139309467b48Spatrick     if (isLoser())
139409467b48Spatrick       return;
139509467b48Spatrick   }
139609467b48Spatrick 
139709467b48Spatrick   // Determine how many (unfolded) adds we'll need inside the loop.
139809467b48Spatrick   size_t NumBaseParts = F.getNumRegs();
139909467b48Spatrick   if (NumBaseParts > 1)
140009467b48Spatrick     // Do not count the base and a possible second register if the target
140109467b48Spatrick     // allows folding two registers into one addressing mode.
140209467b48Spatrick     C.NumBaseAdds +=
140309467b48Spatrick         NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(*TTI, LU, F)));
140409467b48Spatrick   C.NumBaseAdds += (F.UnfoldedOffset != 0);
140509467b48Spatrick 
140609467b48Spatrick   // Accumulate non-free scaling amounts.
140773471bf0Spatrick   C.ScaleCost += *getScalingFactorCost(*TTI, LU, F, *L).getValue();
140809467b48Spatrick 
140909467b48Spatrick   // Tally up the non-zero immediates.
141009467b48Spatrick   for (const LSRFixup &Fixup : LU.Fixups) {
141109467b48Spatrick     int64_t O = Fixup.Offset;
141209467b48Spatrick     int64_t Offset = (uint64_t)O + F.BaseOffset;
141309467b48Spatrick     if (F.BaseGV)
141409467b48Spatrick       C.ImmCost += 64; // Handle symbolic values conservatively.
141509467b48Spatrick                      // TODO: This should probably be the pointer size.
141609467b48Spatrick     else if (Offset != 0)
141709467b48Spatrick       C.ImmCost += APInt(64, Offset, true).getMinSignedBits();
141809467b48Spatrick 
141909467b48Spatrick     // Check with target if this offset with this instruction is
142009467b48Spatrick     // specifically not supported.
142109467b48Spatrick     if (LU.Kind == LSRUse::Address && Offset != 0 &&
142209467b48Spatrick         !isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
142309467b48Spatrick                               Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
142409467b48Spatrick       C.NumBaseAdds++;
142509467b48Spatrick   }
142609467b48Spatrick 
142709467b48Spatrick   // If we don't count instruction cost, exit here.
142809467b48Spatrick   if (!InsnsCost) {
142909467b48Spatrick     assert(isValid() && "invalid cost");
143009467b48Spatrick     return;
143109467b48Spatrick   }
143209467b48Spatrick 
143309467b48Spatrick   // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
143409467b48Spatrick   // an additional instruction (at least a fill).
143509467b48Spatrick   // TODO: Do we need to distinguish by register class?
143609467b48Spatrick   unsigned TTIRegNum = TTI->getNumberOfRegisters(
143709467b48Spatrick                        TTI->getRegisterClassForType(false, F.getType())) - 1;
143809467b48Spatrick   if (C.NumRegs > TTIRegNum) {
143909467b48Spatrick     // The cost already exceeded TTIRegNum, so only newly added registers can
144009467b48Spatrick     // add new instructions.
144109467b48Spatrick     if (PrevNumRegs > TTIRegNum)
144209467b48Spatrick       C.Insns += (C.NumRegs - PrevNumRegs);
144309467b48Spatrick     else
144409467b48Spatrick       C.Insns += (C.NumRegs - TTIRegNum);
144509467b48Spatrick   }
144609467b48Spatrick 
144709467b48Spatrick   // If an ICmpZero formula does not end in 0, it cannot be replaced by just an
144809467b48Spatrick   // add or sub. We'll need to compare the final result of the AddRec.
144909467b48Spatrick   // That means we'll need an additional instruction. But if the target can
145009467b48Spatrick   // macro-fuse a compare with a branch, don't count this extra instruction.
145109467b48Spatrick   // For -10 + {0, +, 1}:
145209467b48Spatrick   // i = i + 1;
145309467b48Spatrick   // cmp i, 10
145409467b48Spatrick   //
145509467b48Spatrick   // For {-10, +, 1}:
145609467b48Spatrick   // i = i + 1;
145709467b48Spatrick   if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&
145809467b48Spatrick       !TTI->canMacroFuseCmp())
145909467b48Spatrick     C.Insns++;
146009467b48Spatrick   // Each new AddRec adds 1 instruction to the calculation.
146109467b48Spatrick   C.Insns += (C.AddRecCost - PrevAddRecCost);
146209467b48Spatrick 
146309467b48Spatrick   // BaseAdds adds instructions for unfolded registers.
146409467b48Spatrick   if (LU.Kind != LSRUse::ICmpZero)
146509467b48Spatrick     C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
146609467b48Spatrick   assert(isValid() && "invalid cost");
146709467b48Spatrick }
146809467b48Spatrick 
146909467b48Spatrick /// Set this cost to a losing value.
147009467b48Spatrick void Cost::Lose() {
147109467b48Spatrick   C.Insns = std::numeric_limits<unsigned>::max();
147209467b48Spatrick   C.NumRegs = std::numeric_limits<unsigned>::max();
147309467b48Spatrick   C.AddRecCost = std::numeric_limits<unsigned>::max();
147409467b48Spatrick   C.NumIVMuls = std::numeric_limits<unsigned>::max();
147509467b48Spatrick   C.NumBaseAdds = std::numeric_limits<unsigned>::max();
147609467b48Spatrick   C.ImmCost = std::numeric_limits<unsigned>::max();
147709467b48Spatrick   C.SetupCost = std::numeric_limits<unsigned>::max();
147809467b48Spatrick   C.ScaleCost = std::numeric_limits<unsigned>::max();
147909467b48Spatrick }
148009467b48Spatrick 
148109467b48Spatrick /// Choose the lower cost.
1482*d415bd75Srobert bool Cost::isLess(const Cost &Other) const {
148309467b48Spatrick   if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
148409467b48Spatrick       C.Insns != Other.C.Insns)
148509467b48Spatrick     return C.Insns < Other.C.Insns;
148609467b48Spatrick   return TTI->isLSRCostLess(C, Other.C);
148709467b48Spatrick }
148809467b48Spatrick 
148909467b48Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
149009467b48Spatrick void Cost::print(raw_ostream &OS) const {
149109467b48Spatrick   if (InsnsCost)
149209467b48Spatrick     OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
149309467b48Spatrick   OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
149409467b48Spatrick   if (C.AddRecCost != 0)
149509467b48Spatrick     OS << ", with addrec cost " << C.AddRecCost;
149609467b48Spatrick   if (C.NumIVMuls != 0)
149709467b48Spatrick     OS << ", plus " << C.NumIVMuls << " IV mul"
149809467b48Spatrick        << (C.NumIVMuls == 1 ? "" : "s");
149909467b48Spatrick   if (C.NumBaseAdds != 0)
150009467b48Spatrick     OS << ", plus " << C.NumBaseAdds << " base add"
150109467b48Spatrick        << (C.NumBaseAdds == 1 ? "" : "s");
150209467b48Spatrick   if (C.ScaleCost != 0)
150309467b48Spatrick     OS << ", plus " << C.ScaleCost << " scale cost";
150409467b48Spatrick   if (C.ImmCost != 0)
150509467b48Spatrick     OS << ", plus " << C.ImmCost << " imm cost";
150609467b48Spatrick   if (C.SetupCost != 0)
150709467b48Spatrick     OS << ", plus " << C.SetupCost << " setup cost";
150809467b48Spatrick }
150909467b48Spatrick 
151009467b48Spatrick LLVM_DUMP_METHOD void Cost::dump() const {
151109467b48Spatrick   print(errs()); errs() << '\n';
151209467b48Spatrick }
151309467b48Spatrick #endif
151409467b48Spatrick 
151509467b48Spatrick /// Test whether this fixup always uses its value outside of the given loop.
151609467b48Spatrick bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
151709467b48Spatrick   // PHI nodes use their value in their incoming blocks.
151809467b48Spatrick   if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
151909467b48Spatrick     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
152009467b48Spatrick       if (PN->getIncomingValue(i) == OperandValToReplace &&
152109467b48Spatrick           L->contains(PN->getIncomingBlock(i)))
152209467b48Spatrick         return false;
152309467b48Spatrick     return true;
152409467b48Spatrick   }
152509467b48Spatrick 
152609467b48Spatrick   return !L->contains(UserInst);
152709467b48Spatrick }
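// Note the phi special case above: an LCSSA phi in the exit block whose
// incoming edge comes from the latch is *not* fully outside the loop,
// because a phi conceptually uses its operand at the end of the incoming
// (in-loop) block:
//
//   exit:
//     %lcssa = phi i64 [ %iv.next, %latch ]   ; not fully outside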
152809467b48Spatrick 
152909467b48Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
153009467b48Spatrick void LSRFixup::print(raw_ostream &OS) const {
153109467b48Spatrick   OS << "UserInst=";
153209467b48Spatrick   // Store is common and interesting enough to be worth special-casing.
153309467b48Spatrick   if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
153409467b48Spatrick     OS << "store ";
153509467b48Spatrick     Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
153609467b48Spatrick   } else if (UserInst->getType()->isVoidTy())
153709467b48Spatrick     OS << UserInst->getOpcodeName();
153809467b48Spatrick   else
153909467b48Spatrick     UserInst->printAsOperand(OS, /*PrintType=*/false);
154009467b48Spatrick 
154109467b48Spatrick   OS << ", OperandValToReplace=";
154209467b48Spatrick   OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);
154309467b48Spatrick 
154409467b48Spatrick   for (const Loop *PIL : PostIncLoops) {
154509467b48Spatrick     OS << ", PostIncLoop=";
154609467b48Spatrick     PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
154709467b48Spatrick   }
154809467b48Spatrick 
154909467b48Spatrick   if (Offset != 0)
155009467b48Spatrick     OS << ", Offset=" << Offset;
155109467b48Spatrick }
155209467b48Spatrick 
155309467b48Spatrick LLVM_DUMP_METHOD void LSRFixup::dump() const {
155409467b48Spatrick   print(errs()); errs() << '\n';
155509467b48Spatrick }
155609467b48Spatrick #endif
155709467b48Spatrick 
155809467b48Spatrick /// Test whether this use has a formula with the same registers as the given
155909467b48Spatrick /// formula.
156009467b48Spatrick bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
156109467b48Spatrick   SmallVector<const SCEV *, 4> Key = F.BaseRegs;
156209467b48Spatrick   if (F.ScaledReg) Key.push_back(F.ScaledReg);
156309467b48Spatrick   // Unstable sort by host order ok, because this is only used for uniquifying.
156409467b48Spatrick   llvm::sort(Key);
156509467b48Spatrick   return Uniquifier.count(Key);
156609467b48Spatrick }
156709467b48Spatrick 
156809467b48Spatrick /// Return the probability of selecting a formula that does not reference Reg.
156909467b48Spatrick float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
157009467b48Spatrick   unsigned FNum = 0;
157109467b48Spatrick   for (const Formula &F : Formulae)
157209467b48Spatrick     if (F.referencesReg(Reg))
157309467b48Spatrick       FNum++;
157409467b48Spatrick   return ((float)(Formulae.size() - FNum)) / Formulae.size();
157509467b48Spatrick }
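// E.g. if 2 of this use's 8 formulae reference Reg, the result is
// (8 - 2) / 8 = 0.75, i.e. the chance that a uniformly random choice of
// formula avoids Reg.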
157609467b48Spatrick 
157709467b48Spatrick /// If the given formula has not yet been inserted, add it to the list, and
157809467b48Spatrick /// return true. Return false otherwise.  The formula must be in canonical form.
157909467b48Spatrick bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
158009467b48Spatrick   assert(F.isCanonical(L) && "Invalid canonical representation");
158109467b48Spatrick 
158209467b48Spatrick   if (!Formulae.empty() && RigidFormula)
158309467b48Spatrick     return false;
158409467b48Spatrick 
158509467b48Spatrick   SmallVector<const SCEV *, 4> Key = F.BaseRegs;
158609467b48Spatrick   if (F.ScaledReg) Key.push_back(F.ScaledReg);
158709467b48Spatrick   // Unstable sort by host order ok, because this is only used for uniquifying.
158809467b48Spatrick   llvm::sort(Key);
158909467b48Spatrick 
159009467b48Spatrick   if (!Uniquifier.insert(Key).second)
159109467b48Spatrick     return false;
159209467b48Spatrick 
159309467b48Spatrick   // Using a register to hold the value of 0 is not profitable.
159409467b48Spatrick   assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
159509467b48Spatrick          "Zero allocated in a scaled register!");
159609467b48Spatrick #ifndef NDEBUG
159709467b48Spatrick   for (const SCEV *BaseReg : F.BaseRegs)
159809467b48Spatrick     assert(!BaseReg->isZero() && "Zero allocated in a base register!");
159909467b48Spatrick #endif
160009467b48Spatrick 
160109467b48Spatrick   // Add the formula to the list.
160209467b48Spatrick   Formulae.push_back(F);
160309467b48Spatrick 
160409467b48Spatrick   // Record registers now being used by this use.
160509467b48Spatrick   Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
160609467b48Spatrick   if (F.ScaledReg)
160709467b48Spatrick     Regs.insert(F.ScaledReg);
160809467b48Spatrick 
160909467b48Spatrick   return true;
161009467b48Spatrick }
161109467b48Spatrick 
161209467b48Spatrick /// Remove the given formula from this use's list.
161309467b48Spatrick void LSRUse::DeleteFormula(Formula &F) {
161409467b48Spatrick   if (&F != &Formulae.back())
161509467b48Spatrick     std::swap(F, Formulae.back());
161609467b48Spatrick   Formulae.pop_back();
161709467b48Spatrick }
161809467b48Spatrick 
161909467b48Spatrick /// Recompute the Regs field, and update RegUses.
162009467b48Spatrick void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
162109467b48Spatrick   // Now that we've filtered out some formulae, recompute the Regs set.
162209467b48Spatrick   SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
162309467b48Spatrick   Regs.clear();
162409467b48Spatrick   for (const Formula &F : Formulae) {
162509467b48Spatrick     if (F.ScaledReg) Regs.insert(F.ScaledReg);
162609467b48Spatrick     Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
162709467b48Spatrick   }
162809467b48Spatrick 
162909467b48Spatrick   // Update the RegTracker.
163009467b48Spatrick   for (const SCEV *S : OldRegs)
163109467b48Spatrick     if (!Regs.count(S))
163209467b48Spatrick       RegUses.dropRegister(S, LUIdx);
163309467b48Spatrick }
163409467b48Spatrick 
163509467b48Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
163609467b48Spatrick void LSRUse::print(raw_ostream &OS) const {
163709467b48Spatrick   OS << "LSR Use: Kind=";
163809467b48Spatrick   switch (Kind) {
163909467b48Spatrick   case Basic:    OS << "Basic"; break;
164009467b48Spatrick   case Special:  OS << "Special"; break;
164109467b48Spatrick   case ICmpZero: OS << "ICmpZero"; break;
164209467b48Spatrick   case Address:
164309467b48Spatrick     OS << "Address of ";
164409467b48Spatrick     if (AccessTy.MemTy->isPointerTy())
164509467b48Spatrick       OS << "pointer"; // the full pointer type could be really verbose
164609467b48Spatrick     else {
164709467b48Spatrick       OS << *AccessTy.MemTy;
164809467b48Spatrick     }
164909467b48Spatrick 
165009467b48Spatrick     OS << " in addrspace(" << AccessTy.AddrSpace << ')';
165109467b48Spatrick   }
165209467b48Spatrick 
165309467b48Spatrick   OS << ", Offsets={";
165409467b48Spatrick   bool NeedComma = false;
165509467b48Spatrick   for (const LSRFixup &Fixup : Fixups) {
165609467b48Spatrick     if (NeedComma) OS << ',';
165709467b48Spatrick     OS << Fixup.Offset;
165809467b48Spatrick     NeedComma = true;
165909467b48Spatrick   }
166009467b48Spatrick   OS << '}';
166109467b48Spatrick 
166209467b48Spatrick   if (AllFixupsOutsideLoop)
166309467b48Spatrick     OS << ", all-fixups-outside-loop";
166409467b48Spatrick 
166509467b48Spatrick   if (WidestFixupType)
166609467b48Spatrick     OS << ", widest fixup type: " << *WidestFixupType;
166709467b48Spatrick }
166809467b48Spatrick 
166909467b48Spatrick LLVM_DUMP_METHOD void LSRUse::dump() const {
167009467b48Spatrick   print(errs()); errs() << '\n';
167109467b48Spatrick }
167209467b48Spatrick #endif
167309467b48Spatrick 
167409467b48Spatrick static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
167509467b48Spatrick                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
167609467b48Spatrick                                  GlobalValue *BaseGV, int64_t BaseOffset,
167709467b48Spatrick                                  bool HasBaseReg, int64_t Scale,
167809467b48Spatrick                                  Instruction *Fixup/*= nullptr*/) {
167909467b48Spatrick   switch (Kind) {
168009467b48Spatrick   case LSRUse::Address:
168109467b48Spatrick     return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
168209467b48Spatrick                                      HasBaseReg, Scale, AccessTy.AddrSpace, Fixup);
168309467b48Spatrick 
168409467b48Spatrick   case LSRUse::ICmpZero:
168509467b48Spatrick     // There's not even a target hook for querying whether it would be legal to
168609467b48Spatrick     // fold a GV into an ICmp.
168709467b48Spatrick     if (BaseGV)
168809467b48Spatrick       return false;
168909467b48Spatrick 
169009467b48Spatrick     // ICmp only has two operands; don't allow more than two non-trivial parts.
169109467b48Spatrick     if (Scale != 0 && HasBaseReg && BaseOffset != 0)
169209467b48Spatrick       return false;
169309467b48Spatrick 
169409467b48Spatrick     // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
169509467b48Spatrick     // putting the scaled register in the other operand of the icmp.
169609467b48Spatrick     if (Scale != 0 && Scale != -1)
169709467b48Spatrick       return false;
169809467b48Spatrick 
169909467b48Spatrick     // If we have low-level target information, ask the target if it can fold an
170009467b48Spatrick     // integer immediate on an icmp.
170109467b48Spatrick     if (BaseOffset != 0) {
170209467b48Spatrick       // We have one of:
170309467b48Spatrick       // ICmpZero     BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
170409467b48Spatrick       // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
170509467b48Spatrick       // Offs is the ICmp immediate.
170609467b48Spatrick       if (Scale == 0)
170709467b48Spatrick         // The cast does the right thing with
170809467b48Spatrick         // std::numeric_limits<int64_t>::min().
170909467b48Spatrick         BaseOffset = -(uint64_t)BaseOffset;
171009467b48Spatrick       return TTI.isLegalICmpImmediate(BaseOffset);
171109467b48Spatrick     }
171209467b48Spatrick 
171309467b48Spatrick     // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
171409467b48Spatrick     return true;
171509467b48Spatrick 
171609467b48Spatrick   case LSRUse::Basic:
171709467b48Spatrick     // Only handle single-register values.
171809467b48Spatrick     return !BaseGV && Scale == 0 && BaseOffset == 0;
171909467b48Spatrick 
172009467b48Spatrick   case LSRUse::Special:
172109467b48Spatrick     // Special case Basic to handle -1 scales.
172209467b48Spatrick     return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
172309467b48Spatrick   }
172409467b48Spatrick 
172509467b48Spatrick   llvm_unreachable("Invalid LSRUse Kind!");
172609467b48Spatrick }
172709467b48Spatrick 
172809467b48Spatrick static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
172909467b48Spatrick                                  int64_t MinOffset, int64_t MaxOffset,
173009467b48Spatrick                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
173109467b48Spatrick                                  GlobalValue *BaseGV, int64_t BaseOffset,
173209467b48Spatrick                                  bool HasBaseReg, int64_t Scale) {
173309467b48Spatrick   // Check for overflow.
173409467b48Spatrick   if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
173509467b48Spatrick       (MinOffset > 0))
173609467b48Spatrick     return false;
173709467b48Spatrick   MinOffset = (uint64_t)BaseOffset + MinOffset;
173809467b48Spatrick   if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
173909467b48Spatrick       (MaxOffset > 0))
174009467b48Spatrick     return false;
174109467b48Spatrick   MaxOffset = (uint64_t)BaseOffset + MaxOffset;
174209467b48Spatrick 
174309467b48Spatrick   return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
174409467b48Spatrick                               HasBaseReg, Scale) &&
174509467b48Spatrick          isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
174609467b48Spatrick                               HasBaseReg, Scale);
174709467b48Spatrick }
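// A quick worked case for the wrap check above: BaseOffset = INT64_MAX with
// MinOffset = 1 wraps to a negative sum, which disagrees with MinOffset > 0,
// so the candidate is rejected instead of being costed with a bogus range.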
174809467b48Spatrick 
174909467b48Spatrick static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
175009467b48Spatrick                                  int64_t MinOffset, int64_t MaxOffset,
175109467b48Spatrick                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
175209467b48Spatrick                                  const Formula &F, const Loop &L) {
175309467b48Spatrick   // For the purpose of isAMCompletelyFolded, either having a canonical formula
175409467b48Spatrick   // or a scale not equal to zero is correct.
175509467b48Spatrick   // Problems may arise from non-canonical formulae having a scale == 0.
175609467b48Spatrick   // Strictly speaking, it would be best to rely only on canonical formulae.
175709467b48Spatrick   // However, when we generate the scaled formulae, we first check that the
175809467b48Spatrick   // scaling factor is profitable before computing the actual ScaledReg, for
175909467b48Spatrick   // compile-time's sake.
176009467b48Spatrick   assert((F.isCanonical(L) || F.Scale != 0));
176109467b48Spatrick   return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
176209467b48Spatrick                               F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
176309467b48Spatrick }
176409467b48Spatrick 
176509467b48Spatrick /// Test whether we know how to expand the current formula.
176609467b48Spatrick static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
176709467b48Spatrick                        int64_t MaxOffset, LSRUse::KindType Kind,
176809467b48Spatrick                        MemAccessTy AccessTy, GlobalValue *BaseGV,
176909467b48Spatrick                        int64_t BaseOffset, bool HasBaseReg, int64_t Scale) {
177009467b48Spatrick   // We know how to expand completely foldable formulae.
177109467b48Spatrick   return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
177209467b48Spatrick                               BaseOffset, HasBaseReg, Scale) ||
177309467b48Spatrick          // Or formulae that use a base register produced by a sum of base
177409467b48Spatrick          // registers.
177509467b48Spatrick          (Scale == 1 &&
177609467b48Spatrick           isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
177709467b48Spatrick                                BaseGV, BaseOffset, true, 0));
177809467b48Spatrick }
177909467b48Spatrick 
178009467b48Spatrick static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
178109467b48Spatrick                        int64_t MaxOffset, LSRUse::KindType Kind,
178209467b48Spatrick                        MemAccessTy AccessTy, const Formula &F) {
178309467b48Spatrick   return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
178409467b48Spatrick                     F.BaseOffset, F.HasBaseReg, F.Scale);
178509467b48Spatrick }
178609467b48Spatrick 
178709467b48Spatrick static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
178809467b48Spatrick                                  const LSRUse &LU, const Formula &F) {
178909467b48Spatrick   // Target may want to look at the user instructions.
179009467b48Spatrick   if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
179109467b48Spatrick     for (const LSRFixup &Fixup : LU.Fixups)
179209467b48Spatrick       if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
179309467b48Spatrick                                 (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
179409467b48Spatrick                                 F.Scale, Fixup.UserInst))
179509467b48Spatrick         return false;
179609467b48Spatrick     return true;
179709467b48Spatrick   }
179809467b48Spatrick 
179909467b48Spatrick   return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
180009467b48Spatrick                               LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
180109467b48Spatrick                               F.Scale);
180209467b48Spatrick }
180309467b48Spatrick 
180473471bf0Spatrick static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
180509467b48Spatrick                                             const LSRUse &LU, const Formula &F,
180609467b48Spatrick                                             const Loop &L) {
180709467b48Spatrick   if (!F.Scale)
180809467b48Spatrick     return 0;
180909467b48Spatrick 
181009467b48Spatrick   // If the use is not completely folded in that instruction, we will have to
181109467b48Spatrick   // pay an extra cost only for scale != 1.
181209467b48Spatrick   if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
181309467b48Spatrick                             LU.AccessTy, F, L))
181409467b48Spatrick     return F.Scale != 1;
181509467b48Spatrick 
181609467b48Spatrick   switch (LU.Kind) {
181709467b48Spatrick   case LSRUse::Address: {
181809467b48Spatrick     // Check the scaling factor cost with both the min and max offsets.
181973471bf0Spatrick     InstructionCost ScaleCostMinOffset = TTI.getScalingFactorCost(
182009467b48Spatrick         LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg,
182109467b48Spatrick         F.Scale, LU.AccessTy.AddrSpace);
182273471bf0Spatrick     InstructionCost ScaleCostMaxOffset = TTI.getScalingFactorCost(
182309467b48Spatrick         LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg,
182409467b48Spatrick         F.Scale, LU.AccessTy.AddrSpace);
182509467b48Spatrick 
182673471bf0Spatrick     assert(ScaleCostMinOffset.isValid() && ScaleCostMaxOffset.isValid() &&
182709467b48Spatrick            "Legal addressing mode has an illegal cost!");
182809467b48Spatrick     return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
182909467b48Spatrick   }
183009467b48Spatrick   case LSRUse::ICmpZero:
183109467b48Spatrick   case LSRUse::Basic:
183209467b48Spatrick   case LSRUse::Special:
183309467b48Spatrick     // The use is completely folded, i.e., everything is folded into the
183409467b48Spatrick     // instruction.
183509467b48Spatrick     return 0;
183609467b48Spatrick   }
183709467b48Spatrick 
183809467b48Spatrick   llvm_unreachable("Invalid LSRUse Kind!");
183909467b48Spatrick }
184009467b48Spatrick 
184109467b48Spatrick static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
184209467b48Spatrick                              LSRUse::KindType Kind, MemAccessTy AccessTy,
184309467b48Spatrick                              GlobalValue *BaseGV, int64_t BaseOffset,
184409467b48Spatrick                              bool HasBaseReg) {
184509467b48Spatrick   // Fast-path: zero is always foldable.
184609467b48Spatrick   if (BaseOffset == 0 && !BaseGV) return true;
184709467b48Spatrick 
184809467b48Spatrick   // Conservatively, create an address with an immediate and a
184909467b48Spatrick   // base and a scale.
185009467b48Spatrick   int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
185109467b48Spatrick 
185209467b48Spatrick   // Canonicalize a scale of 1 to a base register if the formula doesn't
185309467b48Spatrick   // already have a base register.
185409467b48Spatrick   if (!HasBaseReg && Scale == 1) {
185509467b48Spatrick     Scale = 0;
185609467b48Spatrick     HasBaseReg = true;
185709467b48Spatrick   }
185809467b48Spatrick 
185909467b48Spatrick   return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
186009467b48Spatrick                               HasBaseReg, Scale);
186109467b48Spatrick }
186209467b48Spatrick 
186309467b48Spatrick static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
186409467b48Spatrick                              ScalarEvolution &SE, int64_t MinOffset,
186509467b48Spatrick                              int64_t MaxOffset, LSRUse::KindType Kind,
186609467b48Spatrick                              MemAccessTy AccessTy, const SCEV *S,
186709467b48Spatrick                              bool HasBaseReg) {
186809467b48Spatrick   // Fast-path: zero is always foldable.
186909467b48Spatrick   if (S->isZero()) return true;
187009467b48Spatrick 
187109467b48Spatrick   // Conservatively, create an address with an immediate and a
187209467b48Spatrick   // base and a scale.
187309467b48Spatrick   int64_t BaseOffset = ExtractImmediate(S, SE);
187409467b48Spatrick   GlobalValue *BaseGV = ExtractSymbol(S, SE);
187509467b48Spatrick 
187609467b48Spatrick   // If there's anything else involved, it's not foldable.
187709467b48Spatrick   if (!S->isZero()) return false;
187809467b48Spatrick 
187909467b48Spatrick   // Fast-path: zero is always foldable.
188009467b48Spatrick   if (BaseOffset == 0 && !BaseGV) return true;
188109467b48Spatrick 
188209467b48Spatrick   // Conservatively, create an address with an immediate and a
188309467b48Spatrick   // base and a scale.
188409467b48Spatrick   int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
188509467b48Spatrick 
188609467b48Spatrick   return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
188709467b48Spatrick                               BaseOffset, HasBaseReg, Scale);
188809467b48Spatrick }
188909467b48Spatrick 
189009467b48Spatrick namespace {
189109467b48Spatrick 
189209467b48Spatrick /// An individual increment in a Chain of IV increments.  Relate an IV user to
189309467b48Spatrick /// an expression that computes the IV it uses from the IV used by the previous
189409467b48Spatrick /// link in the Chain.
189509467b48Spatrick ///
189609467b48Spatrick /// For the head of a chain, IncExpr holds the absolute SCEV expression for the
189709467b48Spatrick /// original IVOperand. The head of the chain's IVOperand is only valid during
189809467b48Spatrick /// chain collection, before LSR replaces IV users. During chain generation,
189909467b48Spatrick /// IncExpr can be used to find the new IVOperand that computes the same
190009467b48Spatrick /// expression.
190109467b48Spatrick struct IVInc {
190209467b48Spatrick   Instruction *UserInst;
190309467b48Spatrick   Value* IVOperand;
190409467b48Spatrick   const SCEV *IncExpr;
190509467b48Spatrick 
190609467b48Spatrick   IVInc(Instruction *U, Value *O, const SCEV *E)
190709467b48Spatrick       : UserInst(U), IVOperand(O), IncExpr(E) {}
190809467b48Spatrick };
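// Sketch of a two-link chain over hypothetical IR (head listed first):
//
//   store i32 0, ptr %p       ; head: IVOperand = %p,   IncExpr = SCEV(%p)
//   %p.1 = getelementptr i8, ptr %p, i64 4
//   store i32 0, ptr %p.1     ; link: IVOperand = %p.1, IncExpr = (+4)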
190909467b48Spatrick 
191009467b48Spatrick // The list of IV increments in program order.  We typically add the head of a
191109467b48Spatrick // chain without finding subsequent links.
191209467b48Spatrick struct IVChain {
191309467b48Spatrick   SmallVector<IVInc, 1> Incs;
191409467b48Spatrick   const SCEV *ExprBase = nullptr;
191509467b48Spatrick 
191609467b48Spatrick   IVChain() = default;
191709467b48Spatrick   IVChain(const IVInc &Head, const SCEV *Base)
191809467b48Spatrick       : Incs(1, Head), ExprBase(Base) {}
191909467b48Spatrick 
192009467b48Spatrick   using const_iterator = SmallVectorImpl<IVInc>::const_iterator;
192109467b48Spatrick 
192209467b48Spatrick   // Return an iterator to the first increment past the chain's head.
192309467b48Spatrick   const_iterator begin() const {
192409467b48Spatrick     assert(!Incs.empty());
192509467b48Spatrick     return std::next(Incs.begin());
192609467b48Spatrick   }
192709467b48Spatrick   const_iterator end() const {
192809467b48Spatrick     return Incs.end();
192909467b48Spatrick   }
193009467b48Spatrick 
193109467b48Spatrick   // Returns true if this chain contains any increments.
193209467b48Spatrick   bool hasIncs() const { return Incs.size() >= 2; }
193309467b48Spatrick 
193409467b48Spatrick   // Add an IVInc to the end of this chain.
193509467b48Spatrick   void add(const IVInc &X) { Incs.push_back(X); }
193609467b48Spatrick 
193709467b48Spatrick   // Returns the last UserInst in the chain.
193809467b48Spatrick   Instruction *tailUserInst() const { return Incs.back().UserInst; }
193909467b48Spatrick 
194009467b48Spatrick   // Returns true if IncExpr can be profitably added to this chain.
194109467b48Spatrick   bool isProfitableIncrement(const SCEV *OperExpr,
194209467b48Spatrick                              const SCEV *IncExpr,
194309467b48Spatrick                              ScalarEvolution&);
194409467b48Spatrick };
194509467b48Spatrick 
194609467b48Spatrick /// Helper for CollectChains to track multiple IV increment uses.  Distinguish
194709467b48Spatrick /// between FarUsers that definitely cross IV increments and NearUsers that may
194809467b48Spatrick /// be used between IV increments.
194909467b48Spatrick struct ChainUsers {
195009467b48Spatrick   SmallPtrSet<Instruction*, 4> FarUsers;
195109467b48Spatrick   SmallPtrSet<Instruction*, 4> NearUsers;
195209467b48Spatrick };
195309467b48Spatrick 
195409467b48Spatrick /// This class holds state for the main loop strength reduction logic.
195509467b48Spatrick class LSRInstance {
195609467b48Spatrick   IVUsers &IU;
195709467b48Spatrick   ScalarEvolution &SE;
195809467b48Spatrick   DominatorTree &DT;
195909467b48Spatrick   LoopInfo &LI;
196009467b48Spatrick   AssumptionCache &AC;
1961097a140dSpatrick   TargetLibraryInfo &TLI;
196209467b48Spatrick   const TargetTransformInfo &TTI;
196309467b48Spatrick   Loop *const L;
1964097a140dSpatrick   MemorySSAUpdater *MSSAU;
196573471bf0Spatrick   TTI::AddressingModeKind AMK;
1966*d415bd75Srobert   mutable SCEVExpander Rewriter;
196709467b48Spatrick   bool Changed = false;
196809467b48Spatrick 
196909467b48Spatrick   /// This is the insert position that the current loop's induction variable
197009467b48Spatrick   /// increment should be placed. In simple loops, this is the latch block's
197109467b48Spatrick   /// terminator. But in more complicated cases, this is a position which will
197209467b48Spatrick   /// dominate all the in-loop post-increment users.
197309467b48Spatrick   Instruction *IVIncInsertPos = nullptr;
197409467b48Spatrick 
197509467b48Spatrick   /// Interesting factors between use strides.
197609467b48Spatrick   ///
197709467b48Spatrick   /// We explicitly use a SetVector which contains a SmallSet, instead of the
197809467b48Spatrick   /// default, a SmallDenseSet, because we need to use the full range of
197909467b48Spatrick   /// int64_ts, and there's currently no good way of doing that with
198009467b48Spatrick   /// SmallDenseSet.
198109467b48Spatrick   SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;
198209467b48Spatrick 
1983*d415bd75Srobert   /// The cost of the current SCEV, the best solution by LSR will be dropped if
1984*d415bd75Srobert   /// the solution is not profitable.
1985*d415bd75Srobert   Cost BaselineCost;
1986*d415bd75Srobert 
198709467b48Spatrick   /// Interesting use types, to facilitate truncation reuse.
198809467b48Spatrick   SmallSetVector<Type *, 4> Types;
198909467b48Spatrick 
199009467b48Spatrick   /// The list of interesting uses.
199109467b48Spatrick   mutable SmallVector<LSRUse, 16> Uses;
199209467b48Spatrick 
199309467b48Spatrick   /// Track which uses use which register candidates.
199409467b48Spatrick   RegUseTracker RegUses;
199509467b48Spatrick 
199609467b48Spatrick   // Limit the number of chains to avoid quadratic behavior. We don't expect to
199709467b48Spatrick   // have more than a few IV increment chains in a loop. Missing a Chain falls
199809467b48Spatrick   // back to normal LSR behavior for those uses.
199909467b48Spatrick   static const unsigned MaxChains = 8;
200009467b48Spatrick 
200109467b48Spatrick   /// IV users can form a chain of IV increments.
200209467b48Spatrick   SmallVector<IVChain, MaxChains> IVChainVec;
200309467b48Spatrick 
200409467b48Spatrick   /// IV users that belong to profitable IVChains.
200509467b48Spatrick   SmallPtrSet<Use*, MaxChains> IVIncSet;
200609467b48Spatrick 
200773471bf0Spatrick   /// Induction variables that were generated and inserted by the SCEV Expander.
200873471bf0Spatrick   SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;
200973471bf0Spatrick 
201009467b48Spatrick   void OptimizeShadowIV();
201109467b48Spatrick   bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
201209467b48Spatrick   ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
201309467b48Spatrick   void OptimizeLoopTermCond();
201409467b48Spatrick 
201509467b48Spatrick   void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
201609467b48Spatrick                         SmallVectorImpl<ChainUsers> &ChainUsersVec);
201709467b48Spatrick   void FinalizeChain(IVChain &Chain);
201809467b48Spatrick   void CollectChains();
2019*d415bd75Srobert   void GenerateIVChain(const IVChain &Chain,
202009467b48Spatrick                        SmallVectorImpl<WeakTrackingVH> &DeadInsts);
202109467b48Spatrick 
202209467b48Spatrick   void CollectInterestingTypesAndFactors();
202309467b48Spatrick   void CollectFixupsAndInitialFormulae();
202409467b48Spatrick 
202509467b48Spatrick   // Support for sharing of LSRUses between LSRFixups.
202609467b48Spatrick   using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
202709467b48Spatrick   UseMapTy UseMap;
202809467b48Spatrick 
202909467b48Spatrick   bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
203009467b48Spatrick                           LSRUse::KindType Kind, MemAccessTy AccessTy);
203109467b48Spatrick 
203209467b48Spatrick   std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
203309467b48Spatrick                                     MemAccessTy AccessTy);
203409467b48Spatrick 
203509467b48Spatrick   void DeleteUse(LSRUse &LU, size_t LUIdx);
203609467b48Spatrick 
203709467b48Spatrick   LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
203809467b48Spatrick 
203909467b48Spatrick   void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
204009467b48Spatrick   void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
204109467b48Spatrick   void CountRegisters(const Formula &F, size_t LUIdx);
204209467b48Spatrick   bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
204309467b48Spatrick 
204409467b48Spatrick   void CollectLoopInvariantFixupsAndFormulae();
204509467b48Spatrick 
204609467b48Spatrick   void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
204709467b48Spatrick                               unsigned Depth = 0);
204809467b48Spatrick 
204909467b48Spatrick   void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
205009467b48Spatrick                                   const Formula &Base, unsigned Depth,
205109467b48Spatrick                                   size_t Idx, bool IsScaledReg = false);
205209467b48Spatrick   void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
205309467b48Spatrick   void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
205409467b48Spatrick                                    const Formula &Base, size_t Idx,
205509467b48Spatrick                                    bool IsScaledReg = false);
205609467b48Spatrick   void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
205709467b48Spatrick   void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
205809467b48Spatrick                                    const Formula &Base,
205909467b48Spatrick                                    const SmallVectorImpl<int64_t> &Worklist,
206009467b48Spatrick                                    size_t Idx, bool IsScaledReg = false);
206109467b48Spatrick   void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
206209467b48Spatrick   void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
206309467b48Spatrick   void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
206409467b48Spatrick   void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
206509467b48Spatrick   void GenerateCrossUseConstantOffsets();
206609467b48Spatrick   void GenerateAllReuseFormulae();
206709467b48Spatrick 
206809467b48Spatrick   void FilterOutUndesirableDedicatedRegisters();
206909467b48Spatrick 
207009467b48Spatrick   size_t EstimateSearchSpaceComplexity() const;
207109467b48Spatrick   void NarrowSearchSpaceByDetectingSupersets();
207209467b48Spatrick   void NarrowSearchSpaceByCollapsingUnrolledCode();
207309467b48Spatrick   void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
207409467b48Spatrick   void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2075097a140dSpatrick   void NarrowSearchSpaceByFilterPostInc();
207609467b48Spatrick   void NarrowSearchSpaceByDeletingCostlyFormulas();
207709467b48Spatrick   void NarrowSearchSpaceByPickingWinnerRegs();
207809467b48Spatrick   void NarrowSearchSpaceUsingHeuristics();
207909467b48Spatrick 
208009467b48Spatrick   void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
208109467b48Spatrick                     Cost &SolutionCost,
208209467b48Spatrick                     SmallVectorImpl<const Formula *> &Workspace,
208309467b48Spatrick                     const Cost &CurCost,
208409467b48Spatrick                     const SmallPtrSet<const SCEV *, 16> &CurRegs,
208509467b48Spatrick                     DenseSet<const SCEV *> &VisitedRegs) const;
208609467b48Spatrick   void Solve(SmallVectorImpl<const Formula *> &Solution) const;
208709467b48Spatrick 
208809467b48Spatrick   BasicBlock::iterator
208909467b48Spatrick   HoistInsertPosition(BasicBlock::iterator IP,
209009467b48Spatrick                       const SmallVectorImpl<Instruction *> &Inputs) const;
2091*d415bd75Srobert   BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
209209467b48Spatrick                                                      const LSRFixup &LF,
2093*d415bd75Srobert                                                      const LSRUse &LU) const;
209409467b48Spatrick 
209509467b48Spatrick   Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
2096*d415bd75Srobert                 BasicBlock::iterator IP,
209709467b48Spatrick                 SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
209809467b48Spatrick   void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
2099*d415bd75Srobert                      const Formula &F,
210009467b48Spatrick                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
210109467b48Spatrick   void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
210209467b48Spatrick                SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
210309467b48Spatrick   void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
210409467b48Spatrick 
210509467b48Spatrick public:
210609467b48Spatrick   LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
210709467b48Spatrick               LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,
2108097a140dSpatrick               TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);
210909467b48Spatrick 
211009467b48Spatrick   bool getChanged() const { return Changed; }
211173471bf0Spatrick   const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {
211273471bf0Spatrick     return ScalarEvolutionIVs;
211373471bf0Spatrick   }
211409467b48Spatrick 
211509467b48Spatrick   void print_factors_and_types(raw_ostream &OS) const;
211609467b48Spatrick   void print_fixups(raw_ostream &OS) const;
211709467b48Spatrick   void print_uses(raw_ostream &OS) const;
211809467b48Spatrick   void print(raw_ostream &OS) const;
211909467b48Spatrick   void dump() const;
212009467b48Spatrick };
212109467b48Spatrick 
212209467b48Spatrick } // end anonymous namespace
212309467b48Spatrick 
212409467b48Spatrick /// If IV is used in an int-to-float cast inside the loop then try to eliminate
212509467b48Spatrick /// the cast operation.
212609467b48Spatrick void LSRInstance::OptimizeShadowIV() {
212709467b48Spatrick   const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
212809467b48Spatrick   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
212909467b48Spatrick     return;
213009467b48Spatrick 
213109467b48Spatrick   for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
213209467b48Spatrick        UI != E; /* empty */) {
213309467b48Spatrick     IVUsers::const_iterator CandidateUI = UI;
213409467b48Spatrick     ++UI;
213509467b48Spatrick     Instruction *ShadowUse = CandidateUI->getUser();
213609467b48Spatrick     Type *DestTy = nullptr;
213709467b48Spatrick     bool IsSigned = false;
213809467b48Spatrick 
213909467b48Spatrick     /* If the shadow use is an int->float cast then insert a second IV
214009467b48Spatrick        to eliminate this cast.
214109467b48Spatrick 
214209467b48Spatrick          for (unsigned i = 0; i < n; ++i)
214309467b48Spatrick            foo((double)i);
214409467b48Spatrick 
214509467b48Spatrick        is transformed into
214609467b48Spatrick 
214709467b48Spatrick          double d = 0.0;
214809467b48Spatrick          for (unsigned i = 0; i < n; ++i, ++d)
214909467b48Spatrick            foo(d);
215009467b48Spatrick     */
215109467b48Spatrick     if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
215209467b48Spatrick       IsSigned = false;
215309467b48Spatrick       DestTy = UCast->getDestTy();
215409467b48Spatrick     }
215509467b48Spatrick     else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
215609467b48Spatrick       IsSigned = true;
215709467b48Spatrick       DestTy = SCast->getDestTy();
215809467b48Spatrick     }
215909467b48Spatrick     if (!DestTy) continue;
216009467b48Spatrick 
216109467b48Spatrick     // If target does not support DestTy natively then do not apply
216209467b48Spatrick     // this transformation.
216309467b48Spatrick     if (!TTI.isTypeLegal(DestTy)) continue;
216409467b48Spatrick 
216509467b48Spatrick     PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
216609467b48Spatrick     if (!PH) continue;
216709467b48Spatrick     if (PH->getNumIncomingValues() != 2) continue;
216809467b48Spatrick 
216909467b48Spatrick     // If the calculation in integers overflows, the result in FP type will
217009467b48Spatrick     // differ. So we can only do this transformation if we are guaranteed not
217109467b48Spatrick     // to deal with overflowing values.
217209467b48Spatrick     const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PH));
217309467b48Spatrick     if (!AR) continue;
217409467b48Spatrick     if (IsSigned && !AR->hasNoSignedWrap()) continue;
217509467b48Spatrick     if (!IsSigned && !AR->hasNoUnsignedWrap()) continue;
217609467b48Spatrick 
217709467b48Spatrick     Type *SrcTy = PH->getType();
217809467b48Spatrick     int Mantissa = DestTy->getFPMantissaWidth();
217909467b48Spatrick     if (Mantissa == -1) continue;
218009467b48Spatrick     if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
218109467b48Spatrick       continue;
218209467b48Spatrick 
218309467b48Spatrick     unsigned Entry, Latch;
218409467b48Spatrick     if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
218509467b48Spatrick       Entry = 0;
218609467b48Spatrick       Latch = 1;
218709467b48Spatrick     } else {
218809467b48Spatrick       Entry = 1;
218909467b48Spatrick       Latch = 0;
219009467b48Spatrick     }
219109467b48Spatrick 
219209467b48Spatrick     ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
219309467b48Spatrick     if (!Init) continue;
219409467b48Spatrick     Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
219509467b48Spatrick                                         (double)Init->getSExtValue() :
219609467b48Spatrick                                         (double)Init->getZExtValue());
219709467b48Spatrick 
219809467b48Spatrick     BinaryOperator *Incr =
219909467b48Spatrick       dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
220009467b48Spatrick     if (!Incr) continue;
220109467b48Spatrick     if (Incr->getOpcode() != Instruction::Add
220209467b48Spatrick         && Incr->getOpcode() != Instruction::Sub)
220309467b48Spatrick       continue;
220409467b48Spatrick 
220509467b48Spatrick     /* Initialize new IV, double d = 0.0 in above example. */
220609467b48Spatrick     ConstantInt *C = nullptr;
220709467b48Spatrick     if (Incr->getOperand(0) == PH)
220809467b48Spatrick       C = dyn_cast<ConstantInt>(Incr->getOperand(1));
220909467b48Spatrick     else if (Incr->getOperand(1) == PH)
221009467b48Spatrick       C = dyn_cast<ConstantInt>(Incr->getOperand(0));
221109467b48Spatrick     else
221209467b48Spatrick       continue;
221309467b48Spatrick 
221409467b48Spatrick     if (!C) continue;
221509467b48Spatrick 
221609467b48Spatrick     // Ignore negative constants, as the code below doesn't handle them
221709467b48Spatrick     // correctly. TODO: Remove this restriction.
221809467b48Spatrick     if (!C->getValue().isStrictlyPositive()) continue;
221909467b48Spatrick 
222009467b48Spatrick     /* Add new PHINode. */
222109467b48Spatrick     PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
222209467b48Spatrick 
222309467b48Spatrick     /* Create new increment. '++d' in above example. */
222409467b48Spatrick     Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
222509467b48Spatrick     BinaryOperator *NewIncr =
222609467b48Spatrick       BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
222709467b48Spatrick                                Instruction::FAdd : Instruction::FSub,
222809467b48Spatrick                              NewPH, CFP, "IV.S.next.", Incr);
222909467b48Spatrick 
223009467b48Spatrick     NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
223109467b48Spatrick     NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
223209467b48Spatrick 
223309467b48Spatrick     /* Remove cast operation */
223409467b48Spatrick     ShadowUse->replaceAllUsesWith(NewPH);
223509467b48Spatrick     ShadowUse->eraseFromParent();
223609467b48Spatrick     Changed = true;
223709467b48Spatrick     break;
223809467b48Spatrick   }
223909467b48Spatrick }
224009467b48Spatrick 
224109467b48Spatrick /// If Cond has an operand that is an expression of an IV, set the IV user and
224209467b48Spatrick /// stride information and return true, otherwise return false.
224309467b48Spatrick bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
224409467b48Spatrick   for (IVStrideUse &U : IU)
224509467b48Spatrick     if (U.getUser() == Cond) {
224609467b48Spatrick       // NOTE: we could handle setcc instructions with multiple uses here, but
224709467b48Spatrick       // InstCombine does it as well for simple uses; it's not clear that it
224809467b48Spatrick       // occurs enough in real life to be worth handling.
224909467b48Spatrick       CondUse = &U;
225009467b48Spatrick       return true;
225109467b48Spatrick     }
225209467b48Spatrick   return false;
225309467b48Spatrick }
225409467b48Spatrick 
225509467b48Spatrick /// Rewrite the loop's terminating condition if it uses a max computation.
225609467b48Spatrick ///
225709467b48Spatrick /// This is a narrow solution to a specific, but acute, problem. For loops
225809467b48Spatrick /// like this:
225909467b48Spatrick ///
226009467b48Spatrick ///   i = 0;
226109467b48Spatrick ///   do {
226209467b48Spatrick ///     p[i] = 0.0;
226309467b48Spatrick ///   } while (++i < n);
226409467b48Spatrick ///
226509467b48Spatrick /// the trip count isn't just 'n', because 'n' might not be positive. And
226609467b48Spatrick /// unfortunately this can come up even for loops where the user didn't use
226709467b48Spatrick /// a C do-while loop. For example, seemingly well-behaved top-test loops
226809467b48Spatrick /// will commonly be lowered like this:
226909467b48Spatrick ///
227009467b48Spatrick ///   if (n > 0) {
227109467b48Spatrick ///     i = 0;
227209467b48Spatrick ///     do {
227309467b48Spatrick ///       p[i] = 0.0;
227409467b48Spatrick ///     } while (++i < n);
227509467b48Spatrick ///   }
227609467b48Spatrick ///
227709467b48Spatrick /// and then it's possible for subsequent optimization to obscure the if
227809467b48Spatrick /// test in such a way that indvars can't find it.
227909467b48Spatrick ///
228009467b48Spatrick /// When indvars can't find the if test in loops like this, it creates a
228109467b48Spatrick /// max expression, which allows it to give the loop a canonical
228209467b48Spatrick /// induction variable:
228309467b48Spatrick ///
228409467b48Spatrick ///   i = 0;
228509467b48Spatrick ///   max = n < 1 ? 1 : n;
228609467b48Spatrick ///   do {
228709467b48Spatrick ///     p[i] = 0.0;
228809467b48Spatrick ///   } while (++i != max);
228909467b48Spatrick ///
229009467b48Spatrick /// Canonical induction variables are necessary because the loop passes
229109467b48Spatrick /// are designed around them. The most obvious example of this is the
229209467b48Spatrick /// LoopInfo analysis, which doesn't remember trip count values. It
229309467b48Spatrick /// expects to be able to rediscover the trip count each time it is
229409467b48Spatrick /// needed, and it does this using a simple analysis that only succeeds if
229509467b48Spatrick /// the loop has a canonical induction variable.
229609467b48Spatrick ///
229709467b48Spatrick /// However, when it comes time to generate code, the maximum operation
229809467b48Spatrick /// can be quite costly, especially if it's inside of an outer loop.
229909467b48Spatrick ///
230009467b48Spatrick /// This function solves this problem by detecting loops of this type and
230109467b48Spatrick /// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
230209467b48Spatrick /// the instructions for the maximum computation.
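///
/// After the rewrite (illustrative), the loop above tests the pre-max value
/// directly:
///
///   i = 0;
///   do {
///     p[i] = 0.0;
///   } while (++i < n);
///
/// and the select computing 'max' becomes dead and is deleted.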
230309467b48Spatrick ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
230409467b48Spatrick   // Check that the loop matches the pattern we're looking for.
230509467b48Spatrick   if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
230609467b48Spatrick       Cond->getPredicate() != CmpInst::ICMP_NE)
230709467b48Spatrick     return Cond;
230809467b48Spatrick 
230909467b48Spatrick   SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
231009467b48Spatrick   if (!Sel || !Sel->hasOneUse()) return Cond;
231109467b48Spatrick 
231209467b48Spatrick   const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
231309467b48Spatrick   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
231409467b48Spatrick     return Cond;
231509467b48Spatrick   const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
231609467b48Spatrick 
231709467b48Spatrick   // Add one to the backedge-taken count to get the trip count.
231809467b48Spatrick   const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
231909467b48Spatrick   if (IterationCount != SE.getSCEV(Sel)) return Cond;
232009467b48Spatrick 
232109467b48Spatrick   // Check for a max calculation that matches the pattern. There's no check
232209467b48Spatrick   // for ICMP_ULE here because the comparison would be with zero, which
232309467b48Spatrick   // isn't interesting.
232409467b48Spatrick   CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
232509467b48Spatrick   const SCEVNAryExpr *Max = nullptr;
232609467b48Spatrick   if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
232709467b48Spatrick     Pred = ICmpInst::ICMP_SLE;
232809467b48Spatrick     Max = S;
232909467b48Spatrick   } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
233009467b48Spatrick     Pred = ICmpInst::ICMP_SLT;
233109467b48Spatrick     Max = S;
233209467b48Spatrick   } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
233309467b48Spatrick     Pred = ICmpInst::ICMP_ULT;
233409467b48Spatrick     Max = U;
233509467b48Spatrick   } else {
233609467b48Spatrick     // No match; bail.
233709467b48Spatrick     return Cond;
233809467b48Spatrick   }
233909467b48Spatrick 
234009467b48Spatrick   // To handle a max with more than two operands, this optimization would
234109467b48Spatrick   // require additional checking and setup.
234209467b48Spatrick   if (Max->getNumOperands() != 2)
234309467b48Spatrick     return Cond;
234409467b48Spatrick 
234509467b48Spatrick   const SCEV *MaxLHS = Max->getOperand(0);
234609467b48Spatrick   const SCEV *MaxRHS = Max->getOperand(1);
234709467b48Spatrick 
234809467b48Spatrick   // ScalarEvolution canonicalizes constants to the left. For < and >, look
234909467b48Spatrick   // for a comparison with 1. For <= and >=, a comparison with zero.
235009467b48Spatrick   if (!MaxLHS ||
235109467b48Spatrick       (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
235209467b48Spatrick     return Cond;
235309467b48Spatrick 
235409467b48Spatrick   // Check the relevant induction variable for conformance to
235509467b48Spatrick   // the pattern.
235609467b48Spatrick   const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
235709467b48Spatrick   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
235809467b48Spatrick   if (!AR || !AR->isAffine() ||
235909467b48Spatrick       AR->getStart() != One ||
236009467b48Spatrick       AR->getStepRecurrence(SE) != One)
236109467b48Spatrick     return Cond;
236209467b48Spatrick 
236309467b48Spatrick   assert(AR->getLoop() == L &&
236409467b48Spatrick          "Loop condition operand is an addrec in a different loop!");
236509467b48Spatrick 
236609467b48Spatrick   // Check the right operand of the select, and remember it, as it will
236709467b48Spatrick   // be used in the new comparison instruction.
236809467b48Spatrick   Value *NewRHS = nullptr;
236909467b48Spatrick   if (ICmpInst::isTrueWhenEqual(Pred)) {
237009467b48Spatrick     // Look for n+1, and grab n.
237109467b48Spatrick     if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
237209467b48Spatrick       if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
237309467b48Spatrick          if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
237409467b48Spatrick            NewRHS = BO->getOperand(0);
237509467b48Spatrick     if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
237609467b48Spatrick       if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
237709467b48Spatrick         if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
237809467b48Spatrick           NewRHS = BO->getOperand(0);
237909467b48Spatrick     if (!NewRHS)
238009467b48Spatrick       return Cond;
238109467b48Spatrick   } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
238209467b48Spatrick     NewRHS = Sel->getOperand(1);
238309467b48Spatrick   else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
238409467b48Spatrick     NewRHS = Sel->getOperand(2);
238509467b48Spatrick   else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
238609467b48Spatrick     NewRHS = SU->getValue();
238709467b48Spatrick   else
238809467b48Spatrick     // Max doesn't match expected pattern.
238909467b48Spatrick     return Cond;
239009467b48Spatrick 
239109467b48Spatrick   // Determine the new comparison opcode. It may be signed or unsigned,
239209467b48Spatrick   // and the original comparison may be either equality or inequality.
239309467b48Spatrick   if (Cond->getPredicate() == CmpInst::ICMP_EQ)
239409467b48Spatrick     Pred = CmpInst::getInversePredicate(Pred);
239509467b48Spatrick 
239609467b48Spatrick   // Ok, everything looks ok to change the condition into an SLT or SGE and
239709467b48Spatrick   // delete the max calculation.
239809467b48Spatrick   ICmpInst *NewCond =
239909467b48Spatrick     new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
240009467b48Spatrick 
240109467b48Spatrick   // Delete the max calculation instructions.
240273471bf0Spatrick   NewCond->setDebugLoc(Cond->getDebugLoc());
240309467b48Spatrick   Cond->replaceAllUsesWith(NewCond);
240409467b48Spatrick   CondUse->setUser(NewCond);
240509467b48Spatrick   Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
240609467b48Spatrick   Cond->eraseFromParent();
240709467b48Spatrick   Sel->eraseFromParent();
240809467b48Spatrick   if (Cmp->use_empty())
240909467b48Spatrick     Cmp->eraseFromParent();
241009467b48Spatrick   return NewCond;
241109467b48Spatrick }
241209467b48Spatrick 
241309467b48Spatrick /// Change loop terminating condition to use the postinc iv when possible.
241409467b48Spatrick void
241509467b48Spatrick LSRInstance::OptimizeLoopTermCond() {
241609467b48Spatrick   SmallPtrSet<Instruction *, 4> PostIncs;
241709467b48Spatrick 
241809467b48Spatrick   // We need a different set of heuristics for rotated and non-rotated loops.
241909467b48Spatrick   // If a loop is rotated then the latch is also the backedge, so inserting
242009467b48Spatrick   // post-inc expressions just before the latch is ideal. To reduce live ranges
242109467b48Spatrick   // it also makes sense to rewrite terminating conditions to use post-inc
242209467b48Spatrick   // expressions.
242309467b48Spatrick   //
242409467b48Spatrick   // If the loop is not rotated then the latch is not a backedge; the latch
242509467b48Spatrick   // check is done in the loop head. Adding post-inc expressions before the
242609467b48Spatrick   // latch will cause overlapping live-ranges of pre-inc and post-inc expressions
242709467b48Spatrick   // in the loop body. In this case we do *not* want to use post-inc expressions
242809467b48Spatrick   // in the latch check, and we want to insert post-inc expressions before
242909467b48Spatrick   // the backedge.
243009467b48Spatrick   BasicBlock *LatchBlock = L->getLoopLatch();
243109467b48Spatrick   SmallVector<BasicBlock*, 8> ExitingBlocks;
243209467b48Spatrick   L->getExitingBlocks(ExitingBlocks);
2433*d415bd75Srobert   if (!llvm::is_contained(ExitingBlocks, LatchBlock)) {
243409467b48Spatrick     // The backedge doesn't exit the loop; treat this as a head-tested loop.
243509467b48Spatrick     IVIncInsertPos = LatchBlock->getTerminator();
243609467b48Spatrick     return;
243709467b48Spatrick   }
243809467b48Spatrick 
243909467b48Spatrick   // Otherwise treat this as a rotated loop.
244009467b48Spatrick   for (BasicBlock *ExitingBlock : ExitingBlocks) {
244109467b48Spatrick     // Get the terminating condition for the loop if possible.  If we
244209467b48Spatrick     // can, we want to change it to use a post-incremented version of its
244309467b48Spatrick     // induction variable, to allow coalescing the live ranges for the IV into
244409467b48Spatrick     // one register value.
244509467b48Spatrick 
244609467b48Spatrick     BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
244709467b48Spatrick     if (!TermBr)
244809467b48Spatrick       continue;
244909467b48Spatrick     // FIXME: Overly conservative, termination condition could be an 'or' etc..
245009467b48Spatrick     if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
245109467b48Spatrick       continue;
245209467b48Spatrick 
245309467b48Spatrick     // Search IVUsesByStride to find Cond's IVUse if there is one.
245409467b48Spatrick     IVStrideUse *CondUse = nullptr;
245509467b48Spatrick     ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
245609467b48Spatrick     if (!FindIVUserForCond(Cond, CondUse))
245709467b48Spatrick       continue;
245809467b48Spatrick 
245909467b48Spatrick     // If the trip count is computed in terms of a max (due to ScalarEvolution
246009467b48Spatrick     // being unable to find a sufficient guard, for example), change the loop
246109467b48Spatrick     // comparison to use SLT or ULT instead of NE.
246209467b48Spatrick     // One consequence of doing this now is that it disrupts the count-down
246309467b48Spatrick     // optimization. That's not always a bad thing though, because in such
246409467b48Spatrick     // cases it may still be worthwhile to avoid a max.
246509467b48Spatrick     Cond = OptimizeMax(Cond, CondUse);
246609467b48Spatrick 
246709467b48Spatrick     // If this exiting block dominates the latch block, it may also use
246809467b48Spatrick     // the post-inc value if it won't be shared with other uses.
246909467b48Spatrick     // Check for dominance.
247009467b48Spatrick     if (!DT.dominates(ExitingBlock, LatchBlock))
247109467b48Spatrick       continue;
247209467b48Spatrick 
247309467b48Spatrick     // Conservatively avoid trying to use the post-inc value in non-latch
247409467b48Spatrick     // exits if there may be pre-inc users in intervening blocks.
247509467b48Spatrick     if (LatchBlock != ExitingBlock)
247609467b48Spatrick       for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
247709467b48Spatrick         // Test if the use is reachable from the exiting block. This dominator
247809467b48Spatrick         // query is a conservative approximation of reachability.
247909467b48Spatrick         if (&*UI != CondUse &&
248009467b48Spatrick             !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
248109467b48Spatrick           // Conservatively assume there may be reuse if the quotient of their
248209467b48Spatrick           // strides could be a legal scale.
248309467b48Spatrick           const SCEV *A = IU.getStride(*CondUse, L);
248409467b48Spatrick           const SCEV *B = IU.getStride(*UI, L);
248509467b48Spatrick           if (!A || !B) continue;
248609467b48Spatrick           if (SE.getTypeSizeInBits(A->getType()) !=
248709467b48Spatrick               SE.getTypeSizeInBits(B->getType())) {
248809467b48Spatrick             if (SE.getTypeSizeInBits(A->getType()) >
248909467b48Spatrick                 SE.getTypeSizeInBits(B->getType()))
249009467b48Spatrick               B = SE.getSignExtendExpr(B, A->getType());
249109467b48Spatrick             else
249209467b48Spatrick               A = SE.getSignExtendExpr(A, B->getType());
249309467b48Spatrick           }
249409467b48Spatrick           if (const SCEVConstant *D =
249509467b48Spatrick                 dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
249609467b48Spatrick             const ConstantInt *C = D->getValue();
249709467b48Spatrick             // Stride of one or negative one can have reuse with non-addresses.
249809467b48Spatrick             if (C->isOne() || C->isMinusOne())
249909467b48Spatrick               goto decline_post_inc;
250009467b48Spatrick             // Avoid weird situations.
250109467b48Spatrick             if (C->getValue().getMinSignedBits() >= 64 ||
250209467b48Spatrick                 C->getValue().isMinSignedValue())
250309467b48Spatrick               goto decline_post_inc;
250409467b48Spatrick             // Check for possible scaled-address reuse.
250509467b48Spatrick             if (isAddressUse(TTI, UI->getUser(), UI->getOperandValToReplace())) {
250609467b48Spatrick               MemAccessTy AccessTy = getAccessType(
250709467b48Spatrick                   TTI, UI->getUser(), UI->getOperandValToReplace());
250809467b48Spatrick               int64_t Scale = C->getSExtValue();
250909467b48Spatrick               if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
251009467b48Spatrick                                             /*BaseOffset=*/0,
251109467b48Spatrick                                             /*HasBaseReg=*/false, Scale,
251209467b48Spatrick                                             AccessTy.AddrSpace))
251309467b48Spatrick                 goto decline_post_inc;
251409467b48Spatrick               Scale = -Scale;
251509467b48Spatrick               if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
251609467b48Spatrick                                             /*BaseOffset=*/0,
251709467b48Spatrick                                             /*HasBaseReg=*/false, Scale,
251809467b48Spatrick                                             AccessTy.AddrSpace))
251909467b48Spatrick                 goto decline_post_inc;
252009467b48Spatrick             }
252109467b48Spatrick           }
252209467b48Spatrick         }
252309467b48Spatrick 
252409467b48Spatrick     LLVM_DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "
252509467b48Spatrick                       << *Cond << '\n');
252609467b48Spatrick 
252709467b48Spatrick     // It's possible for the setcc instruction to be anywhere in the loop, and
252809467b48Spatrick     // possible for it to have multiple users.  If it is not immediately before
252909467b48Spatrick     // the exiting block branch, move it.
253073471bf0Spatrick     if (Cond->getNextNonDebugInstruction() != TermBr) {
253109467b48Spatrick       if (Cond->hasOneUse()) {
253209467b48Spatrick         Cond->moveBefore(TermBr);
253309467b48Spatrick       } else {
253409467b48Spatrick         // Clone the terminating condition and insert into the loopend.
253509467b48Spatrick         ICmpInst *OldCond = Cond;
253609467b48Spatrick         Cond = cast<ICmpInst>(Cond->clone());
253709467b48Spatrick         Cond->setName(L->getHeader()->getName() + ".termcond");
2538*d415bd75Srobert         Cond->insertInto(ExitingBlock, TermBr->getIterator());
253909467b48Spatrick 
254009467b48Spatrick         // Clone the IVUse, as the old use still exists!
254109467b48Spatrick         CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
254209467b48Spatrick         TermBr->replaceUsesOfWith(OldCond, Cond);
254309467b48Spatrick       }
254409467b48Spatrick     }
254509467b48Spatrick 
254609467b48Spatrick     // If we get to here, we know that we can transform the setcc instruction to
254709467b48Spatrick     // use the post-incremented version of the IV, allowing us to coalesce the
254809467b48Spatrick     // live ranges for the IV correctly.
254909467b48Spatrick     CondUse->transformToPostInc(L);
255009467b48Spatrick     Changed = true;
255109467b48Spatrick 
255209467b48Spatrick     PostIncs.insert(Cond);
255309467b48Spatrick   decline_post_inc:;
255409467b48Spatrick   }
255509467b48Spatrick 
255609467b48Spatrick   // Determine an insertion point for the loop induction variable increment. It
255709467b48Spatrick   // must dominate all the post-inc comparisons we just set up, and it must
255809467b48Spatrick   // dominate the loop latch edge.
255909467b48Spatrick   IVIncInsertPos = L->getLoopLatch()->getTerminator();
2560*d415bd75Srobert   for (Instruction *Inst : PostIncs)
2561*d415bd75Srobert     IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst);
256209467b48Spatrick }
256309467b48Spatrick 
256409467b48Spatrick /// Determine if the given use can accommodate a fixup at the given offset and
256509467b48Spatrick /// other details. If so, update the use and return true.
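///
/// For example (illustrative), a use currently covering offsets [0, 8] can
/// absorb a fixup at offset 16 only if an address with immediate 16 - 0 and
/// a base register remains foldable; if so, MaxOffset is widened to 16.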
256609467b48Spatrick bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
256709467b48Spatrick                                      bool HasBaseReg, LSRUse::KindType Kind,
256809467b48Spatrick                                      MemAccessTy AccessTy) {
256909467b48Spatrick   int64_t NewMinOffset = LU.MinOffset;
257009467b48Spatrick   int64_t NewMaxOffset = LU.MaxOffset;
257109467b48Spatrick   MemAccessTy NewAccessTy = AccessTy;
257209467b48Spatrick 
257309467b48Spatrick   // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
257409467b48Spatrick   // something conservative, however this can pessimize in the case that one of
257509467b48Spatrick   // the uses will have all its uses outside the loop, for example.
257609467b48Spatrick   if (LU.Kind != Kind)
257709467b48Spatrick     return false;
257809467b48Spatrick 
257909467b48Spatrick   // Check for a mismatched access type, and fall back conservatively as needed.
258009467b48Spatrick   // TODO: Be less conservative when the type is similar and can use the same
258109467b48Spatrick   // addressing modes.
258209467b48Spatrick   if (Kind == LSRUse::Address) {
258309467b48Spatrick     if (AccessTy.MemTy != LU.AccessTy.MemTy) {
258409467b48Spatrick       NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
258509467b48Spatrick                                             AccessTy.AddrSpace);
258609467b48Spatrick     }
258709467b48Spatrick   }
258809467b48Spatrick 
258909467b48Spatrick   // Conservatively assume HasBaseReg is true for now.
259009467b48Spatrick   if (NewOffset < LU.MinOffset) {
259109467b48Spatrick     if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
259209467b48Spatrick                           LU.MaxOffset - NewOffset, HasBaseReg))
259309467b48Spatrick       return false;
259409467b48Spatrick     NewMinOffset = NewOffset;
259509467b48Spatrick   } else if (NewOffset > LU.MaxOffset) {
259609467b48Spatrick     if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
259709467b48Spatrick                           NewOffset - LU.MinOffset, HasBaseReg))
259809467b48Spatrick       return false;
259909467b48Spatrick     NewMaxOffset = NewOffset;
260009467b48Spatrick   }
260109467b48Spatrick 
260209467b48Spatrick   // Update the use.
260309467b48Spatrick   LU.MinOffset = NewMinOffset;
260409467b48Spatrick   LU.MaxOffset = NewMaxOffset;
260509467b48Spatrick   LU.AccessTy = NewAccessTy;
260609467b48Spatrick   return true;
260709467b48Spatrick }
260809467b48Spatrick 
260909467b48Spatrick /// Return an LSRUse index and an offset value for a fixup which needs the given
261009467b48Spatrick /// expression, with the given kind and optional access type.  Either reuse an
261109467b48Spatrick /// existing use or create a new one, as needed.
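///
/// For example (illustrative), fixups needing (%base + 4) and (%base + 12)
/// with the same kind can share a single LSRUse keyed on %base, with the
/// per-fixup offsets folded into MinOffset = 4 and MaxOffset = 12.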
261209467b48Spatrick std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
261309467b48Spatrick                                                LSRUse::KindType Kind,
261409467b48Spatrick                                                MemAccessTy AccessTy) {
261509467b48Spatrick   const SCEV *Copy = Expr;
261609467b48Spatrick   int64_t Offset = ExtractImmediate(Expr, SE);
261709467b48Spatrick 
261809467b48Spatrick   // Basic uses can't accept any offset, for example.
261909467b48Spatrick   if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
262009467b48Spatrick                         Offset, /*HasBaseReg=*/ true)) {
262109467b48Spatrick     Expr = Copy;
262209467b48Spatrick     Offset = 0;
262309467b48Spatrick   }
262409467b48Spatrick 
262509467b48Spatrick   std::pair<UseMapTy::iterator, bool> P =
262609467b48Spatrick     UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
262709467b48Spatrick   if (!P.second) {
262809467b48Spatrick     // A use already existed with this base.
262909467b48Spatrick     size_t LUIdx = P.first->second;
263009467b48Spatrick     LSRUse &LU = Uses[LUIdx];
263109467b48Spatrick     if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
263209467b48Spatrick       // Reuse this use.
263309467b48Spatrick       return std::make_pair(LUIdx, Offset);
263409467b48Spatrick   }
263509467b48Spatrick 
263609467b48Spatrick   // Create a new use.
263709467b48Spatrick   size_t LUIdx = Uses.size();
263809467b48Spatrick   P.first->second = LUIdx;
263909467b48Spatrick   Uses.push_back(LSRUse(Kind, AccessTy));
264009467b48Spatrick   LSRUse &LU = Uses[LUIdx];
264109467b48Spatrick 
264209467b48Spatrick   LU.MinOffset = Offset;
264309467b48Spatrick   LU.MaxOffset = Offset;
264409467b48Spatrick   return std::make_pair(LUIdx, Offset);
264509467b48Spatrick }
264609467b48Spatrick 
264709467b48Spatrick /// Delete the given use from the Uses list.
264809467b48Spatrick void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
264909467b48Spatrick   if (&LU != &Uses.back())
265009467b48Spatrick     std::swap(LU, Uses.back());
265109467b48Spatrick   Uses.pop_back();
265209467b48Spatrick 
265309467b48Spatrick   // Update RegUses.
265409467b48Spatrick   RegUses.swapAndDropUse(LUIdx, Uses.size());
265509467b48Spatrick }
265609467b48Spatrick 
265709467b48Spatrick /// Look for a use distinct from OrigLU which has a formula with the same
265809467b48Spatrick /// registers as the given formula.
265909467b48Spatrick LSRUse *
266009467b48Spatrick LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
266109467b48Spatrick                                        const LSRUse &OrigLU) {
266209467b48Spatrick   // Search all uses for the formula. This could be more clever.
266309467b48Spatrick   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
266409467b48Spatrick     LSRUse &LU = Uses[LUIdx];
266509467b48Spatrick     // Check whether this use is close enough to OrigLU, to see whether it's
266609467b48Spatrick     // worthwhile looking through its formulae.
266709467b48Spatrick     // Ignore ICmpZero uses because they may contain formulae generated by
266809467b48Spatrick     // GenerateICmpZeroScales, in which case adding fixup offsets may
266909467b48Spatrick     // be invalid.
267009467b48Spatrick     if (&LU != &OrigLU &&
267109467b48Spatrick         LU.Kind != LSRUse::ICmpZero &&
267209467b48Spatrick         LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
267309467b48Spatrick         LU.WidestFixupType == OrigLU.WidestFixupType &&
267409467b48Spatrick         LU.HasFormulaWithSameRegs(OrigF)) {
267509467b48Spatrick       // Scan through this use's formulae.
267609467b48Spatrick       for (const Formula &F : LU.Formulae) {
267709467b48Spatrick         // Check to see if this formula has the same registers and symbols
267809467b48Spatrick         // as OrigF.
267909467b48Spatrick         if (F.BaseRegs == OrigF.BaseRegs &&
268009467b48Spatrick             F.ScaledReg == OrigF.ScaledReg &&
268109467b48Spatrick             F.BaseGV == OrigF.BaseGV &&
268209467b48Spatrick             F.Scale == OrigF.Scale &&
268309467b48Spatrick             F.UnfoldedOffset == OrigF.UnfoldedOffset) {
268409467b48Spatrick           if (F.BaseOffset == 0)
268509467b48Spatrick             return &LU;
268609467b48Spatrick           // This is the formula where all the registers and symbols matched;
268709467b48Spatrick           // there aren't going to be any others. Since we declined it, we
268809467b48Spatrick           // can skip the rest of the formulae and proceed to the next LSRUse.
268909467b48Spatrick           break;
269009467b48Spatrick         }
269109467b48Spatrick       }
269209467b48Spatrick     }
269309467b48Spatrick   }
269409467b48Spatrick 
269509467b48Spatrick   // Nothing looked good.
269609467b48Spatrick   return nullptr;
269709467b48Spatrick }
269809467b48Spatrick 
269909467b48Spatrick void LSRInstance::CollectInterestingTypesAndFactors() {
270009467b48Spatrick   SmallSetVector<const SCEV *, 4> Strides;
270109467b48Spatrick 
270209467b48Spatrick   // Collect interesting types and strides.
270309467b48Spatrick   SmallVector<const SCEV *, 4> Worklist;
270409467b48Spatrick   for (const IVStrideUse &U : IU) {
270509467b48Spatrick     const SCEV *Expr = IU.getExpr(U);
270609467b48Spatrick 
270709467b48Spatrick     // Collect interesting types.
270809467b48Spatrick     Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
270909467b48Spatrick 
271009467b48Spatrick     // Add strides for mentioned loops.
271109467b48Spatrick     Worklist.push_back(Expr);
271209467b48Spatrick     do {
271309467b48Spatrick       const SCEV *S = Worklist.pop_back_val();
271409467b48Spatrick       if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
271509467b48Spatrick         if (AR->getLoop() == L)
271609467b48Spatrick           Strides.insert(AR->getStepRecurrence(SE));
271709467b48Spatrick         Worklist.push_back(AR->getStart());
271809467b48Spatrick       } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
2719*d415bd75Srobert         append_range(Worklist, Add->operands());
272009467b48Spatrick       }
272109467b48Spatrick     } while (!Worklist.empty());
272209467b48Spatrick   }
272309467b48Spatrick 
272409467b48Spatrick   // Compute interesting factors from the set of interesting strides.
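  // For example (illustrative), strides of 4 and 8 yield the factor 2,
  // since one stride exactly divides the other.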
272509467b48Spatrick   for (SmallSetVector<const SCEV *, 4>::const_iterator
272609467b48Spatrick        I = Strides.begin(), E = Strides.end(); I != E; ++I)
272709467b48Spatrick     for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
272809467b48Spatrick          std::next(I); NewStrideIter != E; ++NewStrideIter) {
272909467b48Spatrick       const SCEV *OldStride = *I;
273009467b48Spatrick       const SCEV *NewStride = *NewStrideIter;
273109467b48Spatrick 
273209467b48Spatrick       if (SE.getTypeSizeInBits(OldStride->getType()) !=
273309467b48Spatrick           SE.getTypeSizeInBits(NewStride->getType())) {
273409467b48Spatrick         if (SE.getTypeSizeInBits(OldStride->getType()) >
273509467b48Spatrick             SE.getTypeSizeInBits(NewStride->getType()))
273609467b48Spatrick           NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
273709467b48Spatrick         else
273809467b48Spatrick           OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
273909467b48Spatrick       }
274009467b48Spatrick       if (const SCEVConstant *Factor =
274109467b48Spatrick             dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
274209467b48Spatrick                                                         SE, true))) {
274373471bf0Spatrick         if (Factor->getAPInt().getMinSignedBits() <= 64 && !Factor->isZero())
274409467b48Spatrick           Factors.insert(Factor->getAPInt().getSExtValue());
274509467b48Spatrick       } else if (const SCEVConstant *Factor =
274609467b48Spatrick                    dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
274709467b48Spatrick                                                                NewStride,
274809467b48Spatrick                                                                SE, true))) {
274973471bf0Spatrick         if (Factor->getAPInt().getMinSignedBits() <= 64 && !Factor->isZero())
275009467b48Spatrick           Factors.insert(Factor->getAPInt().getSExtValue());
275109467b48Spatrick       }
275209467b48Spatrick     }
275309467b48Spatrick 
275409467b48Spatrick   // If all uses use the same type, don't bother looking for truncation-based
275509467b48Spatrick   // reuse.
275609467b48Spatrick   if (Types.size() == 1)
275709467b48Spatrick     Types.clear();
275809467b48Spatrick 
275909467b48Spatrick   LLVM_DEBUG(print_factors_and_types(dbgs()));
276009467b48Spatrick }
276109467b48Spatrick 
276209467b48Spatrick /// Helper for CollectChains that finds an IV operand (computed by an AddRec in
276309467b48Spatrick /// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
276409467b48Spatrick /// IVStrideUses, we could partially skip this.
276509467b48Spatrick static User::op_iterator
276609467b48Spatrick findIVOperand(User::op_iterator OI, User::op_iterator OE,
276709467b48Spatrick               Loop *L, ScalarEvolution &SE) {
276809467b48Spatrick   for(; OI != OE; ++OI) {
276909467b48Spatrick     if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
277009467b48Spatrick       if (!SE.isSCEVable(Oper->getType()))
277109467b48Spatrick         continue;
277209467b48Spatrick 
277309467b48Spatrick       if (const SCEVAddRecExpr *AR =
277409467b48Spatrick           dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
277509467b48Spatrick         if (AR->getLoop() == L)
277609467b48Spatrick           break;
277709467b48Spatrick       }
277809467b48Spatrick     }
277909467b48Spatrick   }
278009467b48Spatrick   return OI;
278109467b48Spatrick }
278209467b48Spatrick 
278309467b48Spatrick /// IVChain logic must consistently peek base TruncInst operands, so wrap it in
278409467b48Spatrick /// a convenient helper.
278509467b48Spatrick static Value *getWideOperand(Value *Oper) {
278609467b48Spatrick   if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
278709467b48Spatrick     return Trunc->getOperand(0);
278809467b48Spatrick   return Oper;
278909467b48Spatrick }
279009467b48Spatrick 
279109467b48Spatrick /// Return true if we allow an IV chain to include both types.
279209467b48Spatrick static bool isCompatibleIVType(Value *LVal, Value *RVal) {
279309467b48Spatrick   Type *LType = LVal->getType();
279409467b48Spatrick   Type *RType = RVal->getType();
279509467b48Spatrick   return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy() &&
279609467b48Spatrick                               // Different address spaces means (possibly)
279709467b48Spatrick                               // different types of the pointer implementation,
279809467b48Spatrick                               // e.g. i16 vs i32 so disallow that.
279909467b48Spatrick                               (LType->getPointerAddressSpace() ==
280009467b48Spatrick                                RType->getPointerAddressSpace()));
280109467b48Spatrick }
280209467b48Spatrick 
280309467b48Spatrick /// Return an approximation of this SCEV expression's "base", or NULL for any
280409467b48Spatrick /// constant. Returning the expression itself is conservative. Returning a
280509467b48Spatrick /// deeper subexpression is more precise and valid as long as it isn't less
280609467b48Spatrick /// complex than another subexpression. For expressions involving multiple
280709467b48Spatrick /// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
280809467b48Spatrick /// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
280909467b48Spatrick /// IVInc==b-a.
281009467b48Spatrick ///
281109467b48Spatrick /// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
281209467b48Spatrick /// SCEVUnknown, we simply return the rightmost SCEV operand.
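///
/// For example (illustrative), for the add expression ((4 * %i) + %base)
/// this returns %base: the pointer-typed SCEVUnknown sorts rightmost and
/// scaled (scMulExpr) operands are skipped.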
281309467b48Spatrick static const SCEV *getExprBase(const SCEV *S) {
281409467b48Spatrick   switch (S->getSCEVType()) {
281509467b48Spatrick   default: // including scUnknown.
281609467b48Spatrick     return S;
281709467b48Spatrick   case scConstant:
281809467b48Spatrick     return nullptr;
281909467b48Spatrick   case scTruncate:
282009467b48Spatrick     return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
282109467b48Spatrick   case scZeroExtend:
282209467b48Spatrick     return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
282309467b48Spatrick   case scSignExtend:
282409467b48Spatrick     return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
282509467b48Spatrick   case scAddExpr: {
282609467b48Spatrick     // Skip over scaled operands (scMulExpr) to follow add operands as long as
282709467b48Spatrick     // there's nothing more complex.
282809467b48Spatrick     // FIXME: not sure if we want to recognize negation.
282909467b48Spatrick     const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
2830*d415bd75Srobert     for (const SCEV *SubExpr : reverse(Add->operands())) {
283109467b48Spatrick       if (SubExpr->getSCEVType() == scAddExpr)
283209467b48Spatrick         return getExprBase(SubExpr);
283309467b48Spatrick 
283409467b48Spatrick       if (SubExpr->getSCEVType() != scMulExpr)
283509467b48Spatrick         return SubExpr;
283609467b48Spatrick     }
283709467b48Spatrick     return S; // all operands are scaled, be conservative.
283809467b48Spatrick   }
283909467b48Spatrick   case scAddRecExpr:
284009467b48Spatrick     return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
284109467b48Spatrick   }
284273471bf0Spatrick   llvm_unreachable("Unknown SCEV kind!");
284309467b48Spatrick }
284409467b48Spatrick 
284509467b48Spatrick /// Return true if the chain increment is profitable to expand into a loop
284609467b48Spatrick /// invariant value, which may require its own register. A profitable chain
284709467b48Spatrick /// increment will be an offset relative to the same base. We allow such offsets
284809467b48Spatrick /// to potentially be used as chain increment as long as it's not obviously
284909467b48Spatrick /// expensive to expand using real instructions.
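/// For example (illustrative), stepping from a[i] to a[i+4] is a plain
/// constant offset from the head, while a nonconstant increment such as
/// (%n * 8) is accepted only when expanding it as a loop-invariant value
/// would be cheap.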
285009467b48Spatrick bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
285109467b48Spatrick                                     const SCEV *IncExpr,
285209467b48Spatrick                                     ScalarEvolution &SE) {
285309467b48Spatrick   // Aggressively form chains when -stress-ivchain.
285409467b48Spatrick   if (StressIVChain)
285509467b48Spatrick     return true;
285609467b48Spatrick 
285709467b48Spatrick   // Do not replace a constant offset from IV head with a nonconstant IV
285809467b48Spatrick   // increment.
285909467b48Spatrick   if (!isa<SCEVConstant>(IncExpr)) {
286009467b48Spatrick     const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
286109467b48Spatrick     if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
286209467b48Spatrick       return false;
286309467b48Spatrick   }
286409467b48Spatrick 
286509467b48Spatrick   SmallPtrSet<const SCEV*, 8> Processed;
286609467b48Spatrick   return !isHighCostExpansion(IncExpr, Processed, SE);
286709467b48Spatrick }

/// Return true if the number of registers needed for the chain is estimated to
/// be less than the number required for the individual IV users. First
/// prohibit any IV users that keep the IV live across increments (the Users
/// set should be empty). Next count the number and type of increments in the
/// chain.
///
/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
/// effectively use postinc addressing modes. Only consider it profitable if
/// the increments can be computed in fewer registers when chained.
///
/// TODO: Consider IVInc free if it's already used in another chain.
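///
/// A worked example of the bookkeeping below (illustrative): cost starts at 1
/// for the chain's own register; a chain ending in a header phi whose SCEV
/// matches the first link's increment expression drops it to 0; more than one
/// constant increment subtracts one more, reaching -1, which satisfies
/// cost < 0 and keeps the chain.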
static bool isProfitableChain(IVChain &Chain,
                              SmallPtrSetImpl<Instruction *> &Users,
                              ScalarEvolution &SE,
                              const TargetTransformInfo &TTI) {
  if (StressIVChain)
    return true;

  if (!Chain.hasIncs())
    return false;

  if (!Users.empty()) {
    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
               for (Instruction *Inst : Users)
                 dbgs() << "  " << *Inst << "\n");
    return false;
  }
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

  // The chain itself may require a register, so initialize cost to 1.
  int cost = 1;

  // A complete chain likely eliminates the need for keeping the original IV in
  // a register. LSR does not currently know how to form a complete chain
  // unless the header phi already exists.
  if (isa<PHINode>(Chain.tailUserInst())
      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
    --cost;
  }
  const SCEV *LastIncExpr = nullptr;
  unsigned NumConstIncrements = 0;
  unsigned NumVarIncrements = 0;
  unsigned NumReusedIncrements = 0;

  if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))
    return true;

  for (const IVInc &Inc : Chain) {
    if (TTI.isProfitableLSRChainElement(Inc.UserInst))
      return true;
    if (Inc.IncExpr->isZero())
      continue;

    // Incrementing by zero or some constant is neutral. We assume constants
    // can be folded into an addressing mode or an add's immediate operand.
    if (isa<SCEVConstant>(Inc.IncExpr)) {
      ++NumConstIncrements;
      continue;
    }

    if (Inc.IncExpr == LastIncExpr)
      ++NumReusedIncrements;
    else
      ++NumVarIncrements;

    LastIncExpr = Inc.IncExpr;
  }
  // An IV chain with a single increment is handled by LSR's postinc
  // uses. However, a chain with multiple increments requires keeping the IV's
  // value live longer than it needs to be if chained.
  if (NumConstIncrements > 1)
    --cost;

  // Materializing increment expressions in the preheader that didn't exist in
  // the original code may cost a register. For example, sign-extended array
  // indices can produce ridiculous increments like this:
  // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
  cost += NumVarIncrements;

  // Reusing variable increments likely saves a register to hold the multiple
  // of the stride.
  cost -= NumReusedIncrements;

  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
                    << "\n");

  return cost < 0;
}

/// Add this IV user to an existing chain or make it the head of a new chain.
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                                   SmallVectorImpl<ChainUsers> &ChainUsersVec) {
  // When IVs are used as types of varying widths, they are generally converted
  // to a wider type with some uses remaining narrow under a (free) trunc.
  Value *const NextIV = getWideOperand(IVOper);
  const SCEV *const OperExpr = SE.getSCEV(NextIV);
  const SCEV *const OperExprBase = getExprBase(OperExpr);

  // Visit all existing chains. Check whether this user's IVOper can be
  // computed as a profitable loop-invariant increment from the last link in
  // the chain.
  unsigned ChainIdx = 0, NChains = IVChainVec.size();
  const SCEV *LastIncExpr = nullptr;
  for (; ChainIdx < NChains; ++ChainIdx) {
    IVChain &Chain = IVChainVec[ChainIdx];

    // Prune the solution space aggressively by checking that both IV operands
    // are expressions that operate on the same unscaled SCEVUnknown. This
    // "base" will be canceled by the subsequent getMinusSCEV call. Checking
    // first avoids creating extra SCEV expressions.
    if (!StressIVChain && Chain.ExprBase != OperExprBase)
      continue;

    Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
    if (!isCompatibleIVType(PrevIV, NextIV))
      continue;

    // A phi node terminates a chain.
    if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
      continue;

    // The increment must be loop-invariant so it can be kept in a register.
    const SCEV *PrevExpr = SE.getSCEV(PrevIV);
    const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
    if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.isLoopInvariant(IncExpr, L))
      continue;

    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
      LastIncExpr = IncExpr;
      break;
    }
  }
  // If we haven't found a chain, create a new one, unless we hit the max.
  // Don't bother for phi nodes, because they must be last in the chain.
  if (ChainIdx == NChains) {
    if (isa<PHINode>(UserInst))
      return;
    if (NChains >= MaxChains && !StressIVChain) {
      LLVM_DEBUG(dbgs() << "IV Chain Limit\n");
      return;
    }
    LastIncExpr = OperExpr;
    // IVUsers may have skipped over sign/zero extensions. We don't currently
    // attempt to form chains involving extensions unless they can be hoisted
    // into this loop's AddRec.
    if (!isa<SCEVAddRecExpr>(LastIncExpr))
      return;
    ++NChains;
    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
                                 OperExprBase));
    ChainUsersVec.resize(NChains);
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
                      << ") IV=" << *LastIncExpr << "\n");
  } else {
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << "  Inc: (" << *UserInst
                      << ") IV+" << *LastIncExpr << "\n");
    // Add this IV user to the end of the chain.
    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
  }
  IVChain &Chain = IVChainVec[ChainIdx];

  SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
  // This chain's NearUsers become FarUsers.
  if (!LastIncExpr->isZero()) {
    ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
                                            NearUsers.end());
    NearUsers.clear();
  }

  // All other uses of IVOperand become near uses of the chain.
  // We currently ignore intermediate values within SCEV expressions, assuming
  // they will eventually be used by the current chain, or can be computed
  // from one of the chain increments. To be more precise we could
  // transitively follow its users and only add leaf IV users to the set.
  for (User *U : IVOper->users()) {
    Instruction *OtherUse = dyn_cast<Instruction>(U);
    if (!OtherUse)
      continue;
    // Uses in the chain will no longer be uses if the chain is formed.
    // Include the head of the chain in this iteration (not Chain.begin()).
    IVChain::const_iterator IncIter = Chain.Incs.begin();
    IVChain::const_iterator IncEnd = Chain.Incs.end();
    for (; IncIter != IncEnd; ++IncIter) {
      if (IncIter->UserInst == OtherUse)
        break;
    }
    if (IncIter != IncEnd)
      continue;

    if (SE.isSCEVable(OtherUse->getType())
        && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
        && IU.isIVUserOrOperand(OtherUse)) {
      continue;
    }
    NearUsers.insert(OtherUse);
  }

  // Since this user is part of the chain, it's no longer considered a use
  // of the chain.
  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
}

/// Populate the vector of Chains.
///
/// This decreases ILP at the architecture level. Targets with ample registers,
/// multiple memory ports, and no register renaming probably don't want
/// this. However, such targets should probably disable LSR altogether.
///
/// The job of LSR is to make a reasonable choice of induction variables across
/// the loop. Subsequent passes can easily "unchain" computation exposing more
/// ILP *within the loop* if the target wants it.
///
/// Finding the best IV chain is potentially a scheduling problem. Since LSR
/// will not reorder memory operations, it will recognize this as a chain, but
/// will generate redundant IV increments. Ideally this would be corrected
/// later by a smart scheduler:
///        = A[i]
///        = A[i+x]
/// A[i]   =
/// A[i+x] =
///
/// TODO: Walk the entire domtree within this loop, not just the path to the
/// loop latch. This will discover chains on side paths, but requires
/// maintaining multiple copies of the Chains state.
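///
/// An illustrative outcome (hypothetical IR): three loads from b[i], b[i+1],
/// and b[i+2] within one iteration can form a single chain whose links differ
/// by the constant element size, letting GenerateIVChain later derive each
/// address from the previous one instead of keeping several IV-derived
/// addresses live at once.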
void LSRInstance::CollectChains() {
  LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n");
  SmallVector<ChainUsers, 8> ChainUsersVec;

  SmallVector<BasicBlock *,8> LatchPath;
  BasicBlock *LoopHeader = L->getHeader();
  for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
    LatchPath.push_back(Rung->getBlock());
  }
  LatchPath.push_back(LoopHeader);

  // Walk the instruction stream from the loop header to the loop latch.
  for (BasicBlock *BB : reverse(LatchPath)) {
    for (Instruction &I : *BB) {
      // Skip instructions that weren't seen by IVUsers analysis.
      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))
        continue;

      // Ignore users that are part of a SCEV expression. This way we only
      // consider leaf IV Users. This effectively rediscovers a portion of
      // IVUsers analysis but in program order this time.
      if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
        continue;

      // Remove this instruction from any NearUsers set it may be in.
      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(&I);
      }
      // Search for operands that can be chained.
      SmallPtrSet<Instruction*, 4> UniqueOperands;
      User::op_iterator IVOpEnd = I.op_end();
      User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE);
      while (IVOpIter != IVOpEnd) {
        Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
        if (UniqueOperands.insert(IVOpInst).second)
          ChainInstruction(&I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
      }
    } // Continue walking down the instructions.
  } // Continue walking down the domtree.
  // Visit phi backedges to determine if the chain can generate the IV postinc.
  for (PHINode &PN : L->getHeader()->phis()) {
    if (!SE.isSCEVable(PN.getType()))
      continue;

    Instruction *IncV =
        dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
    if (IncV)
      ChainInstruction(&PN, IncV, ChainUsersVec);
  }
  // Remove any unprofitable chains.
  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
    if (!isProfitableChain(IVChainVec[UsersIdx],
                           ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
      continue;
    // Preserve the chain at UsersIdx.
    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);
    ++ChainIdx;
  }
  IVChainVec.resize(ChainIdx);
}

void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (const IVInc &Inc : Chain) {
    LLVM_DEBUG(dbgs() << "        Inc: " << *Inc.UserInst << "\n");
    auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
    assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
    IVIncSet.insert(UseI);
  }
}

/// Return true if the IVInc can be folded into an addressing mode.
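///
/// For example (illustrative): a constant step that fits in 64 signed bits on
/// an address user can typically be absorbed as an addressing-mode immediate
/// (checked via isAlwaysFoldable below); a symbolic or oversized step cannot.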
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
                             Value *Operand, const TargetTransformInfo &TTI) {
  const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
  if (!IncConst || !isAddressUse(TTI, UserInst, Operand))
    return false;

  if (IncConst->getAPInt().getMinSignedBits() > 64)
    return false;

  MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
  int64_t IncOffset = IncConst->getValue()->getSExtValue();
  if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
                        IncOffset, /*HasBaseReg=*/false))
    return false;

  return true;
}

/// Generate an add or subtract for each IVInc in a chain to materialize the IV
/// user's operand from the previous IV user's operand.
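///
/// Sketch of the result (illustrative, hypothetical IR): for a chain with two
/// +8 increments over an i64 IV held in %iv, the users end up consuming %iv,
/// then %iv1 = add i64 %iv, 8, then %iv2 = add i64 %iv1, 8, rather than each
/// operand being expanded independently from the IV.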
void LSRInstance::GenerateIVChain(const IVChain &Chain,
                                  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  // Find the new IVOperand for the head of the chain. It may have been
  // replaced by LSR.
  const IVInc &Head = Chain.Incs[0];
  User::op_iterator IVOpEnd = Head.UserInst->op_end();
  // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
  User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
                                             IVOpEnd, L, SE);
  Value *IVSrc = nullptr;
  while (IVOpIter != IVOpEnd) {
    IVSrc = getWideOperand(*IVOpIter);

    // If this operand computes the expression that the chain needs, we may use
    // it. (Check this after setting IVSrc which is used below.)
    //
    // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
    // narrow for the chain, so we can no longer use it. We do allow using a
    // wider phi, assuming the LSR checked for free truncation. In that case we
    // should already have a truncate on this operand such that
    // getSCEV(IVSrc) == IncExpr.
    if (SE.getSCEV(*IVOpIter) == Head.IncExpr
        || SE.getSCEV(IVSrc) == Head.IncExpr) {
      break;
    }
    IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
  }
  if (IVOpIter == IVOpEnd) {
    // Gracefully give up on this chain.
    LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
    return;
  }
  assert(IVSrc && "Failed to find IV chain source");

  LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
  Type *IVTy = IVSrc->getType();
  Type *IntTy = SE.getEffectiveSCEVType(IVTy);
  const SCEV *LeftOverExpr = nullptr;
  for (const IVInc &Inc : Chain) {
    Instruction *InsertPt = Inc.UserInst;
    if (isa<PHINode>(InsertPt))
      InsertPt = L->getLoopLatch()->getTerminator();

    // IVOper will replace the current IV User's operand. IVSrc is the IV
    // value currently held in a register.
    Value *IVOper = IVSrc;
    if (!Inc.IncExpr->isZero()) {
      // IncExpr was the result of subtraction of two narrow values, so must
      // be signed.
      const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy);
      LeftOverExpr = LeftOverExpr ?
        SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
    }
    if (LeftOverExpr && !LeftOverExpr->isZero()) {
      // Expand the IV increment.
      Rewriter.clearPostInc();
      Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
      const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
                                             SE.getUnknown(IncV));
      IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

      // If an IV increment can't be folded, use it as the next IV value.
      if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {
        assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
        IVSrc = IVOper;
        LeftOverExpr = nullptr;
      }
    }
    Type *OperTy = Inc.IVOperand->getType();
    if (IVTy != OperTy) {
      assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
             "cannot extend a chained IV");
      IRBuilder<> Builder(InsertPt);
      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
    }
    Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
    if (auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))
      DeadInsts.emplace_back(OperandIsInstr);
  }
  // If LSR created a new, wider phi, we may also replace its postinc. We only
  // do this if we also found a wide value for the head of the chain.
  if (isa<PHINode>(Chain.tailUserInst())) {
    for (PHINode &Phi : L->getHeader()->phis()) {
      if (!isCompatibleIVType(&Phi, IVSrc))
        continue;
      Instruction *PostIncV = dyn_cast<Instruction>(
          Phi.getIncomingValueForBlock(L->getLoopLatch()));
      if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
        continue;
      Value *IVOper = IVSrc;
      Type *PostIncTy = PostIncV->getType();
      if (IVTy != PostIncTy) {
        assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
        IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
        Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
        IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
      }
      Phi.replaceUsesOfWith(PostIncV, IVOper);
      DeadInsts.emplace_back(PostIncV);
    }
  }
}

void LSRInstance::CollectFixupsAndInitialFormulae() {
  BranchInst *ExitBranch = nullptr;
  bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);

  // For calculating baseline cost.
  SmallPtrSet<const SCEV *, 16> Regs;
  DenseSet<const SCEV *> VisitedRegs;
  DenseSet<size_t> VisitedLSRUse;

  for (const IVStrideUse &U : IU) {
    Instruction *UserInst = U.getUser();
    // Skip IV users that are part of profitable IV Chains.
    User::op_iterator UseI =
        find(UserInst->operands(), U.getOperandValToReplace());
    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI)) {
      LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
      continue;
    }

    LSRUse::KindType Kind = LSRUse::Basic;
    MemAccessTy AccessTy;
    if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {
      Kind = LSRUse::Address;
      AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace());
    }

    const SCEV *S = IU.getExpr(U);
    PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();

    // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
    // (N - i == 0), and this allows (N - i) to be the expression that we work
    // with rather than just N or i, so we can consider the register
    // requirements for both N and i at the same time. Limiting this code to
    // equality icmps is not a problem because all interesting loops use
    // equality icmps, thanks to IndVarSimplify.
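    //
    // For example (illustrative): "icmp eq %i, %n" with loop-invariant %n is
    // handled as an ICmpZero use whose expression is (%n - %i), i.e. the
    // compare is modeled as (%n - %i) == 0.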
    if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
      // If CI can be saved in some target, like replaced inside hardware loop
      // in PowerPC, no need to generate initial formulae for it.
      if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
        continue;
      if (CI->isEquality()) {
        // Swap the operands if needed to put the OperandValToReplace on the
        // left, for consistency.
        Value *NV = CI->getOperand(1);
        if (NV == U.getOperandValToReplace()) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);
          Changed = true;
        }

        // x == y  -->  x - y == 0
        const SCEV *N = SE.getSCEV(NV);
        if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N) &&
            (!NV->getType()->isPointerTy() ||
             SE.getPointerBase(N) == SE.getPointerBase(S))) {
          // S is normalized, so normalize N before folding it into S
          // to keep the result normalized.
          N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
          Kind = LSRUse::ICmpZero;
          S = SE.getMinusSCEV(N, S);
        } else if (L->isLoopInvariant(NV) &&
                   (!isa<Instruction>(NV) ||
                    DT.dominates(cast<Instruction>(NV), L->getHeader())) &&
                   !NV->getType()->isPointerTy()) {
          // If we can't generally expand the expression (e.g. it contains
          // a divide), but it is already at a loop invariant point before the
          // loop, wrap it in an unknown (to prevent the expander from trying
          // to re-expand in a potentially unsafe way.) The restriction to
          // integer types is required because the unknown hides the base, and
          // SCEV can't compute the difference of two unknown pointers.
          N = SE.getUnknown(NV);
          N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
          Kind = LSRUse::ICmpZero;
          S = SE.getMinusSCEV(N, S);
          assert(!isa<SCEVCouldNotCompute>(S));
        }

        // -1 and the negations of all interesting strides (except the negation
        // of -1) are now also interesting.
        for (size_t i = 0, e = Factors.size(); i != e; ++i)
          if (Factors[i] != -1)
            Factors.insert(-(uint64_t)Factors[i]);
        Factors.insert(-1);
      }
    }

    // Get or create an LSRUse.
    std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
    size_t LUIdx = P.first;
    int64_t Offset = P.second;
    LSRUse &LU = Uses[LUIdx];

    // Record the fixup.
    LSRFixup &LF = LU.getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = TmpPostIncLoops;
    LF.Offset = Offset;
    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);

    // Create SCEV as Formula for calculating baseline cost.
    if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
      Formula F;
      F.initialMatch(S, L, SE);
      BaselineCost.RateFormula(F, Regs, VisitedRegs, LU);
      VisitedLSRUse.insert(LUIdx);
    }

    if (!LU.WidestFixupType ||
        SE.getTypeSizeInBits(LU.WidestFixupType) <
        SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    // If this is the first use of this LSRUse, give it a formula.
    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LUIdx);
      CountRegisters(LU.Formulae.back(), LUIdx);
    }
  }

  LLVM_DEBUG(print_fixups(dbgs()));
}

/// Insert a formula for the given expression into the given use, separating
/// out loop-variant portions from loop-invariant and loop-computable portions.
void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
                                       size_t LUIdx) {
  // Mark uses whose expressions cannot be expanded.
  if (!Rewriter.isSafeToExpand(S))
    LU.RigidFormula = true;

  Formula F;
  F.initialMatch(S, L, SE);
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}

/// Insert a simple single-register formula for the given expression into the
/// given use.
void
LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                       LSRUse &LU, size_t LUIdx) {
  Formula F;
  F.BaseRegs.push_back(S);
  F.HasBaseReg = true;
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}

/// Note which registers are used by the given formula, updating RegUses.
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
  if (F.ScaledReg)
    RegUses.countRegister(F.ScaledReg, LUIdx);
  for (const SCEV *BaseReg : F.BaseRegs)
    RegUses.countRegister(BaseReg, LUIdx);
}

/// If the given formula has not yet been inserted, add it to the list, and
/// return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
  // Do not insert formula that we will not be able to expand.
  assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
         "Formula is illegal");

  if (!LU.InsertFormula(F, *L))
    return false;

  CountRegisters(F, LUIdx);
  return true;
}

/// Check for other uses of loop-invariant values which we're tracking. These
/// other uses will pin these values in registers, making them less profitable
/// for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
void
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
  SmallPtrSet<const SCEV *, 32> Visited;

  while (!Worklist.empty()) {
    const SCEV *S = Worklist.pop_back_val();

    // Don't process the same SCEV twice.
    if (!Visited.insert(S).second)
      continue;

    if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
      append_range(Worklist, N->operands());
    else if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(S))
      Worklist.push_back(C->getOperand());
    else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
      Worklist.push_back(D->getLHS());
      Worklist.push_back(D->getRHS());
    } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
      const Value *V = US->getValue();
      if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
        // Look for instructions defined outside the loop.
        if (L->contains(Inst)) continue;
      } else if (isa<UndefValue>(V))
        // Undef doesn't have a live range, so it doesn't matter.
        continue;
      for (const Use &U : V->uses()) {
        const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
        // Ignore non-instructions.
        if (!UserInst)
          continue;
        // Don't bother if the instruction is an EHPad.
        if (UserInst->isEHPad())
          continue;
        // Ignore instructions in other functions (as can happen with
        // Constants).
        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
          continue;
        // Ignore instructions not dominated by the loop.
        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
          UserInst->getParent() :
          cast<PHINode>(UserInst)->getIncomingBlock(
            PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
        if (!DT.dominates(L->getHeader(), UseBB))
          continue;
        // Don't bother if the instruction is in a BB which ends in an EHPad.
        if (UseBB->getTerminator()->isEHPad())
          continue;

        // Ignore cases in which the currently-examined value could come from
        // a basic block terminated with an EHPad. This checks all incoming
        // blocks of the phi node since it is possible that the same incoming
        // value comes from multiple basic blocks, only some of which may end
        // in an EHPad. If any of them do, a subsequent rewrite attempt by this
        // pass would try to insert instructions into an EHPad, hitting an
        // assertion.
        if (isa<PHINode>(UserInst)) {
          const auto *PhiNode = cast<PHINode>(UserInst);
          bool HasIncompatibleEHPTerminatedBlock = false;
          llvm::Value *ExpectedValue = U;
          for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {
            if (PhiNode->getIncomingValue(I) == ExpectedValue) {
              if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {
                HasIncompatibleEHPTerminatedBlock = true;
                break;
              }
            }
          }
          if (HasIncompatibleEHPTerminatedBlock) {
            continue;
          }
        }

        // Don't bother rewriting PHIs in catchswitch blocks.
        if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator()))
          continue;
        // Ignore uses which are part of other SCEV expressions, to avoid
        // analyzing them multiple times.
        if (SE.isSCEVable(UserInst->getType())) {
          const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
          // If the user is a no-op, look through to its uses.
          if (!isa<SCEVUnknown>(UserS))
            continue;
          if (UserS == US) {
            Worklist.push_back(
              SE.getUnknown(const_cast<Instruction *>(UserInst)));
            continue;
          }
        }
        // Ignore icmp instructions which are already being analyzed.
        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
          unsigned OtherIdx = !U.getOperandNo();
          Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
          if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
            continue;
        }

        std::pair<size_t, int64_t> P = getUse(
            S, LSRUse::Basic, MemAccessTy());
        size_t LUIdx = P.first;
        int64_t Offset = P.second;
        LSRUse &LU = Uses[LUIdx];
        LSRFixup &LF = LU.getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = U;
        LF.Offset = Offset;
        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        if (!LU.WidestFixupType ||
            SE.getTypeSizeInBits(LU.WidestFixupType) <
            SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(US, LU, LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
        break;
      }
    }
  }
}

/// Split S into subexpressions which can be pulled out into separate
/// registers. If C is non-null, multiply each subexpression by C.
///
/// Return remainder expression after factoring the subexpressions captured by
/// Ops. If Ops is complete, return NULL.
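///
/// For example (illustrative): S = ((4 * %a) + %b + {0,+,1}<%L>) splits into
/// Ops = { (4 * %a), %b, {0,+,1}<%L> } and returns null, while an addrec with
/// a zero start is returned unchanged as the remainder.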
static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
                                   SmallVectorImpl<const SCEV *> &Ops,
                                   const Loop *L,
                                   ScalarEvolution &SE,
                                   unsigned Depth = 0) {
  // Arbitrarily cap recursion to protect compile time.
  if (Depth >= 3)
    return S;

  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    // Break out add operands.
    for (const SCEV *S : Add->operands()) {
      const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1);
      if (Remainder)
        Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
    }
    return nullptr;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    // Split a non-zero base out of an addrec.
    if (AR->getStart()->isZero() || !AR->isAffine())
      return S;

    const SCEV *Remainder = CollectSubexprs(AR->getStart(),
                                            C, Ops, L, SE, Depth+1);
    // Split the non-zero AddRec unless it is part of a nested recurrence that
    // does not pertain to this loop.
    if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
      Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
      Remainder = nullptr;
    }
    if (Remainder != AR->getStart()) {
      if (!Remainder)
        Remainder = SE.getConstant(AR->getType(), 0);
      return SE.getAddRecExpr(Remainder,
                              AR->getStepRecurrence(SE),
                              AR->getLoop(),
                              // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
                              SCEV::FlagAnyWrap);
    }
  } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    // Break (C * (a + b + c)) into C*a + C*b + C*c.
    if (Mul->getNumOperands() != 2)
      return S;
    if (const SCEVConstant *Op0 =
        dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
      C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
      const SCEV *Remainder =
        CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
      if (Remainder)
        Ops.push_back(SE.getMulExpr(C, Remainder));
      return nullptr;
    }
  }
  return S;
}

/// Return true if the SCEV represents a value that may end up as a
/// post-increment operation.
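///
/// For example (illustrative): an address {%base,+,4}<%L> with a
/// loop-invariant, nonconstant start feeding an integer-typed address use may
/// be matched by a post-indexed load or store on targets where TTI reports
/// MIM_PostInc as legal.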
static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
                              LSRUse &LU, const SCEV *S, const Loop *L,
                              ScalarEvolution &SE) {
  if (LU.Kind != LSRUse::Address ||
      !LU.AccessTy.getType()->isIntOrIntVectorTy())
    return false;
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
  if (!AR)
    return false;
  const SCEV *LoopStep = AR->getStepRecurrence(SE);
  if (!isa<SCEVConstant>(LoopStep))
    return false;
  // Check if a post-indexed load/store can be used.
  if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
      TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
    const SCEV *LoopStart = AR->getStart();
    if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
      return true;
  }
  return false;
}
367709467b48Spatrick 
367809467b48Spatrick /// Helper function for LSRInstance::GenerateReassociations.
GenerateReassociationsImpl(LSRUse & LU,unsigned LUIdx,const Formula & Base,unsigned Depth,size_t Idx,bool IsScaledReg)367909467b48Spatrick void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
368009467b48Spatrick                                              const Formula &Base,
368109467b48Spatrick                                              unsigned Depth, size_t Idx,
368209467b48Spatrick                                              bool IsScaledReg) {
368309467b48Spatrick   const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
368409467b48Spatrick   // Don't generate reassociations for the base register of a value that
368509467b48Spatrick   // may generate a post-increment operator. The reason is that the
368609467b48Spatrick   // reassociations cause extra base+register formula to be created,
368709467b48Spatrick   // and possibly chosen, but the post-increment is more efficient.
368873471bf0Spatrick   if (AMK == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, BaseReg, L, SE))
368909467b48Spatrick     return;
369009467b48Spatrick   SmallVector<const SCEV *, 8> AddOps;
369109467b48Spatrick   const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
369209467b48Spatrick   if (Remainder)
369309467b48Spatrick     AddOps.push_back(Remainder);
369409467b48Spatrick 
369509467b48Spatrick   if (AddOps.size() == 1)
369609467b48Spatrick     return;
369709467b48Spatrick 
369809467b48Spatrick   for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
369909467b48Spatrick                                                      JE = AddOps.end();
370009467b48Spatrick        J != JE; ++J) {
370109467b48Spatrick     // Loop-variant "unknown" values are uninteresting; we won't be able to
370209467b48Spatrick     // do anything meaningful with them.
370309467b48Spatrick     if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
370409467b48Spatrick       continue;
370509467b48Spatrick 
370609467b48Spatrick     // Don't pull a constant into a register if the constant could be folded
370709467b48Spatrick     // into an immediate field.
370809467b48Spatrick     if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
370909467b48Spatrick                          LU.AccessTy, *J, Base.getNumRegs() > 1))
371009467b48Spatrick       continue;
371109467b48Spatrick 
371209467b48Spatrick     // Collect all operands except *J.
371309467b48Spatrick     SmallVector<const SCEV *, 8> InnerAddOps(
371409467b48Spatrick         ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
371509467b48Spatrick     InnerAddOps.append(std::next(J),
371609467b48Spatrick                        ((const SmallVector<const SCEV *, 8> &)AddOps).end());
371709467b48Spatrick 
371809467b48Spatrick     // Don't leave just a constant behind in a register if the constant could
371909467b48Spatrick     // be folded into an immediate field.
372009467b48Spatrick     if (InnerAddOps.size() == 1 &&
372109467b48Spatrick         isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
372209467b48Spatrick                          LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
372309467b48Spatrick       continue;
372409467b48Spatrick 
372509467b48Spatrick     const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
372609467b48Spatrick     if (InnerSum->isZero())
372709467b48Spatrick       continue;
372809467b48Spatrick     Formula F = Base;
372909467b48Spatrick 
373009467b48Spatrick     // Add the remaining pieces of the add back into the new formula.
373109467b48Spatrick     const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
373209467b48Spatrick     if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
373309467b48Spatrick         TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
373409467b48Spatrick                                 InnerSumSC->getValue()->getZExtValue())) {
373509467b48Spatrick       F.UnfoldedOffset =
373609467b48Spatrick           (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
373709467b48Spatrick       if (IsScaledReg)
373809467b48Spatrick         F.ScaledReg = nullptr;
373909467b48Spatrick       else
374009467b48Spatrick         F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
374109467b48Spatrick     } else if (IsScaledReg)
374209467b48Spatrick       F.ScaledReg = InnerSum;
374309467b48Spatrick     else
374409467b48Spatrick       F.BaseRegs[Idx] = InnerSum;
374509467b48Spatrick 
374609467b48Spatrick     // Add J as its own register, or an unfolded immediate.
374709467b48Spatrick     const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
374809467b48Spatrick     if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
374909467b48Spatrick         TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
375009467b48Spatrick                                 SC->getValue()->getZExtValue()))
375109467b48Spatrick       F.UnfoldedOffset =
375209467b48Spatrick           (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
375309467b48Spatrick     else
375409467b48Spatrick       F.BaseRegs.push_back(*J);
375509467b48Spatrick     // We may have changed the number of registers in the base regs; adjust
375609467b48Spatrick     // the formula accordingly.
375709467b48Spatrick     F.canonicalize(*L);
375809467b48Spatrick 
375909467b48Spatrick     if (InsertFormula(LU, LUIdx, F))
376009467b48Spatrick       // If that formula hadn't been seen before, recurse to find more like
376109467b48Spatrick       // it.
376209467b48Spatrick       // Bump Depth by Log16(AddOps.size()), which equals
376309467b48Spatrick       // Log2_32(AddOps.size()) >> 2, because Depth alone is not enough to
376409467b48Spatrick       // bound compile time. Whenever AddOps.size() exceeds 16^x, this adds
376509467b48Spatrick       // x to Depth.
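      // For example, with AddOps.size() == 16, Log2_32(16) >> 2 == 1, so each
      // recursive step consumes two levels of the Depth budget instead of one.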
376609467b48Spatrick       GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
376709467b48Spatrick                              Depth + 1 + (Log2_32(AddOps.size()) >> 2));
376809467b48Spatrick   }
376909467b48Spatrick }
377009467b48Spatrick 
377109467b48Spatrick /// Split out subexpressions from adds and the bases of addrecs.
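/// For example, a formula whose base register is the add expression
/// (A + B + 4) may be split so that A becomes its own register while
/// reg(B + 4) remains, or so that the 4 moves into an unfolded immediate.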
377209467b48Spatrick void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
377309467b48Spatrick                                          Formula Base, unsigned Depth) {
377409467b48Spatrick   assert(Base.isCanonical(*L) && "Input must be in the canonical form");
377509467b48Spatrick   // Arbitrarily cap recursion to protect compile time.
377609467b48Spatrick   if (Depth >= 3)
377709467b48Spatrick     return;
377809467b48Spatrick 
377909467b48Spatrick   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
378009467b48Spatrick     GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
378109467b48Spatrick 
378209467b48Spatrick   if (Base.Scale == 1)
378309467b48Spatrick     GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
378409467b48Spatrick                                /* Idx */ -1, /* IsScaledReg */ true);
378509467b48Spatrick }
378609467b48Spatrick 
378709467b48Spatrick /// Generate a formula consisting of all of the loop-dominating registers added
378809467b48Spatrick /// into a single register.
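/// For example, if a formula uses loop-invariant registers a and b alongside
/// an addrec, a and b may be combined into one register holding (a + b).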
378909467b48Spatrick void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
379009467b48Spatrick                                        Formula Base) {
379109467b48Spatrick   // This method is only interesting on a plurality of registers.
379209467b48Spatrick   if (Base.BaseRegs.size() + (Base.Scale == 1) +
379309467b48Spatrick       (Base.UnfoldedOffset != 0) <= 1)
379409467b48Spatrick     return;
379509467b48Spatrick 
379609467b48Spatrick   // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
379709467b48Spatrick   // processing the formula.
379809467b48Spatrick   Base.unscale();
379909467b48Spatrick   SmallVector<const SCEV *, 4> Ops;
380009467b48Spatrick   Formula NewBase = Base;
380109467b48Spatrick   NewBase.BaseRegs.clear();
380209467b48Spatrick   Type *CombinedIntegerType = nullptr;
380309467b48Spatrick   for (const SCEV *BaseReg : Base.BaseRegs) {
380409467b48Spatrick     if (SE.properlyDominates(BaseReg, L->getHeader()) &&
380509467b48Spatrick         !SE.hasComputableLoopEvolution(BaseReg, L)) {
380609467b48Spatrick       if (!CombinedIntegerType)
380709467b48Spatrick         CombinedIntegerType = SE.getEffectiveSCEVType(BaseReg->getType());
380809467b48Spatrick       Ops.push_back(BaseReg);
380909467b48Spatrick     } else
381109467b48Spatrick       NewBase.BaseRegs.push_back(BaseReg);
381209467b48Spatrick   }
381309467b48Spatrick 
381409467b48Spatrick   // If no register is relevant, we're done.
381509467b48Spatrick   if (Ops.size() == 0)
381609467b48Spatrick     return;
381709467b48Spatrick 
381809467b48Spatrick   // Utility function for generating the required variants of the combined
381909467b48Spatrick   // registers.
382009467b48Spatrick   auto GenerateFormula = [&](const SCEV *Sum) {
382109467b48Spatrick     Formula F = NewBase;
382209467b48Spatrick 
382309467b48Spatrick     // TODO: If Sum is zero, it probably means ScalarEvolution missed an
382409467b48Spatrick     // opportunity to fold something. For now, just ignore such cases
382509467b48Spatrick     // rather than proceed with zero in a register.
382609467b48Spatrick     if (Sum->isZero())
382709467b48Spatrick       return;
382809467b48Spatrick 
382909467b48Spatrick     F.BaseRegs.push_back(Sum);
383009467b48Spatrick     F.canonicalize(*L);
383109467b48Spatrick     (void)InsertFormula(LU, LUIdx, F);
383209467b48Spatrick   };
383309467b48Spatrick 
383409467b48Spatrick   // If we collected at least two registers, generate a formula combining them.
383509467b48Spatrick   if (Ops.size() > 1) {
383609467b48Spatrick     SmallVector<const SCEV *, 4> OpsCopy(Ops); // Don't let SE modify Ops.
383709467b48Spatrick     GenerateFormula(SE.getAddExpr(OpsCopy));
383809467b48Spatrick   }
383909467b48Spatrick 
384009467b48Spatrick   // If we have an unfolded offset, generate a formula combining it with the
384109467b48Spatrick   // registers collected.
384209467b48Spatrick   if (NewBase.UnfoldedOffset) {
384309467b48Spatrick     assert(CombinedIntegerType && "Missing a type for the unfolded offset");
384409467b48Spatrick     Ops.push_back(SE.getConstant(CombinedIntegerType, NewBase.UnfoldedOffset,
384509467b48Spatrick                                  true));
384609467b48Spatrick     NewBase.UnfoldedOffset = 0;
384709467b48Spatrick     GenerateFormula(SE.getAddExpr(Ops));
384809467b48Spatrick   }
384909467b48Spatrick }
385009467b48Spatrick 
385109467b48Spatrick /// Helper function for LSRInstance::GenerateSymbolicOffsets.
385209467b48Spatrick void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
385309467b48Spatrick                                               const Formula &Base, size_t Idx,
385409467b48Spatrick                                               bool IsScaledReg) {
385509467b48Spatrick   const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
385609467b48Spatrick   GlobalValue *GV = ExtractSymbol(G, SE);
385709467b48Spatrick   if (G->isZero() || !GV)
385809467b48Spatrick     return;
385909467b48Spatrick   Formula F = Base;
386009467b48Spatrick   F.BaseGV = GV;
386109467b48Spatrick   if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
386209467b48Spatrick     return;
386309467b48Spatrick   if (IsScaledReg)
386409467b48Spatrick     F.ScaledReg = G;
386509467b48Spatrick   else
386609467b48Spatrick     F.BaseRegs[Idx] = G;
386709467b48Spatrick   (void)InsertFormula(LU, LUIdx, F);
386809467b48Spatrick }
386909467b48Spatrick 
387009467b48Spatrick /// Generate reuse formulae using symbolic offsets.
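/// For example, a base register of the form (@global + x) may be split so
/// that @global moves into the formula's BaseGV field and reg(x) remains,
/// allowing addressing modes with a symbol operand to be used.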
387109467b48Spatrick void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
387209467b48Spatrick                                           Formula Base) {
387309467b48Spatrick   // We can't add a symbolic offset if the address already contains one.
387409467b48Spatrick   if (Base.BaseGV) return;
387509467b48Spatrick 
387609467b48Spatrick   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
387709467b48Spatrick     GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
387809467b48Spatrick   if (Base.Scale == 1)
387909467b48Spatrick     GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
388009467b48Spatrick                                 /* IsScaledReg */ true);
388109467b48Spatrick }
388209467b48Spatrick 
388309467b48Spatrick /// Helper function for LSRInstance::GenerateConstantOffsets.
388409467b48Spatrick void LSRInstance::GenerateConstantOffsetsImpl(
388509467b48Spatrick     LSRUse &LU, unsigned LUIdx, const Formula &Base,
388609467b48Spatrick     const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
388709467b48Spatrick 
388809467b48Spatrick   auto GenerateOffset = [&](const SCEV *G, int64_t Offset) {
388909467b48Spatrick     Formula F = Base;
389009467b48Spatrick     F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
389109467b48Spatrick 
389273471bf0Spatrick     if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
389309467b48Spatrick       // Add the offset to the base register.
389409467b48Spatrick       const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), Offset), G);
389509467b48Spatrick       // If it cancelled out, drop the base register, otherwise update it.
389609467b48Spatrick       if (NewG->isZero()) {
389709467b48Spatrick         if (IsScaledReg) {
389809467b48Spatrick           F.Scale = 0;
389909467b48Spatrick           F.ScaledReg = nullptr;
390009467b48Spatrick         } else
390109467b48Spatrick           F.deleteBaseReg(F.BaseRegs[Idx]);
390209467b48Spatrick         F.canonicalize(*L);
390309467b48Spatrick       } else if (IsScaledReg)
390409467b48Spatrick         F.ScaledReg = NewG;
390509467b48Spatrick       else
390609467b48Spatrick         F.BaseRegs[Idx] = NewG;
390709467b48Spatrick 
390809467b48Spatrick       (void)InsertFormula(LU, LUIdx, F);
390909467b48Spatrick     }
391009467b48Spatrick   };
391109467b48Spatrick 
391209467b48Spatrick   const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
391309467b48Spatrick 
391409467b48Spatrick   // With constant offsets and constant steps, we can generate pre-inc
391509467b48Spatrick   // accesses by having the offset equal the step. So, for access #0 with a
391609467b48Spatrick   // step of 8, we generate a G - 8 base which would require the first access
391709467b48Spatrick   // to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer
391809467b48Spatrick   // for itself and hopefully becomes the base for other accesses. This means
391909467b48Spatrick   // that a single pre-indexed access can be generated to become the new
392009467b48Spatrick   // base pointer for each iteration of the loop, resulting in no extra add/sub
392109467b48Spatrick   // instructions for pointer updating.
392273471bf0Spatrick   if (AMK == TTI::AMK_PreIndexed && LU.Kind == LSRUse::Address) {
392309467b48Spatrick     if (auto *GAR = dyn_cast<SCEVAddRecExpr>(G)) {
392409467b48Spatrick       if (auto *StepRec =
392509467b48Spatrick           dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
392609467b48Spatrick         const APInt &StepInt = StepRec->getAPInt();
392709467b48Spatrick         int64_t Step = StepInt.isNegative() ?
392809467b48Spatrick           StepInt.getSExtValue() : StepInt.getZExtValue();
392909467b48Spatrick 
393009467b48Spatrick         for (int64_t Offset : Worklist) {
393109467b48Spatrick           Offset -= Step;
393209467b48Spatrick           GenerateOffset(G, Offset);
393309467b48Spatrick         }
393409467b48Spatrick       }
393509467b48Spatrick     }
393609467b48Spatrick   }
393709467b48Spatrick   for (int64_t Offset : Worklist)
393809467b48Spatrick     GenerateOffset(G, Offset);
393909467b48Spatrick 
394009467b48Spatrick   int64_t Imm = ExtractImmediate(G, SE);
394109467b48Spatrick   if (G->isZero() || Imm == 0)
394209467b48Spatrick     return;
394309467b48Spatrick   Formula F = Base;
394409467b48Spatrick   F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
394509467b48Spatrick   if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
394609467b48Spatrick     return;
394773471bf0Spatrick   if (IsScaledReg) {
394809467b48Spatrick     F.ScaledReg = G;
394973471bf0Spatrick   } else {
395009467b48Spatrick     F.BaseRegs[Idx] = G;
395173471bf0Spatrick     // We may generate a non-canonical Formula if G is a recurrent expression
395273471bf0Spatrick     // register related to the current loop while F.ScaledReg is not.
395373471bf0Spatrick     F.canonicalize(*L);
395473471bf0Spatrick   }
395509467b48Spatrick   (void)InsertFormula(LU, LUIdx, F);
395609467b48Spatrick }
395709467b48Spatrick 
395809467b48Spatrick /// Generate reuse formulae using constant offsets.
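/// For example, a base register {x + 4} may be split so that the 4 moves into
/// the formula's BaseOffset field and reg(x) remains; conversely, the use's
/// known offsets may be folded into the register itself.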
395909467b48Spatrick void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
396009467b48Spatrick                                           Formula Base) {
396109467b48Spatrick   // TODO: For now, just add the min and max offset, because it usually isn't
396209467b48Spatrick   // worthwhile looking at everything in between.
396309467b48Spatrick   SmallVector<int64_t, 2> Worklist;
396409467b48Spatrick   Worklist.push_back(LU.MinOffset);
396509467b48Spatrick   if (LU.MaxOffset != LU.MinOffset)
396609467b48Spatrick     Worklist.push_back(LU.MaxOffset);
396709467b48Spatrick 
396809467b48Spatrick   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
396909467b48Spatrick     GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
397009467b48Spatrick   if (Base.Scale == 1)
397109467b48Spatrick     GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
397209467b48Spatrick                                 /* IsScaledReg */ true);
397309467b48Spatrick }
397409467b48Spatrick 
397509467b48Spatrick /// For ICmpZero, check to see if we can scale up the comparison. For example, x
397609467b48Spatrick /// == y -> x*c == y*c.
397709467b48Spatrick void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
397809467b48Spatrick                                          Formula Base) {
397909467b48Spatrick   if (LU.Kind != LSRUse::ICmpZero) return;
398009467b48Spatrick 
398109467b48Spatrick   // Determine the integer type for the base formula.
398209467b48Spatrick   Type *IntTy = Base.getType();
398309467b48Spatrick   if (!IntTy) return;
398409467b48Spatrick   if (SE.getTypeSizeInBits(IntTy) > 64) return;
398509467b48Spatrick 
398609467b48Spatrick   // Don't do this if there is more than one offset.
398709467b48Spatrick   if (LU.MinOffset != LU.MaxOffset) return;
398809467b48Spatrick 
398909467b48Spatrick   // Check that the transformation is valid; it is illegal to multiply a
399009467b48Spatrick   // pointer.
399009467b48Spatrick   if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
399109467b48Spatrick     return;
399209467b48Spatrick   for (const SCEV *BaseReg : Base.BaseRegs)
399309467b48Spatrick     if (BaseReg->getType()->isPointerTy())
399409467b48Spatrick       return;
399509467b48Spatrick   assert(!Base.BaseGV && "ICmpZero use is not legal!");
399609467b48Spatrick 
399709467b48Spatrick   // Check each interesting stride.
399809467b48Spatrick   for (int64_t Factor : Factors) {
3999*d415bd75Srobert     // Check that Factor can be represented by IntTy
4000*d415bd75Srobert     if (!ConstantInt::isValueValidForType(IntTy, Factor))
4001*d415bd75Srobert       continue;
400209467b48Spatrick     // Check that the multiplication doesn't overflow.
400309467b48Spatrick     if (Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
400409467b48Spatrick       continue;
400509467b48Spatrick     int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
400673471bf0Spatrick     assert(Factor != 0 && "Zero factor not expected!");
400709467b48Spatrick     if (NewBaseOffset / Factor != Base.BaseOffset)
400809467b48Spatrick       continue;
400909467b48Spatrick     // If the offset will be truncated at this use, check that it is in bounds.
401009467b48Spatrick     if (!IntTy->isPointerTy() &&
401109467b48Spatrick         !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
401209467b48Spatrick       continue;
401309467b48Spatrick 
401409467b48Spatrick     // Check that multiplying with the use offset doesn't overflow.
401509467b48Spatrick     int64_t Offset = LU.MinOffset;
401609467b48Spatrick     if (Offset == std::numeric_limits<int64_t>::min() && Factor == -1)
401709467b48Spatrick       continue;
401809467b48Spatrick     Offset = (uint64_t)Offset * Factor;
401909467b48Spatrick     if (Offset / Factor != LU.MinOffset)
402009467b48Spatrick       continue;
402109467b48Spatrick     // If the offset will be truncated at this use, check that it is in bounds.
402209467b48Spatrick     if (!IntTy->isPointerTy() &&
402309467b48Spatrick         !ConstantInt::isValueValidForType(IntTy, Offset))
402409467b48Spatrick       continue;
402509467b48Spatrick 
402609467b48Spatrick     Formula F = Base;
402709467b48Spatrick     F.BaseOffset = NewBaseOffset;
402809467b48Spatrick 
402909467b48Spatrick     // Check that this scale is legal.
403009467b48Spatrick     if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
403109467b48Spatrick       continue;
403209467b48Spatrick 
403309467b48Spatrick     // Compensate for the use having MinOffset built into it.
403409467b48Spatrick     F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
403509467b48Spatrick 
403609467b48Spatrick     const SCEV *FactorS = SE.getConstant(IntTy, Factor);
403709467b48Spatrick 
403809467b48Spatrick     // Check that multiplying with each base register doesn't overflow.
403909467b48Spatrick     for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
404009467b48Spatrick       F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
404109467b48Spatrick       if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
404209467b48Spatrick         goto next;
404309467b48Spatrick     }
404409467b48Spatrick 
404509467b48Spatrick     // Check that multiplying with the scaled register doesn't overflow.
404609467b48Spatrick     if (F.ScaledReg) {
404709467b48Spatrick       F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
404809467b48Spatrick       if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
404909467b48Spatrick         continue;
405009467b48Spatrick     }
405109467b48Spatrick 
405209467b48Spatrick     // Check that multiplying with the unfolded offset doesn't overflow.
405309467b48Spatrick     if (F.UnfoldedOffset != 0) {
405409467b48Spatrick       if (F.UnfoldedOffset == std::numeric_limits<int64_t>::min() &&
405509467b48Spatrick           Factor == -1)
405609467b48Spatrick         continue;
405709467b48Spatrick       F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
405809467b48Spatrick       if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
405909467b48Spatrick         continue;
406009467b48Spatrick       // If the offset will be truncated, check that it is in bounds.
406109467b48Spatrick       if (!IntTy->isPointerTy() &&
406209467b48Spatrick           !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
406309467b48Spatrick         continue;
406409467b48Spatrick     }
406509467b48Spatrick 
406609467b48Spatrick     // If we make it here and it's legal, add it.
406709467b48Spatrick     (void)InsertFormula(LU, LUIdx, F);
406809467b48Spatrick   next:;
406909467b48Spatrick   }
407009467b48Spatrick }
407109467b48Spatrick 
407209467b48Spatrick /// Generate stride-factor reuse formulae by making use of, for example,
407309467b48Spatrick /// scaled-offset addressing modes.
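/// For example, with a factor of 4, a base register {0,+,4} may be rewritten
/// as the scaled register 4 * {0,+,1}, which matches scaled-index addressing
/// modes such as [base + index*4] on x86.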
407409467b48Spatrick void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
407509467b48Spatrick   // Determine the integer type for the base formula.
407609467b48Spatrick   Type *IntTy = Base.getType();
407709467b48Spatrick   if (!IntTy) return;
407809467b48Spatrick 
407909467b48Spatrick   // If this Formula already has a scaled register, we can't add another one.
408009467b48Spatrick   // Try to unscale the formula to generate a better scale.
408109467b48Spatrick   if (Base.Scale != 0 && !Base.unscale())
408209467b48Spatrick     return;
408309467b48Spatrick 
408409467b48Spatrick   assert(Base.Scale == 0 && "unscale did not do its job!");
408509467b48Spatrick 
408609467b48Spatrick   // Check each interesting stride.
408709467b48Spatrick   for (int64_t Factor : Factors) {
408809467b48Spatrick     Base.Scale = Factor;
408909467b48Spatrick     Base.HasBaseReg = Base.BaseRegs.size() > 1;
409009467b48Spatrick     // Check whether this scale is going to be legal.
409109467b48Spatrick     if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
409209467b48Spatrick                     Base)) {
409309467b48Spatrick       // As a special case, switch out-of-loop Basic users to the Special kind.
409409467b48Spatrick       // TODO: Reconsider this special case.
409509467b48Spatrick       if (LU.Kind == LSRUse::Basic &&
409609467b48Spatrick           isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
409709467b48Spatrick                      LU.AccessTy, Base) &&
409809467b48Spatrick           LU.AllFixupsOutsideLoop)
409909467b48Spatrick         LU.Kind = LSRUse::Special;
410009467b48Spatrick       else
410109467b48Spatrick         continue;
410209467b48Spatrick     }
410309467b48Spatrick     // For an ICmpZero, negating a solitary base register won't lead to
410409467b48Spatrick     // new solutions.
410509467b48Spatrick     if (LU.Kind == LSRUse::ICmpZero &&
410609467b48Spatrick         !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
410709467b48Spatrick       continue;
410809467b48Spatrick     // For each addrec base reg, if its loop is current loop, apply the scale.
410909467b48Spatrick     for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
411009467b48Spatrick       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i]);
411109467b48Spatrick       if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
411209467b48Spatrick         const SCEV *FactorS = SE.getConstant(IntTy, Factor);
411309467b48Spatrick         if (FactorS->isZero())
411409467b48Spatrick           continue;
411509467b48Spatrick         // Divide out the factor, ignoring high bits, since we'll be
411609467b48Spatrick         // scaling the value back up in the end.
4117*d415bd75Srobert         if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true))
4118*d415bd75Srobert           if (!Quotient->isZero()) {
411909467b48Spatrick             // TODO: This could be optimized to avoid all the copying.
412009467b48Spatrick             Formula F = Base;
412109467b48Spatrick             F.ScaledReg = Quotient;
412209467b48Spatrick             F.deleteBaseReg(F.BaseRegs[i]);
412309467b48Spatrick             // The canonical representation of 1*reg is reg, which is already in
412409467b48Spatrick             // Base. In that case, do not try to insert the formula, it will be
412509467b48Spatrick             // rejected anyway.
412609467b48Spatrick             if (F.Scale == 1 && (F.BaseRegs.empty() ||
412709467b48Spatrick                                  (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
412809467b48Spatrick               continue;
412909467b48Spatrick             // If AllFixupsOutsideLoop is true and F.Scale is 1, we may
413009467b48Spatrick             // generate a non-canonical Formula whose ScaledReg loop is not L.
413109467b48Spatrick             if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
413209467b48Spatrick               F.canonicalize(*L);
413309467b48Spatrick             (void)InsertFormula(LU, LUIdx, F);
413409467b48Spatrick           }
413509467b48Spatrick       }
413609467b48Spatrick     }
413709467b48Spatrick   }
413809467b48Spatrick }
413909467b48Spatrick 
414009467b48Spatrick /// Generate reuse formulae from different IV types.
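/// For example, a use of an i32 value may be re-expressed in terms of an
/// existing wider i64 IV when the target can truncate i64 to i32 for free,
/// avoiding a separate narrow induction variable.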
414109467b48Spatrick void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
414209467b48Spatrick   // Don't bother truncating symbolic values.
414309467b48Spatrick   if (Base.BaseGV) return;
414409467b48Spatrick 
414509467b48Spatrick   // Determine the integer type for the base formula.
414609467b48Spatrick   Type *DstTy = Base.getType();
414709467b48Spatrick   if (!DstTy) return;
414873471bf0Spatrick   if (DstTy->isPointerTy())
414973471bf0Spatrick     return;
415009467b48Spatrick 
4151*d415bd75Srobert   // It is invalid to extend a pointer type, so exit early if ScaledReg or
4152*d415bd75Srobert   // any of the BaseRegs are pointers.
4153*d415bd75Srobert   if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
4154*d415bd75Srobert     return;
4155*d415bd75Srobert   if (any_of(Base.BaseRegs,
4156*d415bd75Srobert              [](const SCEV *S) { return S->getType()->isPointerTy(); }))
4157*d415bd75Srobert     return;
4158*d415bd75Srobert 
415909467b48Spatrick   for (Type *SrcTy : Types) {
416009467b48Spatrick     if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
416109467b48Spatrick       Formula F = Base;
416209467b48Spatrick 
416309467b48Spatrick       // Sometimes SCEV is able to prove zero during ext transform. It may
416409467b48Spatrick       // happen if SCEV did not do all possible transforms while creating the
416509467b48Spatrick       // initial node (maybe due to depth limitations), but it can do them while
416609467b48Spatrick       // taking ext.
416709467b48Spatrick       if (F.ScaledReg) {
416809467b48Spatrick         const SCEV *NewScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
416909467b48Spatrick         if (NewScaledReg->isZero())
417009467b48Spatrick           continue;
417109467b48Spatrick         F.ScaledReg = NewScaledReg;
417209467b48Spatrick       }
417309467b48Spatrick       bool HasZeroBaseReg = false;
417409467b48Spatrick       for (const SCEV *&BaseReg : F.BaseRegs) {
417509467b48Spatrick         const SCEV *NewBaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
417609467b48Spatrick         if (NewBaseReg->isZero()) {
417709467b48Spatrick           HasZeroBaseReg = true;
417809467b48Spatrick           break;
417909467b48Spatrick         }
418009467b48Spatrick         BaseReg = NewBaseReg;
418109467b48Spatrick       }
418209467b48Spatrick       if (HasZeroBaseReg)
418309467b48Spatrick         continue;
418409467b48Spatrick 
418509467b48Spatrick       // TODO: This assumes we've done basic processing on all uses and
418609467b48Spatrick       // have an idea what the register usage is.
418709467b48Spatrick       if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
418809467b48Spatrick         continue;
418909467b48Spatrick 
419009467b48Spatrick       F.canonicalize(*L);
419109467b48Spatrick       (void)InsertFormula(LU, LUIdx, F);
419209467b48Spatrick     }
419309467b48Spatrick   }
419409467b48Spatrick }
419509467b48Spatrick 
419609467b48Spatrick namespace {
419709467b48Spatrick 
419809467b48Spatrick /// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
419909467b48Spatrick /// modifications so that the search phase doesn't have to worry about the data
420009467b48Spatrick /// structures moving underneath it.
420109467b48Spatrick struct WorkItem {
420209467b48Spatrick   size_t LUIdx;
420309467b48Spatrick   int64_t Imm;
420409467b48Spatrick   const SCEV *OrigReg;
420509467b48Spatrick 
420609467b48Spatrick   WorkItem(size_t LI, int64_t I, const SCEV *R)
420709467b48Spatrick       : LUIdx(LI), Imm(I), OrigReg(R) {}
420809467b48Spatrick 
420909467b48Spatrick   void print(raw_ostream &OS) const;
421009467b48Spatrick   void dump() const;
421109467b48Spatrick };
421209467b48Spatrick 
421309467b48Spatrick } // end anonymous namespace
421409467b48Spatrick 
421509467b48Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
421609467b48Spatrick void WorkItem::print(raw_ostream &OS) const {
421709467b48Spatrick   OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
421809467b48Spatrick      << ", add offset " << Imm;
421909467b48Spatrick }
422009467b48Spatrick 
422109467b48Spatrick LLVM_DUMP_METHOD void WorkItem::dump() const {
422209467b48Spatrick   print(errs()); errs() << '\n';
422309467b48Spatrick }
422409467b48Spatrick #endif
422509467b48Spatrick 
422609467b48Spatrick /// Look for registers which are a constant distance apart and try to form reuse
422709467b48Spatrick /// opportunities between them.
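/// For example, if one use has a register {A} and another has {A + 8}, the
/// second may be rewritten to reuse {A} with an immediate offset of 8,
/// reducing the number of live registers.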
422809467b48Spatrick void LSRInstance::GenerateCrossUseConstantOffsets() {
422909467b48Spatrick   // Group the registers by their value without any added constant offset.
423009467b48Spatrick   using ImmMapTy = std::map<int64_t, const SCEV *>;
423109467b48Spatrick 
423209467b48Spatrick   DenseMap<const SCEV *, ImmMapTy> Map;
423309467b48Spatrick   DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
423409467b48Spatrick   SmallVector<const SCEV *, 8> Sequence;
423509467b48Spatrick   for (const SCEV *Use : RegUses) {
423609467b48Spatrick     const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
423709467b48Spatrick     int64_t Imm = ExtractImmediate(Reg, SE);
423809467b48Spatrick     auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
423909467b48Spatrick     if (Pair.second)
424009467b48Spatrick       Sequence.push_back(Reg);
424109467b48Spatrick     Pair.first->second.insert(std::make_pair(Imm, Use));
424209467b48Spatrick     UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
424309467b48Spatrick   }
424409467b48Spatrick 
424509467b48Spatrick   // Now examine each set of registers with the same base value. Build up
424609467b48Spatrick   // a list of work to do and do the work in a separate step so that we're
424709467b48Spatrick   // not adding formulae and register counts while we're searching.
424809467b48Spatrick   SmallVector<WorkItem, 32> WorkItems;
424909467b48Spatrick   SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
425009467b48Spatrick   for (const SCEV *Reg : Sequence) {
425109467b48Spatrick     const ImmMapTy &Imms = Map.find(Reg)->second;
425209467b48Spatrick 
425309467b48Spatrick     // It's not worthwhile looking for reuse if there's only one offset.
425409467b48Spatrick     if (Imms.size() == 1)
425509467b48Spatrick       continue;
425609467b48Spatrick 
425709467b48Spatrick     LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
425809467b48Spatrick                for (const auto &Entry
425909467b48Spatrick                     : Imms) dbgs()
426009467b48Spatrick                << ' ' << Entry.first;
426109467b48Spatrick                dbgs() << '\n');
426209467b48Spatrick 
426309467b48Spatrick     // Examine each offset.
426409467b48Spatrick     for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
426509467b48Spatrick          J != JE; ++J) {
426609467b48Spatrick       const SCEV *OrigReg = J->second;
426709467b48Spatrick 
426809467b48Spatrick       int64_t JImm = J->first;
426909467b48Spatrick       const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
427009467b48Spatrick 
427109467b48Spatrick       if (!isa<SCEVConstant>(OrigReg) &&
427209467b48Spatrick           UsedByIndicesMap[Reg].count() == 1) {
427309467b48Spatrick         LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
427409467b48Spatrick                           << '\n');
427509467b48Spatrick         continue;
427609467b48Spatrick       }
427709467b48Spatrick 
427809467b48Spatrick       // Conservatively examine offsets between this orig reg a few selected
427909467b48Spatrick       // other orig regs.
428009467b48Spatrick       int64_t First = Imms.begin()->first;
428109467b48Spatrick       int64_t Last = std::prev(Imms.end())->first;
428209467b48Spatrick       // Compute (First + Last) / 2 without overflow using the fact that
428309467b48Spatrick       // First + Last = 2 * (First & Last) + (First ^ Last).
428409467b48Spatrick       int64_t Avg = (First & Last) + ((First ^ Last) >> 1);
428509467b48Spatrick       // If the result is negative and First is odd and Last even (or vice versa),
428609467b48Spatrick       // we rounded towards -inf. Add 1 in that case, to round towards 0.
428709467b48Spatrick       Avg = Avg + ((First ^ Last) & ((uint64_t)Avg >> 63));
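      // For example, with First = -3 and Last = 0: (-3 & 0) + ((-3 ^ 0) >> 1)
      // gives -2, and the correction term ((-3 ^ 0) & 1) == 1 brings Avg to
      // -1, the value of -3/2 rounded towards zero.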
428809467b48Spatrick       ImmMapTy::const_iterator OtherImms[] = {
428909467b48Spatrick           Imms.begin(), std::prev(Imms.end()),
429009467b48Spatrick          Imms.lower_bound(Avg)};
4291*d415bd75Srobert       for (const auto &M : OtherImms) {
429209467b48Spatrick         if (M == J || M == JE) continue;
429309467b48Spatrick 
429409467b48Spatrick         // Compute the difference between the two.
429509467b48Spatrick         int64_t Imm = (uint64_t)JImm - M->first;
429609467b48Spatrick         for (unsigned LUIdx : UsedByIndices.set_bits())
429709467b48Spatrick           // Make a memo of this use, offset, and register tuple.
429809467b48Spatrick           if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
429909467b48Spatrick             WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
430009467b48Spatrick       }
430109467b48Spatrick     }
430209467b48Spatrick   }
430309467b48Spatrick 
430409467b48Spatrick   Map.clear();
430509467b48Spatrick   Sequence.clear();
430609467b48Spatrick   UsedByIndicesMap.clear();
430709467b48Spatrick   UniqueItems.clear();
430809467b48Spatrick 
430909467b48Spatrick   // Now iterate through the worklist and add new formulae.
431009467b48Spatrick   for (const WorkItem &WI : WorkItems) {
431109467b48Spatrick     size_t LUIdx = WI.LUIdx;
431209467b48Spatrick     LSRUse &LU = Uses[LUIdx];
431309467b48Spatrick     int64_t Imm = WI.Imm;
431409467b48Spatrick     const SCEV *OrigReg = WI.OrigReg;
431509467b48Spatrick 
431609467b48Spatrick     Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
431709467b48Spatrick     const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
431809467b48Spatrick     unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
431909467b48Spatrick 
432009467b48Spatrick     // TODO: Use a more targeted data structure.
432109467b48Spatrick     for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
432209467b48Spatrick       Formula F = LU.Formulae[L];
432309467b48Spatrick       // FIXME: The code for the scaled and unscaled registers looks
432409467b48Spatrick       // very similar but slightly different. Investigate if they
432509467b48Spatrick       // could be merged. That way, we would not have to unscale the
432609467b48Spatrick       // Formula.
432709467b48Spatrick       F.unscale();
432809467b48Spatrick       // Use the immediate in the scaled register.
432909467b48Spatrick       if (F.ScaledReg == OrigReg) {
433009467b48Spatrick         int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
433109467b48Spatrick         // Don't create 50 + reg(-50).
433209467b48Spatrick         if (F.referencesReg(SE.getSCEV(
433309467b48Spatrick                    ConstantInt::get(IntTy, -(uint64_t)Offset))))
433409467b48Spatrick           continue;
433509467b48Spatrick         Formula NewF = F;
433609467b48Spatrick         NewF.BaseOffset = Offset;
433709467b48Spatrick         if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
433809467b48Spatrick                         NewF))
433909467b48Spatrick           continue;
434009467b48Spatrick         NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
434109467b48Spatrick 
434209467b48Spatrick         // If the new scale is a constant in a register, and adding the constant
434309467b48Spatrick         // value to the immediate would produce a value closer to zero than the
434409467b48Spatrick         // immediate itself, then the formula isn't worthwhile.
434509467b48Spatrick         if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
434609467b48Spatrick           if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
434709467b48Spatrick               (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
434809467b48Spatrick                   .ule(std::abs(NewF.BaseOffset)))
434909467b48Spatrick             continue;
435009467b48Spatrick 
435109467b48Spatrick         // OK, looks good.
435209467b48Spatrick         NewF.canonicalize(*this->L);
435309467b48Spatrick         (void)InsertFormula(LU, LUIdx, NewF);
435409467b48Spatrick       } else {
435509467b48Spatrick         // Use the immediate in a base register.
435609467b48Spatrick         for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
435709467b48Spatrick           const SCEV *BaseReg = F.BaseRegs[N];
435809467b48Spatrick           if (BaseReg != OrigReg)
435909467b48Spatrick             continue;
436009467b48Spatrick           Formula NewF = F;
436109467b48Spatrick           NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
436209467b48Spatrick           if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
436309467b48Spatrick                           LU.Kind, LU.AccessTy, NewF)) {
436473471bf0Spatrick             if (AMK == TTI::AMK_PostIndexed &&
436509467b48Spatrick                 mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
436609467b48Spatrick               continue;
436709467b48Spatrick             if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
436809467b48Spatrick               continue;
436909467b48Spatrick             NewF = F;
437009467b48Spatrick             NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
437109467b48Spatrick           }
437209467b48Spatrick           NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
437309467b48Spatrick 
437409467b48Spatrick           // If the new formula has a constant in a register, and adding the
437509467b48Spatrick           // constant value to the immediate would produce a value closer to
437609467b48Spatrick           // zero than the immediate itself, then the formula isn't worthwhile.
437709467b48Spatrick           for (const SCEV *NewReg : NewF.BaseRegs)
437809467b48Spatrick             if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
437909467b48Spatrick               if ((C->getAPInt() + NewF.BaseOffset)
438009467b48Spatrick                       .abs()
438109467b48Spatrick                       .slt(std::abs(NewF.BaseOffset)) &&
438209467b48Spatrick                   (C->getAPInt() + NewF.BaseOffset).countTrailingZeros() >=
438309467b48Spatrick                       countTrailingZeros<uint64_t>(NewF.BaseOffset))
438409467b48Spatrick                 goto skip_formula;
438509467b48Spatrick 
438609467b48Spatrick           // Ok, looks good.
438709467b48Spatrick           NewF.canonicalize(*this->L);
438809467b48Spatrick           (void)InsertFormula(LU, LUIdx, NewF);
438909467b48Spatrick           break;
439009467b48Spatrick         skip_formula:;
439109467b48Spatrick         }
439209467b48Spatrick       }
439309467b48Spatrick     }
439409467b48Spatrick   }
439509467b48Spatrick }
439609467b48Spatrick 
439709467b48Spatrick /// Generate formulae for each use.
439809467b48Spatrick void
439909467b48Spatrick LSRInstance::GenerateAllReuseFormulae() {
440009467b48Spatrick   // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
440109467b48Spatrick   // queries are more precise.
440209467b48Spatrick   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
440309467b48Spatrick     LSRUse &LU = Uses[LUIdx];
440409467b48Spatrick     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
440509467b48Spatrick       GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
440609467b48Spatrick     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
440709467b48Spatrick       GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
440809467b48Spatrick   }
440909467b48Spatrick   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
441009467b48Spatrick     LSRUse &LU = Uses[LUIdx];
441109467b48Spatrick     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
441209467b48Spatrick       GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
441309467b48Spatrick     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
441409467b48Spatrick       GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
441509467b48Spatrick     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
441609467b48Spatrick       GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
441709467b48Spatrick     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
441809467b48Spatrick       GenerateScales(LU, LUIdx, LU.Formulae[i]);
441909467b48Spatrick   }
442009467b48Spatrick   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
442109467b48Spatrick     LSRUse &LU = Uses[LUIdx];
442209467b48Spatrick     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
442309467b48Spatrick       GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
442409467b48Spatrick   }
442509467b48Spatrick 
442609467b48Spatrick   GenerateCrossUseConstantOffsets();
442709467b48Spatrick 
442809467b48Spatrick   LLVM_DEBUG(dbgs() << "\n"
442909467b48Spatrick                        "After generating reuse formulae:\n";
443009467b48Spatrick              print_uses(dbgs()));
443109467b48Spatrick }
443209467b48Spatrick 
443309467b48Spatrick /// If there are multiple formulae with the same set of registers used
443409467b48Spatrick /// by other uses, pick the best one and delete the others.
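/// Formulae that expose the same set of registers to other uses are
/// interchangeable from a register-pressure standpoint, so only the cheapest
/// one (as measured by RateFormula) needs to survive.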
443509467b48Spatrick void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
443609467b48Spatrick   DenseSet<const SCEV *> VisitedRegs;
443709467b48Spatrick   SmallPtrSet<const SCEV *, 16> Regs;
443809467b48Spatrick   SmallPtrSet<const SCEV *, 16> LoserRegs;
443909467b48Spatrick #ifndef NDEBUG
444009467b48Spatrick   bool ChangedFormulae = false;
444109467b48Spatrick #endif
444209467b48Spatrick 
444309467b48Spatrick   // Collect the best formula for each unique set of shared registers. This
444409467b48Spatrick   // is reset for each use.
444509467b48Spatrick   using BestFormulaeTy =
444609467b48Spatrick       DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>;
444709467b48Spatrick 
444809467b48Spatrick   BestFormulaeTy BestFormulae;
444909467b48Spatrick 
445009467b48Spatrick   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
445109467b48Spatrick     LSRUse &LU = Uses[LUIdx];
445209467b48Spatrick     LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
445309467b48Spatrick                dbgs() << '\n');
445409467b48Spatrick 
445509467b48Spatrick     bool Any = false;
445609467b48Spatrick     for (size_t FIdx = 0, NumForms = LU.Formulae.size();
445709467b48Spatrick          FIdx != NumForms; ++FIdx) {
445809467b48Spatrick       Formula &F = LU.Formulae[FIdx];
445909467b48Spatrick 
446009467b48Spatrick       // Some formulas are instant losers. For example, they may depend on
446109467b48Spatrick       // nonexistent AddRecs from other loops. These need to be filtered
446209467b48Spatrick       // immediately, otherwise heuristics could choose them over others leading
446309467b48Spatrick       // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
446409467b48Spatrick       // avoids the need to recompute this information across formulae using the
446509467b48Spatrick       // same bad AddRec. Passing LoserRegs is also essential unless we remove
446609467b48Spatrick       // the corresponding bad register from the Regs set.
446773471bf0Spatrick       Cost CostF(L, SE, TTI, AMK);
446809467b48Spatrick       Regs.clear();
446909467b48Spatrick       CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs);
447009467b48Spatrick       if (CostF.isLoser()) {
447109467b48Spatrick         // During initial formula generation, undesirable formulae are generated
447209467b48Spatrick         // by uses within other loops that have some non-trivial address mode or
447309467b48Spatrick         // use the postinc form of the IV. LSR needs to provide these formulae
447409467b48Spatrick         // as the basis of rediscovering the desired formula that uses an AddRec
447509467b48Spatrick         // corresponding to the existing phi. Once all formulae have been
447609467b48Spatrick         // generated, these initial losers may be pruned.
447709467b48Spatrick         LLVM_DEBUG(dbgs() << "  Filtering loser "; F.print(dbgs());
447809467b48Spatrick                    dbgs() << "\n");
447909467b48Spatrick       }
448009467b48Spatrick       else {
448109467b48Spatrick         SmallVector<const SCEV *, 4> Key;
448209467b48Spatrick         for (const SCEV *Reg : F.BaseRegs) {
448309467b48Spatrick           if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
448409467b48Spatrick             Key.push_back(Reg);
448509467b48Spatrick         }
448609467b48Spatrick         if (F.ScaledReg &&
448709467b48Spatrick             RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
448809467b48Spatrick           Key.push_back(F.ScaledReg);
448909467b48Spatrick         // Unstable sort by host order ok, because this is only used for
449009467b48Spatrick         // uniquifying.
449109467b48Spatrick         llvm::sort(Key);
449209467b48Spatrick 
449309467b48Spatrick         std::pair<BestFormulaeTy::const_iterator, bool> P =
449409467b48Spatrick           BestFormulae.insert(std::make_pair(Key, FIdx));
449509467b48Spatrick         if (P.second)
449609467b48Spatrick           continue;
449709467b48Spatrick 
449809467b48Spatrick         Formula &Best = LU.Formulae[P.first->second];
449909467b48Spatrick 
450073471bf0Spatrick         Cost CostBest(L, SE, TTI, AMK);
450109467b48Spatrick         Regs.clear();
450209467b48Spatrick         CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
450309467b48Spatrick         if (CostF.isLess(CostBest))
450409467b48Spatrick           std::swap(F, Best);
450509467b48Spatrick         LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
450609467b48Spatrick                    dbgs() << "\n"
450709467b48Spatrick                              "    in favor of formula ";
450809467b48Spatrick                    Best.print(dbgs()); dbgs() << '\n');
450909467b48Spatrick       }
451009467b48Spatrick #ifndef NDEBUG
451109467b48Spatrick       ChangedFormulae = true;
451209467b48Spatrick #endif
451309467b48Spatrick       LU.DeleteFormula(F);
451409467b48Spatrick       --FIdx;
451509467b48Spatrick       --NumForms;
451609467b48Spatrick       Any = true;
451709467b48Spatrick     }
451809467b48Spatrick 
451909467b48Spatrick     // Now that we've filtered out some formulae, recompute the Regs set.
452009467b48Spatrick     if (Any)
452109467b48Spatrick       LU.RecomputeRegs(LUIdx, RegUses);
452209467b48Spatrick 
452309467b48Spatrick     // Reset this to prepare for the next use.
452409467b48Spatrick     BestFormulae.clear();
452509467b48Spatrick   }
452609467b48Spatrick 
452709467b48Spatrick   LLVM_DEBUG(if (ChangedFormulae) {
452809467b48Spatrick     dbgs() << "\n"
452909467b48Spatrick               "After filtering out undesirable candidates:\n";
453009467b48Spatrick     print_uses(dbgs());
453109467b48Spatrick   });
453209467b48Spatrick }
453309467b48Spatrick 
453409467b48Spatrick /// Estimate the worst-case number of solutions the solver might have to
453509467b48Spatrick /// consider. It almost never considers this many solutions because it prunes
453609467b48Spatrick /// the search space, but the pruning isn't always sufficient.
453609467b48Spatrick /// search space, but the pruning isn't always sufficient.
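/// For example, three uses with 4, 5, and 6 formulae give a worst case of
/// 4 * 5 * 6 = 120 candidate solutions; the estimate is clamped once it
/// reaches ComplexityLimit.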
453709467b48Spatrick size_t LSRInstance::EstimateSearchSpaceComplexity() const {
453809467b48Spatrick   size_t Power = 1;
453909467b48Spatrick   for (const LSRUse &LU : Uses) {
454009467b48Spatrick     size_t FSize = LU.Formulae.size();
454109467b48Spatrick     if (FSize >= ComplexityLimit) {
454209467b48Spatrick       Power = ComplexityLimit;
454309467b48Spatrick       break;
454409467b48Spatrick     }
454509467b48Spatrick     Power *= FSize;
454609467b48Spatrick     if (Power >= ComplexityLimit)
454709467b48Spatrick       break;
454809467b48Spatrick   }
454909467b48Spatrick   return Power;
455009467b48Spatrick }
455109467b48Spatrick 
455209467b48Spatrick /// When one formula uses a superset of the registers of another formula, it
455309467b48Spatrick /// won't help reduce register pressure (though it may not necessarily hurt
455409467b48Spatrick /// register pressure); remove it to simplify the system.
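/// For example, reg(A) + reg(5) uses a superset of the registers used by a
/// formula that folds the 5 into an immediate field; when both exist, the
/// superset formula can be deleted.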
455509467b48Spatrick void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
455609467b48Spatrick   if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
455709467b48Spatrick     LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
455809467b48Spatrick 
455909467b48Spatrick     LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
456009467b48Spatrick                          "which use a superset of registers used by other "
456109467b48Spatrick                          "formulae.\n");
456209467b48Spatrick 
456309467b48Spatrick     for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
456409467b48Spatrick       LSRUse &LU = Uses[LUIdx];
456509467b48Spatrick       bool Any = false;
456609467b48Spatrick       for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
456709467b48Spatrick         Formula &F = LU.Formulae[i];
456809467b48Spatrick         // Look for a formula with a constant or GV in a register. If the use
456909467b48Spatrick         // also has a formula with that same value in an immediate field,
457009467b48Spatrick         // delete the one that uses a register.
457109467b48Spatrick         for (SmallVectorImpl<const SCEV *>::const_iterator
457209467b48Spatrick              I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
457309467b48Spatrick           if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
457409467b48Spatrick             Formula NewF = F;
457509467b48Spatrick             //FIXME: Formulas should store bitwidth to do wrapping properly.
457609467b48Spatrick             //       See PR41034.
457709467b48Spatrick             NewF.BaseOffset += (uint64_t)C->getValue()->getSExtValue();
457809467b48Spatrick             NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
457909467b48Spatrick                                 (I - F.BaseRegs.begin()));
458009467b48Spatrick             if (LU.HasFormulaWithSameRegs(NewF)) {
458109467b48Spatrick               LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
458209467b48Spatrick                          dbgs() << '\n');
458309467b48Spatrick               LU.DeleteFormula(F);
458409467b48Spatrick               --i;
458509467b48Spatrick               --e;
458609467b48Spatrick               Any = true;
458709467b48Spatrick               break;
458809467b48Spatrick             }
458909467b48Spatrick           } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
459009467b48Spatrick             if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
459109467b48Spatrick               if (!F.BaseGV) {
459209467b48Spatrick                 Formula NewF = F;
459309467b48Spatrick                 NewF.BaseGV = GV;
459409467b48Spatrick                 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
459509467b48Spatrick                                     (I - F.BaseRegs.begin()));
459609467b48Spatrick                 if (LU.HasFormulaWithSameRegs(NewF)) {
459709467b48Spatrick                   LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
459809467b48Spatrick                              dbgs() << '\n');
459909467b48Spatrick                   LU.DeleteFormula(F);
460009467b48Spatrick                   --i;
460109467b48Spatrick                   --e;
460209467b48Spatrick                   Any = true;
460309467b48Spatrick                   break;
460409467b48Spatrick                 }
460509467b48Spatrick               }
460609467b48Spatrick           }
460709467b48Spatrick         }
460809467b48Spatrick       }
460909467b48Spatrick       if (Any)
461009467b48Spatrick         LU.RecomputeRegs(LUIdx, RegUses);
461109467b48Spatrick     }
461209467b48Spatrick 
461309467b48Spatrick     LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
461409467b48Spatrick   }
461509467b48Spatrick }
461609467b48Spatrick 
461709467b48Spatrick /// When there are many registers for expressions like A, A+1, A+2, etc.,
461809467b48Spatrick /// allocate a single register for them.
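/// For example, after a loop is unrolled four times, uses at A, A+1, A+2, and
/// A+3 can often be folded into a single use of A whose fixups carry the
/// constant offsets.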
461909467b48Spatrick void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
462009467b48Spatrick   if (EstimateSearchSpaceComplexity() < ComplexityLimit)
462109467b48Spatrick     return;
462209467b48Spatrick 
462309467b48Spatrick   LLVM_DEBUG(
462409467b48Spatrick       dbgs() << "The search space is too complex.\n"
462509467b48Spatrick                 "Narrowing the search space by assuming that uses separated "
462609467b48Spatrick                 "by a constant offset will use the same registers.\n");
462709467b48Spatrick 
462809467b48Spatrick   // This is especially useful for unrolled loops.
462909467b48Spatrick 
463009467b48Spatrick   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
463109467b48Spatrick     LSRUse &LU = Uses[LUIdx];
463209467b48Spatrick     for (const Formula &F : LU.Formulae) {
463309467b48Spatrick       if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
463409467b48Spatrick         continue;
463509467b48Spatrick 
463609467b48Spatrick       LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
463709467b48Spatrick       if (!LUThatHas)
463809467b48Spatrick         continue;
463909467b48Spatrick 
464009467b48Spatrick       if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
464109467b48Spatrick                               LU.Kind, LU.AccessTy))
464209467b48Spatrick         continue;
464309467b48Spatrick 
464409467b48Spatrick       LLVM_DEBUG(dbgs() << "  Deleting use "; LU.print(dbgs()); dbgs() << '\n');
464509467b48Spatrick 
464609467b48Spatrick       LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
464709467b48Spatrick 
464809467b48Spatrick       // Transfer the fixups of LU to LUThatHas.
464909467b48Spatrick       for (LSRFixup &Fixup : LU.Fixups) {
465009467b48Spatrick         Fixup.Offset += F.BaseOffset;
465109467b48Spatrick         LUThatHas->pushFixup(Fixup);
465209467b48Spatrick         LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
465309467b48Spatrick       }
465409467b48Spatrick 
465509467b48Spatrick       // Delete formulae from the new use which are no longer legal.
465609467b48Spatrick       bool Any = false;
465709467b48Spatrick       for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
465809467b48Spatrick         Formula &F = LUThatHas->Formulae[i];
465909467b48Spatrick         if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
466009467b48Spatrick                         LUThatHas->Kind, LUThatHas->AccessTy, F)) {
466109467b48Spatrick           LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
466209467b48Spatrick           LUThatHas->DeleteFormula(F);
466309467b48Spatrick           --i;
466409467b48Spatrick           --e;
466509467b48Spatrick           Any = true;
466609467b48Spatrick         }
466709467b48Spatrick       }
466809467b48Spatrick 
466909467b48Spatrick       if (Any)
467009467b48Spatrick         LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
467109467b48Spatrick 
467209467b48Spatrick       // Delete the old use.
467309467b48Spatrick       DeleteUse(LU, LUIdx);
467409467b48Spatrick       --LUIdx;
467509467b48Spatrick       --NumUses;
467609467b48Spatrick       break;
467709467b48Spatrick     }
467809467b48Spatrick   }
467909467b48Spatrick 
468009467b48Spatrick   LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
468109467b48Spatrick }
468209467b48Spatrick 
468309467b48Spatrick /// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
468409467b48Spatrick /// we've done more filtering, as it may be able to find more formulae to
468509467b48Spatrick /// eliminate.
468609467b48Spatrick void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
468709467b48Spatrick   if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
468809467b48Spatrick     LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
468909467b48Spatrick 
469009467b48Spatrick     LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
469109467b48Spatrick                          "undesirable dedicated registers.\n");
469209467b48Spatrick 
469309467b48Spatrick     FilterOutUndesirableDedicatedRegisters();
469409467b48Spatrick 
469509467b48Spatrick     LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
469609467b48Spatrick   }
469709467b48Spatrick }
469809467b48Spatrick 
469909467b48Spatrick /// If an LSRUse has multiple formulae with the same ScaledReg and Scale,
470009467b48Spatrick /// pick the best one and delete the others.
470109467b48Spatrick /// This narrowing heuristic keeps as many formulae with different
470209467b48Spatrick /// Scale/ScaledReg pairs as possible while narrowing the search space.
470309467b48Spatrick /// The benefit is that a better solution is more likely to be found in
470409467b48Spatrick /// a formula set with more Scale and ScaledReg variations than in one
470509467b48Spatrick /// where they are all the same. The winner-reg-picking heuristic will
470609467b48Spatrick /// often keep the formulae with the same Scale and ScaledReg and filter
470709467b48Spatrick /// out the others, and we want to avoid that if possible.
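/// For example (an illustrative use, not real pass output): given the two
/// formulae
///   reg(a) + 4*reg({0,+,1})
///   reg(b) + reg(c) + 4*reg({0,+,1})
/// both share the (ScaledReg, Scale) pair ({0,+,1}, 4), so only the better
/// one is kept; here the first, since it needs fewer new registers.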
470809467b48Spatrick void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
470909467b48Spatrick   if (EstimateSearchSpaceComplexity() < ComplexityLimit)
471009467b48Spatrick     return;
471109467b48Spatrick 
471209467b48Spatrick   LLVM_DEBUG(
471309467b48Spatrick       dbgs() << "The search space is too complex.\n"
471409467b48Spatrick                 "Narrowing the search space by choosing the best Formula "
471509467b48Spatrick                 "from the Formulae with the same Scale and ScaledReg.\n");
471609467b48Spatrick 
471709467b48Spatrick   // Map the "Scale * ScaledReg" pair to the best formula of the current LSRUse.
471809467b48Spatrick   using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
471909467b48Spatrick 
472009467b48Spatrick   BestFormulaeTy BestFormulae;
472109467b48Spatrick #ifndef NDEBUG
472209467b48Spatrick   bool ChangedFormulae = false;
472309467b48Spatrick #endif
472409467b48Spatrick   DenseSet<const SCEV *> VisitedRegs;
472509467b48Spatrick   SmallPtrSet<const SCEV *, 16> Regs;
472609467b48Spatrick 
472709467b48Spatrick   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
472809467b48Spatrick     LSRUse &LU = Uses[LUIdx];
472909467b48Spatrick     LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
473009467b48Spatrick                dbgs() << '\n');
473109467b48Spatrick 
473209467b48Spatrick     // Return true if Formula FA is better than Formula FB.
473309467b48Spatrick     auto IsBetterThan = [&](Formula &FA, Formula &FB) {
473409467b48Spatrick       // First we will try to choose the Formula with fewer new registers.
473509467b48Spatrick       // For a register used by the current Formula, the more widely that
473609467b48Spatrick       // register is shared among the LSRUses, the less it increases the
473709467b48Spatrick       // formula's register-number counter.
473809467b48Spatrick       size_t FARegNum = 0;
473909467b48Spatrick       for (const SCEV *Reg : FA.BaseRegs) {
474009467b48Spatrick         const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
474109467b48Spatrick         FARegNum += (NumUses - UsedByIndices.count() + 1);
474209467b48Spatrick       }
474309467b48Spatrick       size_t FBRegNum = 0;
474409467b48Spatrick       for (const SCEV *Reg : FB.BaseRegs) {
474509467b48Spatrick         const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
474609467b48Spatrick         FBRegNum += (NumUses - UsedByIndices.count() + 1);
474709467b48Spatrick       }
474809467b48Spatrick       if (FARegNum != FBRegNum)
474909467b48Spatrick         return FARegNum < FBRegNum;
475009467b48Spatrick 
475109467b48Spatrick       // If the new register numbers are the same, choose the Formula with
475209467b48Spatrick       // the lower Cost.
475373471bf0Spatrick       Cost CostFA(L, SE, TTI, AMK);
475473471bf0Spatrick       Cost CostFB(L, SE, TTI, AMK);
475509467b48Spatrick       Regs.clear();
475609467b48Spatrick       CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
475709467b48Spatrick       Regs.clear();
475809467b48Spatrick       CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
475909467b48Spatrick       return CostFA.isLess(CostFB);
476009467b48Spatrick     };
476109467b48Spatrick 
476209467b48Spatrick     bool Any = false;
476309467b48Spatrick     for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
476409467b48Spatrick          ++FIdx) {
476509467b48Spatrick       Formula &F = LU.Formulae[FIdx];
476609467b48Spatrick       if (!F.ScaledReg)
476709467b48Spatrick         continue;
476809467b48Spatrick       auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
476909467b48Spatrick       if (P.second)
477009467b48Spatrick         continue;
477109467b48Spatrick 
477209467b48Spatrick       Formula &Best = LU.Formulae[P.first->second];
477309467b48Spatrick       if (IsBetterThan(F, Best))
477409467b48Spatrick         std::swap(F, Best);
477509467b48Spatrick       LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
477609467b48Spatrick                  dbgs() << "\n"
477709467b48Spatrick                            "    in favor of formula ";
477809467b48Spatrick                  Best.print(dbgs()); dbgs() << '\n');
477909467b48Spatrick #ifndef NDEBUG
478009467b48Spatrick       ChangedFormulae = true;
478109467b48Spatrick #endif
478209467b48Spatrick       LU.DeleteFormula(F);
478309467b48Spatrick       --FIdx;
478409467b48Spatrick       --NumForms;
478509467b48Spatrick       Any = true;
478609467b48Spatrick     }
478709467b48Spatrick     if (Any)
478809467b48Spatrick       LU.RecomputeRegs(LUIdx, RegUses);
478909467b48Spatrick 
479009467b48Spatrick     // Reset this to prepare for the next use.
479109467b48Spatrick     BestFormulae.clear();
479209467b48Spatrick   }
479309467b48Spatrick 
479409467b48Spatrick   LLVM_DEBUG(if (ChangedFormulae) {
479509467b48Spatrick     dbgs() << "\n"
479609467b48Spatrick               "After filtering out undesirable candidates:\n";
479709467b48Spatrick     print_uses(dbgs());
479809467b48Spatrick   });
479909467b48Spatrick }
480009467b48Spatrick 
4801097a140dSpatrick /// If we are over the complexity limit, filter the formulae of any
4802097a140dSpatrick /// post-inc-preferring address uses down to only the post-inc values.
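/// For example (illustrative): on a target with post-indexed loads, an
/// address use with the formulae
///   reg({p,+,4})            -- 1 register, folds into a post-inc load
///   reg(p) + 4*reg({0,+,1}) -- 2 registers
/// keeps only the single-register formula, the one the backend can turn
/// into a post-increment addressing mode.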
4803097a140dSpatrick void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
480473471bf0Spatrick   if (AMK != TTI::AMK_PostIndexed)
4805097a140dSpatrick     return;
4806097a140dSpatrick   if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4807097a140dSpatrick     return;
4808097a140dSpatrick 
4809097a140dSpatrick   LLVM_DEBUG(dbgs() << "The search space is too complex.\n"
4810097a140dSpatrick                        "Narrowing the search space by choosing the lowest "
4811097a140dSpatrick                        "register Formula for PostInc Uses.\n");
4812097a140dSpatrick 
4813097a140dSpatrick   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4814097a140dSpatrick     LSRUse &LU = Uses[LUIdx];
4815097a140dSpatrick 
4816097a140dSpatrick     if (LU.Kind != LSRUse::Address)
4817097a140dSpatrick       continue;
4818097a140dSpatrick     if (!TTI.isIndexedLoadLegal(TTI.MIM_PostInc, LU.AccessTy.getType()) &&
4819097a140dSpatrick         !TTI.isIndexedStoreLegal(TTI.MIM_PostInc, LU.AccessTy.getType()))
4820097a140dSpatrick       continue;
4821097a140dSpatrick 
4822097a140dSpatrick     size_t MinRegs = std::numeric_limits<size_t>::max();
4823097a140dSpatrick     for (const Formula &F : LU.Formulae)
4824097a140dSpatrick       MinRegs = std::min(F.getNumRegs(), MinRegs);
4825097a140dSpatrick 
4826097a140dSpatrick     bool Any = false;
4827097a140dSpatrick     for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
4828097a140dSpatrick          ++FIdx) {
4829097a140dSpatrick       Formula &F = LU.Formulae[FIdx];
4830097a140dSpatrick       if (F.getNumRegs() > MinRegs) {
4831097a140dSpatrick         LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
4832097a140dSpatrick                    dbgs() << "\n");
4833097a140dSpatrick         LU.DeleteFormula(F);
4834097a140dSpatrick         --FIdx;
4835097a140dSpatrick         --NumForms;
4836097a140dSpatrick         Any = true;
4837097a140dSpatrick       }
4838097a140dSpatrick     }
4839097a140dSpatrick     if (Any)
4840097a140dSpatrick       LU.RecomputeRegs(LUIdx, RegUses);
4841097a140dSpatrick 
4842097a140dSpatrick     if (EstimateSearchSpaceComplexity() < ComplexityLimit)
4843097a140dSpatrick       break;
4844097a140dSpatrick   }
4845097a140dSpatrick 
4846097a140dSpatrick   LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
4847097a140dSpatrick }
4848097a140dSpatrick 
484909467b48Spatrick /// This function deletes formulas with a high expected register count.
485009467b48Spatrick /// Assuming we don't know which formula will be selected (all obviously
485109467b48Spatrick /// inefficient ones have already been deleted), compute for each register
485209467b48Spatrick /// the probability of it not being selected.
485309467b48Spatrick /// For example,
485409467b48Spatrick /// Use1:
485509467b48Spatrick ///  reg(a) + reg({0,+,1})
485609467b48Spatrick ///  reg(a) + reg({-1,+,1}) + 1
485709467b48Spatrick ///  reg({a,+,1})
485809467b48Spatrick /// Use2:
485909467b48Spatrick ///  reg(b) + reg({0,+,1})
486009467b48Spatrick ///  reg(b) + reg({-1,+,1}) + 1
486109467b48Spatrick ///  reg({b,+,1})
486209467b48Spatrick /// Use3:
486309467b48Spatrick ///  reg(c) + reg(b) + reg({0,+,1})
486409467b48Spatrick ///  reg(c) + reg({b,+,1})
486509467b48Spatrick ///
486609467b48Spatrick /// Probability of not selecting
486709467b48Spatrick ///                 Use1   Use2    Use3
486809467b48Spatrick /// reg(a)         (1/3) *   1   *   1
486909467b48Spatrick /// reg(b)           1   * (1/3) * (1/2)
487009467b48Spatrick /// reg({0,+,1})   (2/3) * (2/3) * (1/2)
487109467b48Spatrick /// reg({-1,+,1})  (2/3) * (2/3) *   1
487209467b48Spatrick /// reg({a,+,1})   (2/3) *   1   *   1
487309467b48Spatrick /// reg({b,+,1})     1   * (2/3) * (1/2)
487409467b48Spatrick /// reg(c)           1   *   1   *   0
487509467b48Spatrick ///
487609467b48Spatrick /// Now compute the expected register count for each formula. Note that for
487709467b48Spatrick /// each use we exclude the probability of not selecting for that use itself.
487809467b48Spatrick /// For example, for Use1 the probability for reg(a) would be just 1 * 1
487909467b48Spatrick /// (excluding the probability 1/3 of not selecting for Use1).
488009467b48Spatrick /// Use1:
488109467b48Spatrick ///  reg(a) + reg({0,+,1})          1 + 1/3       -- to be deleted
488209467b48Spatrick ///  reg(a) + reg({-1,+,1}) + 1     1 + 2/3       -- to be deleted
488309467b48Spatrick ///  reg({a,+,1})                   1
488409467b48Spatrick /// Use2:
488509467b48Spatrick ///  reg(b) + reg({0,+,1})          1/2 + 1/3     -- to be deleted
488609467b48Spatrick ///  reg(b) + reg({-1,+,1}) + 1     1/2 + 2/3     -- to be deleted
488709467b48Spatrick ///  reg({b,+,1})                   1/2
488809467b48Spatrick /// Use3:
488909467b48Spatrick ///  reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
489009467b48Spatrick ///  reg(c) + reg({b,+,1})          1 + 2/3
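/// As a worked instance of one entry above: in Use2's second formula,
/// reg({-1,+,1}) has overall not-selected probability (2/3) * (2/3) = 4/9;
/// excluding Use2's own factor of 2/3 leaves (4/9) / (2/3) = 2/3, which,
/// added to reg(b)'s 1/2, gives the 1/2 + 2/3 shown above.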
489109467b48Spatrick void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
489209467b48Spatrick   if (EstimateSearchSpaceComplexity() < ComplexityLimit)
489309467b48Spatrick     return;
489409467b48Spatrick   // Ok, we have too many formulae on our hands to conveniently handle.
489509467b48Spatrick   // Use a rough heuristic to thin out the list.
489609467b48Spatrick 
489709467b48Spatrick   // Set of Regs which will certainly be used in the final solution, i.e.
489809467b48Spatrick   // used in each formula of a solution (in the example above this is reg(c)).
489909467b48Spatrick   // We can skip them in calculations.
490009467b48Spatrick   SmallPtrSet<const SCEV *, 4> UniqRegs;
490109467b48Spatrick   LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
490209467b48Spatrick 
490309467b48Spatrick   // Map each register to the probability of it not being selected.
490409467b48Spatrick   DenseMap<const SCEV *, float> RegNumMap;
490509467b48Spatrick   for (const SCEV *Reg : RegUses) {
490609467b48Spatrick     if (UniqRegs.count(Reg))
490709467b48Spatrick       continue;
490809467b48Spatrick     float PNotSel = 1;
490909467b48Spatrick     for (const LSRUse &LU : Uses) {
491009467b48Spatrick       if (!LU.Regs.count(Reg))
491109467b48Spatrick         continue;
491209467b48Spatrick       float P = LU.getNotSelectedProbability(Reg);
491309467b48Spatrick       if (P != 0.0)
491409467b48Spatrick         PNotSel *= P;
491509467b48Spatrick       else
491609467b48Spatrick         UniqRegs.insert(Reg);
491709467b48Spatrick     }
491809467b48Spatrick     RegNumMap.insert(std::make_pair(Reg, PNotSel));
491909467b48Spatrick   }
492009467b48Spatrick 
492109467b48Spatrick   LLVM_DEBUG(
492209467b48Spatrick       dbgs() << "Narrowing the search space by deleting costly formulas\n");
492309467b48Spatrick 
492409467b48Spatrick   // Delete formulas whose expected register count is high.
492509467b48Spatrick   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
492609467b48Spatrick     LSRUse &LU = Uses[LUIdx];
492709467b48Spatrick     // If nothing to delete - continue.
492809467b48Spatrick     if (LU.Formulae.size() < 2)
492909467b48Spatrick       continue;
493009467b48Spatrick     // This is a temporary solution to test performance. Float should be
493109467b48Spatrick     // replaced with a rounding-independent type (based on integers) to
493209467b48Spatrick     // avoid different results for different host builds.
493309467b48Spatrick     float FMinRegNum = LU.Formulae[0].getNumRegs();
493409467b48Spatrick     float FMinARegNum = LU.Formulae[0].getNumRegs();
493509467b48Spatrick     size_t MinIdx = 0;
493609467b48Spatrick     for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
493709467b48Spatrick       Formula &F = LU.Formulae[i];
493809467b48Spatrick       float FRegNum = 0;
493909467b48Spatrick       float FARegNum = 0;
494009467b48Spatrick       for (const SCEV *BaseReg : F.BaseRegs) {
494109467b48Spatrick         if (UniqRegs.count(BaseReg))
494209467b48Spatrick           continue;
494309467b48Spatrick         FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
494409467b48Spatrick         if (isa<SCEVAddRecExpr>(BaseReg))
494509467b48Spatrick           FARegNum +=
494609467b48Spatrick               RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
494709467b48Spatrick       }
494809467b48Spatrick       if (const SCEV *ScaledReg = F.ScaledReg) {
494909467b48Spatrick         if (!UniqRegs.count(ScaledReg)) {
495009467b48Spatrick           FRegNum +=
495109467b48Spatrick               RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
495209467b48Spatrick           if (isa<SCEVAddRecExpr>(ScaledReg))
495309467b48Spatrick             FARegNum +=
495409467b48Spatrick                 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
495509467b48Spatrick         }
495609467b48Spatrick       }
495709467b48Spatrick       if (FMinRegNum > FRegNum ||
495809467b48Spatrick           (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
495909467b48Spatrick         FMinRegNum = FRegNum;
496009467b48Spatrick         FMinARegNum = FARegNum;
496109467b48Spatrick         MinIdx = i;
496209467b48Spatrick       }
496309467b48Spatrick     }
496409467b48Spatrick     LLVM_DEBUG(dbgs() << "  The formula "; LU.Formulae[MinIdx].print(dbgs());
496509467b48Spatrick                dbgs() << " with min reg num " << FMinRegNum << '\n');
496609467b48Spatrick     if (MinIdx != 0)
496709467b48Spatrick       std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
496809467b48Spatrick     while (LU.Formulae.size() != 1) {
496909467b48Spatrick       LLVM_DEBUG(dbgs() << "  Deleting "; LU.Formulae.back().print(dbgs());
497009467b48Spatrick                  dbgs() << '\n');
497109467b48Spatrick       LU.Formulae.pop_back();
497209467b48Spatrick     }
497309467b48Spatrick     LU.RecomputeRegs(LUIdx, RegUses);
497409467b48Spatrick     assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
497509467b48Spatrick     Formula &F = LU.Formulae[0];
497609467b48Spatrick     LLVM_DEBUG(dbgs() << "  Leaving only "; F.print(dbgs()); dbgs() << '\n');
497709467b48Spatrick     // When we choose the formula, the regs become unique.
497809467b48Spatrick     UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
497909467b48Spatrick     if (F.ScaledReg)
498009467b48Spatrick       UniqRegs.insert(F.ScaledReg);
498109467b48Spatrick   }
498209467b48Spatrick   LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
498309467b48Spatrick }
498409467b48Spatrick 
498509467b48Spatrick /// Pick a register which seems likely to be profitable, and then in any use
498609467b48Spatrick /// which has any reference to that register, delete all formulae which do not
498709467b48Spatrick /// reference that register.
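/// For example (illustrative): if reg({0,+,1}) appears in formulae of five
/// of seven uses while every other register appears in at most two, assume
/// reg({0,+,1}) will be part of the solution and, within those five uses,
/// delete every formula that does not reference it.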
498809467b48Spatrick void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
498909467b48Spatrick   // With all other options exhausted, loop until the system is simple
499009467b48Spatrick   // enough to handle.
499109467b48Spatrick   SmallPtrSet<const SCEV *, 4> Taken;
499209467b48Spatrick   while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
499309467b48Spatrick     // Ok, we have too many formulae on our hands to conveniently handle.
499409467b48Spatrick     // Use a rough heuristic to thin out the list.
499509467b48Spatrick     LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
499609467b48Spatrick 
499709467b48Spatrick     // Pick the register which is used by the most LSRUses, which is likely
499809467b48Spatrick     // to be a good reuse register candidate.
499909467b48Spatrick     const SCEV *Best = nullptr;
500009467b48Spatrick     unsigned BestNum = 0;
500109467b48Spatrick     for (const SCEV *Reg : RegUses) {
500209467b48Spatrick       if (Taken.count(Reg))
500309467b48Spatrick         continue;
500409467b48Spatrick       if (!Best) {
500509467b48Spatrick         Best = Reg;
500609467b48Spatrick         BestNum = RegUses.getUsedByIndices(Reg).count();
500709467b48Spatrick       } else {
500809467b48Spatrick         unsigned Count = RegUses.getUsedByIndices(Reg).count();
500909467b48Spatrick         if (Count > BestNum) {
501009467b48Spatrick           Best = Reg;
501109467b48Spatrick           BestNum = Count;
501209467b48Spatrick         }
501309467b48Spatrick       }
501409467b48Spatrick     }
501509467b48Spatrick     assert(Best && "Failed to find best LSRUse candidate");
501609467b48Spatrick 
501709467b48Spatrick     LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
501809467b48Spatrick                       << " will yield profitable reuse.\n");
501909467b48Spatrick     Taken.insert(Best);
502009467b48Spatrick 
502109467b48Spatrick     // In any use with a formula that references this register, delete
502209467b48Spatrick     // formulae which don't reference it.
502309467b48Spatrick     for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
502409467b48Spatrick       LSRUse &LU = Uses[LUIdx];
502509467b48Spatrick       if (!LU.Regs.count(Best)) continue;
502609467b48Spatrick 
502709467b48Spatrick       bool Any = false;
502809467b48Spatrick       for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
502909467b48Spatrick         Formula &F = LU.Formulae[i];
503009467b48Spatrick         if (!F.referencesReg(Best)) {
503109467b48Spatrick           LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
503209467b48Spatrick           LU.DeleteFormula(F);
503309467b48Spatrick           --e;
503409467b48Spatrick           --i;
503509467b48Spatrick           Any = true;
503609467b48Spatrick           assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
503709467b48Spatrick           continue;
503809467b48Spatrick         }
503909467b48Spatrick       }
504009467b48Spatrick 
504109467b48Spatrick       if (Any)
504209467b48Spatrick         LU.RecomputeRegs(LUIdx, RegUses);
504309467b48Spatrick     }
504409467b48Spatrick 
504509467b48Spatrick     LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
504609467b48Spatrick   }
504709467b48Spatrick }
504809467b48Spatrick 
504909467b48Spatrick /// If there are an extraordinary number of formulae to choose from, use some
505009467b48Spatrick /// rough heuristics to prune down the number of formulae. This keeps the main
505109467b48Spatrick /// solver from taking an extraordinary amount of time in some worst-case
505209467b48Spatrick /// scenarios.
505309467b48Spatrick void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
505409467b48Spatrick   NarrowSearchSpaceByDetectingSupersets();
505509467b48Spatrick   NarrowSearchSpaceByCollapsingUnrolledCode();
505609467b48Spatrick   NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
505709467b48Spatrick   if (FilterSameScaledReg)
505809467b48Spatrick     NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5059097a140dSpatrick   NarrowSearchSpaceByFilterPostInc();
506009467b48Spatrick   if (LSRExpNarrow)
506109467b48Spatrick     NarrowSearchSpaceByDeletingCostlyFormulas();
506209467b48Spatrick   else
506309467b48Spatrick     NarrowSearchSpaceByPickingWinnerRegs();
506409467b48Spatrick }
506509467b48Spatrick 
506609467b48Spatrick /// This is the recursive solver.
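/// It walks the uses in order, choosing one formula per use and recursing,
/// so in the worst case the search space is the product of the formula
/// counts of all uses (which is why the narrowing heuristics above matter).
/// A branch is pruned as soon as its running cost is already no better than
/// the cheapest complete solution found so far.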
506709467b48Spatrick void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
506809467b48Spatrick                                Cost &SolutionCost,
506909467b48Spatrick                                SmallVectorImpl<const Formula *> &Workspace,
507009467b48Spatrick                                const Cost &CurCost,
507109467b48Spatrick                                const SmallPtrSet<const SCEV *, 16> &CurRegs,
507209467b48Spatrick                                DenseSet<const SCEV *> &VisitedRegs) const {
507309467b48Spatrick   // Some ideas:
507409467b48Spatrick   //  - prune more:
507509467b48Spatrick   //    - use more aggressive filtering
507609467b48Spatrick   //    - sort the formulae so that the most profitable solutions are found first
507709467b48Spatrick   //    - sort the uses too
507809467b48Spatrick   //  - search faster:
507909467b48Spatrick   //    - don't compute a cost and then compare; compare while computing the cost
508009467b48Spatrick   //      and bail early.
508109467b48Spatrick   //    - track register sets with SmallBitVector
508209467b48Spatrick 
508309467b48Spatrick   const LSRUse &LU = Uses[Workspace.size()];
508409467b48Spatrick 
508509467b48Spatrick   // If this use references any register that's already a part of the
508609467b48Spatrick   // in-progress solution, consider it a requirement that a formula must
508709467b48Spatrick   // reference that register in order to be considered. This prunes out
508809467b48Spatrick   // unprofitable searching.
508909467b48Spatrick   SmallSetVector<const SCEV *, 4> ReqRegs;
509009467b48Spatrick   for (const SCEV *S : CurRegs)
509109467b48Spatrick     if (LU.Regs.count(S))
509209467b48Spatrick       ReqRegs.insert(S);
509309467b48Spatrick 
509409467b48Spatrick   SmallPtrSet<const SCEV *, 16> NewRegs;
509573471bf0Spatrick   Cost NewCost(L, SE, TTI, AMK);
509609467b48Spatrick   for (const Formula &F : LU.Formulae) {
509709467b48Spatrick     // Ignore formulae which may not be ideal in terms of register reuse of
509809467b48Spatrick     // ReqRegs.  The formula should use all required registers before
509909467b48Spatrick     // introducing new ones.
5100097a140dSpatrick     // This can sometimes (notably when trying to favour postinc) lead to
5101097a140dSpatrick     // sub-optimal decisions, so in that case it is best left to the cost
5102097a140dSpatrick     // modelling to get right.
510373471bf0Spatrick     if (AMK != TTI::AMK_PostIndexed || LU.Kind != LSRUse::Address) {
510409467b48Spatrick       int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
510509467b48Spatrick       for (const SCEV *Reg : ReqRegs) {
510609467b48Spatrick         if ((F.ScaledReg && F.ScaledReg == Reg) ||
510709467b48Spatrick             is_contained(F.BaseRegs, Reg)) {
510809467b48Spatrick           --NumReqRegsToFind;
510909467b48Spatrick           if (NumReqRegsToFind == 0)
511009467b48Spatrick             break;
511109467b48Spatrick         }
511209467b48Spatrick       }
511309467b48Spatrick       if (NumReqRegsToFind != 0) {
511409467b48Spatrick         // If none of the formulae satisfied the required registers, then we could
511509467b48Spatrick         // clear ReqRegs and try again. Currently, we simply give up in this case.
511609467b48Spatrick         continue;
511709467b48Spatrick       }
5118097a140dSpatrick     }
511909467b48Spatrick 
512009467b48Spatrick     // Evaluate the cost of the current formula. If it's already worse than
512109467b48Spatrick     // the current best, prune the search at that point.
512209467b48Spatrick     NewCost = CurCost;
512309467b48Spatrick     NewRegs = CurRegs;
512409467b48Spatrick     NewCost.RateFormula(F, NewRegs, VisitedRegs, LU);
512509467b48Spatrick     if (NewCost.isLess(SolutionCost)) {
512609467b48Spatrick       Workspace.push_back(&F);
512709467b48Spatrick       if (Workspace.size() != Uses.size()) {
512809467b48Spatrick         SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
512909467b48Spatrick                      NewRegs, VisitedRegs);
513009467b48Spatrick         if (F.getNumRegs() == 1 && Workspace.size() == 1)
513109467b48Spatrick           VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
513209467b48Spatrick       } else {
513309467b48Spatrick         LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
513409467b48Spatrick                    dbgs() << ".\nRegs:\n";
513509467b48Spatrick                    for (const SCEV *S : NewRegs) dbgs()
513609467b48Spatrick                       << "- " << *S << "\n";
513709467b48Spatrick                    dbgs() << '\n');
513809467b48Spatrick 
513909467b48Spatrick         SolutionCost = NewCost;
514009467b48Spatrick         Solution = Workspace;
514109467b48Spatrick       }
514209467b48Spatrick       Workspace.pop_back();
514309467b48Spatrick     }
514409467b48Spatrick   }
514509467b48Spatrick }
514609467b48Spatrick 
514709467b48Spatrick /// Choose one formula from each use. Return the results in the given Solution
514809467b48Spatrick /// vector.
514909467b48Spatrick void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
515009467b48Spatrick   SmallVector<const Formula *, 8> Workspace;
515173471bf0Spatrick   Cost SolutionCost(L, SE, TTI, AMK);
515209467b48Spatrick   SolutionCost.Lose();
515373471bf0Spatrick   Cost CurCost(L, SE, TTI, AMK);
515409467b48Spatrick   SmallPtrSet<const SCEV *, 16> CurRegs;
515509467b48Spatrick   DenseSet<const SCEV *> VisitedRegs;
515609467b48Spatrick   Workspace.reserve(Uses.size());
515709467b48Spatrick 
515809467b48Spatrick   // SolveRecurse does all the work.
515909467b48Spatrick   SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
516009467b48Spatrick                CurRegs, VisitedRegs);
516109467b48Spatrick   if (Solution.empty()) {
516209467b48Spatrick     LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
516309467b48Spatrick     return;
516409467b48Spatrick   }
516509467b48Spatrick 
516609467b48Spatrick   // Ok, we've now made all our decisions.
516709467b48Spatrick   LLVM_DEBUG(dbgs() << "\n"
516809467b48Spatrick                        "The chosen solution requires ";
516909467b48Spatrick              SolutionCost.print(dbgs()); dbgs() << ":\n";
517009467b48Spatrick              for (size_t i = 0, e = Uses.size(); i != e; ++i) {
517109467b48Spatrick                dbgs() << "  ";
517209467b48Spatrick                Uses[i].print(dbgs());
517309467b48Spatrick                dbgs() << "\n"
517409467b48Spatrick                          "    ";
517509467b48Spatrick                Solution[i]->print(dbgs());
517609467b48Spatrick                dbgs() << '\n';
517709467b48Spatrick              });
517809467b48Spatrick 
517909467b48Spatrick   assert(Solution.size() == Uses.size() && "Malformed solution!");
5180*d415bd75Srobert 
5181*d415bd75Srobert   if (BaselineCost.isLess(SolutionCost)) {
5182*d415bd75Srobert     LLVM_DEBUG(dbgs() << "The baseline solution requires ";
5183*d415bd75Srobert                BaselineCost.print(dbgs()); dbgs() << "\n");
5184*d415bd75Srobert     if (!AllowDropSolutionIfLessProfitable)
5185*d415bd75Srobert       LLVM_DEBUG(
5186*d415bd75Srobert           dbgs() << "Baseline is more profitable than chosen solution, "
5187*d415bd75Srobert                     "add option 'lsr-drop-solution' to drop LSR solution.\n");
5188*d415bd75Srobert     else {
5189*d415bd75Srobert       LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen "
5190*d415bd75Srobert                            "solution, dropping LSR solution.\n";);
5191*d415bd75Srobert       Solution.clear();
5192*d415bd75Srobert     }
5193*d415bd75Srobert   }
519409467b48Spatrick }
519509467b48Spatrick 
519609467b48Spatrick /// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as far
519709467b48Spatrick /// as we can go while still being dominated by the input positions. This helps
519809467b48Spatrick /// canonicalize the insert position, which encourages sharing.
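/// For example (illustrative): if an expansion's inputs are all defined in
/// the entry block, an insert position in a deeply nested block can be
/// hoisted all the way to the entry block's terminator, letting later
/// expansions of the same expression reuse the emitted instructions.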
519909467b48Spatrick BasicBlock::iterator
520009467b48Spatrick LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
520109467b48Spatrick                                  const SmallVectorImpl<Instruction *> &Inputs)
520209467b48Spatrick                                                                          const {
520309467b48Spatrick   Instruction *Tentative = &*IP;
520409467b48Spatrick   while (true) {
520509467b48Spatrick     bool AllDominate = true;
520609467b48Spatrick     Instruction *BetterPos = nullptr;
520709467b48Spatrick     // Don't bother attempting to insert before a catchswitch; its basic block
520809467b48Spatrick     // cannot have other non-PHI instructions.
520909467b48Spatrick     if (isa<CatchSwitchInst>(Tentative))
521009467b48Spatrick       return IP;
521109467b48Spatrick 
521209467b48Spatrick     for (Instruction *Inst : Inputs) {
521309467b48Spatrick       if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
521409467b48Spatrick         AllDominate = false;
521509467b48Spatrick         break;
521609467b48Spatrick       }
521709467b48Spatrick       // Attempt to find an insert position in the middle of the block,
521809467b48Spatrick       // instead of at the end, so that it can be used for other expansions.
521909467b48Spatrick       if (Tentative->getParent() == Inst->getParent() &&
522009467b48Spatrick           (!BetterPos || !DT.dominates(Inst, BetterPos)))
522109467b48Spatrick         BetterPos = &*std::next(BasicBlock::iterator(Inst));
522209467b48Spatrick     }
522309467b48Spatrick     if (!AllDominate)
522409467b48Spatrick       break;
522509467b48Spatrick     if (BetterPos)
522609467b48Spatrick       IP = BetterPos->getIterator();
522709467b48Spatrick     else
522809467b48Spatrick       IP = Tentative->getIterator();
522909467b48Spatrick 
523009467b48Spatrick     const Loop *IPLoop = LI.getLoopFor(IP->getParent());
523109467b48Spatrick     unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
523209467b48Spatrick 
523309467b48Spatrick     BasicBlock *IDom;
523409467b48Spatrick     for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
523509467b48Spatrick       if (!Rung) return IP;
523609467b48Spatrick       Rung = Rung->getIDom();
523709467b48Spatrick       if (!Rung) return IP;
523809467b48Spatrick       IDom = Rung->getBlock();
523909467b48Spatrick 
524009467b48Spatrick       // Don't climb into a loop though.
524109467b48Spatrick       const Loop *IDomLoop = LI.getLoopFor(IDom);
524209467b48Spatrick       unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
524309467b48Spatrick       if (IDomDepth <= IPLoopDepth &&
524409467b48Spatrick           (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
524509467b48Spatrick         break;
524609467b48Spatrick     }
524709467b48Spatrick 
524809467b48Spatrick     Tentative = IDom->getTerminator();
524909467b48Spatrick   }
525009467b48Spatrick 
525109467b48Spatrick   return IP;
525209467b48Spatrick }
525309467b48Spatrick 
525409467b48Spatrick /// Determine an insert position which will be dominated by the operands and
525509467b48Spatrick /// which will dominate the result.
5256*d415bd75Srobert BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(
5257*d415bd75Srobert     BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const {
525809467b48Spatrick   // Collect some instructions which must be dominated by the
525909467b48Spatrick   // expanding replacement. These must be dominated by any operands that
526009467b48Spatrick   // will be required in the expansion.
526109467b48Spatrick   SmallVector<Instruction *, 4> Inputs;
526209467b48Spatrick   if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
526309467b48Spatrick     Inputs.push_back(I);
526409467b48Spatrick   if (LU.Kind == LSRUse::ICmpZero)
526509467b48Spatrick     if (Instruction *I =
526609467b48Spatrick           dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
526709467b48Spatrick       Inputs.push_back(I);
526809467b48Spatrick   if (LF.PostIncLoops.count(L)) {
526909467b48Spatrick     if (LF.isUseFullyOutsideLoop(L))
527009467b48Spatrick       Inputs.push_back(L->getLoopLatch()->getTerminator());
527109467b48Spatrick     else
527209467b48Spatrick       Inputs.push_back(IVIncInsertPos);
527309467b48Spatrick   }
527409467b48Spatrick   // The expansion must also be dominated by the increment positions of any
527509467b48Spatrick   // loops for which it is using post-inc mode.
527609467b48Spatrick   for (const Loop *PIL : LF.PostIncLoops) {
527709467b48Spatrick     if (PIL == L) continue;
527809467b48Spatrick 
527909467b48Spatrick     // Be dominated by the loop exit.
528009467b48Spatrick     SmallVector<BasicBlock *, 4> ExitingBlocks;
528109467b48Spatrick     PIL->getExitingBlocks(ExitingBlocks);
528209467b48Spatrick     if (!ExitingBlocks.empty()) {
528309467b48Spatrick       BasicBlock *BB = ExitingBlocks[0];
528409467b48Spatrick       for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
528509467b48Spatrick         BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
528609467b48Spatrick       Inputs.push_back(BB->getTerminator());
528709467b48Spatrick     }
528809467b48Spatrick   }
528909467b48Spatrick 
529009467b48Spatrick   assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
529109467b48Spatrick          && !isa<DbgInfoIntrinsic>(LowestIP) &&
529209467b48Spatrick          "Insertion point must be a normal instruction");
529309467b48Spatrick 
529409467b48Spatrick   // Then, climb up the immediate dominator tree as far as we can go while
529509467b48Spatrick   // still being dominated by the input positions.
529609467b48Spatrick   BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
529709467b48Spatrick 
529809467b48Spatrick   // Don't insert instructions before PHI nodes.
529909467b48Spatrick   while (isa<PHINode>(IP)) ++IP;
530009467b48Spatrick 
530109467b48Spatrick   // Ignore landingpad instructions.
530209467b48Spatrick   while (IP->isEHPad()) ++IP;
530309467b48Spatrick 
530409467b48Spatrick   // Ignore debug intrinsics.
530509467b48Spatrick   while (isa<DbgInfoIntrinsic>(IP)) ++IP;
530609467b48Spatrick 
530709467b48Spatrick   // Set IP below instructions recently inserted by SCEVExpander. This keeps the
530809467b48Spatrick   // IP consistent across expansions and allows the previously inserted
530909467b48Spatrick   // instructions to be reused by subsequent expansion.
531009467b48Spatrick   while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
531109467b48Spatrick     ++IP;
531209467b48Spatrick 
531309467b48Spatrick   return IP;
531409467b48Spatrick }
531509467b48Spatrick 
531609467b48Spatrick /// Emit instructions for the leading candidate expression for this LSRUse (this
531709467b48Spatrick /// is called "expanding").
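/// For example (an illustrative sketch, not actual expander output): a
/// formula reg(p) + 2*reg({0,+,1}) + 8 for an address use expands roughly to
///   %scaled = mul i64 %iv, 2
///   %sum    = add i64 %p, %scaled
///   %addr   = add i64 %sum, 8
/// where %iv is the expansion of reg({0,+,1}); the operand list is flushed
/// between the pieces so foldable parts stay next to the user.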
531809467b48Spatrick Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
531909467b48Spatrick                            const Formula &F, BasicBlock::iterator IP,
532009467b48Spatrick                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
532109467b48Spatrick   if (LU.RigidFormula)
532209467b48Spatrick     return LF.OperandValToReplace;
532309467b48Spatrick 
532409467b48Spatrick   // Determine an input position which will be dominated by the operands and
532509467b48Spatrick   // which will dominate the result.
5326*d415bd75Srobert   IP = AdjustInsertPositionForExpand(IP, LF, LU);
532709467b48Spatrick   Rewriter.setInsertPoint(&*IP);
532809467b48Spatrick 
532909467b48Spatrick   // Inform the Rewriter if we have a post-increment use, so that it can
533009467b48Spatrick   // perform an advantageous expansion.
533109467b48Spatrick   Rewriter.setPostInc(LF.PostIncLoops);
533209467b48Spatrick 
533309467b48Spatrick   // This is the type that the user actually needs.
533409467b48Spatrick   Type *OpTy = LF.OperandValToReplace->getType();
533509467b48Spatrick   // This will be the type that we'll initially expand to.
533609467b48Spatrick   Type *Ty = F.getType();
533709467b48Spatrick   if (!Ty)
533809467b48Spatrick     // No type known; just expand directly to the ultimate type.
533909467b48Spatrick     Ty = OpTy;
534009467b48Spatrick   else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
534109467b48Spatrick     // Expand directly to the ultimate type if it's the right size.
534209467b48Spatrick     Ty = OpTy;
534309467b48Spatrick   // This is the type to do integer arithmetic in.
534409467b48Spatrick   Type *IntTy = SE.getEffectiveSCEVType(Ty);
534509467b48Spatrick 
534609467b48Spatrick   // Build up a list of operands to add together to form the full base.
534709467b48Spatrick   SmallVector<const SCEV *, 8> Ops;
534809467b48Spatrick 
534909467b48Spatrick   // Expand the BaseRegs portion.
535009467b48Spatrick   for (const SCEV *Reg : F.BaseRegs) {
535109467b48Spatrick     assert(!Reg->isZero() && "Zero allocated in a base register!");
535209467b48Spatrick 
535309467b48Spatrick     // If we're expanding for a post-inc user, make the post-inc adjustment.
535409467b48Spatrick     Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE);
535509467b48Spatrick     Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
535609467b48Spatrick   }
535709467b48Spatrick 
535809467b48Spatrick   // Expand the ScaledReg portion.
535909467b48Spatrick   Value *ICmpScaledV = nullptr;
536009467b48Spatrick   if (F.Scale != 0) {
536109467b48Spatrick     const SCEV *ScaledS = F.ScaledReg;
536209467b48Spatrick 
536309467b48Spatrick     // If we're expanding for a post-inc user, make the post-inc adjustment.
536409467b48Spatrick     PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
536509467b48Spatrick     ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE);
536609467b48Spatrick 
536709467b48Spatrick     if (LU.Kind == LSRUse::ICmpZero) {
536809467b48Spatrick       // Expand ScaledReg as if it were part of the base regs.
536909467b48Spatrick       if (F.Scale == 1)
537009467b48Spatrick         Ops.push_back(
537109467b48Spatrick             SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
537209467b48Spatrick       else {
537309467b48Spatrick         // An interesting way of "folding" with an icmp is to use a negated
537409467b48Spatrick         // scale, which we'll implement by inserting it into the other operand
537509467b48Spatrick         // of the icmp.
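        // For example (illustrative): instead of materializing
        //   %t = sub i64 %a, %s
        //   %c = icmp eq i64 %t, 0
        // we expand the base as %a and compare it against %s directly:
        //   %c = icmp eq i64 %a, %s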
537609467b48Spatrick         assert(F.Scale == -1 &&
537709467b48Spatrick                "The only scale supported by ICmpZero uses is -1!");
537809467b48Spatrick         ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
537909467b48Spatrick       }
538009467b48Spatrick     } else {
538109467b48Spatrick       // Otherwise just expand the scaled register and an explicit scale,
538209467b48Spatrick       // which is expected to be matched as part of the address.
538309467b48Spatrick 
538409467b48Spatrick       // Flush the operand list to suppress SCEVExpander hoisting address modes.
538509467b48Spatrick       // Unless the addressing mode will not be folded.
538609467b48Spatrick       if (!Ops.empty() && LU.Kind == LSRUse::Address &&
538709467b48Spatrick           isAMCompletelyFolded(TTI, LU, F)) {
538809467b48Spatrick         Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
538909467b48Spatrick         Ops.clear();
539009467b48Spatrick         Ops.push_back(SE.getUnknown(FullV));
539109467b48Spatrick       }
539209467b48Spatrick       ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
539309467b48Spatrick       if (F.Scale != 1)
539409467b48Spatrick         ScaledS =
539509467b48Spatrick             SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
539609467b48Spatrick       Ops.push_back(ScaledS);
539709467b48Spatrick     }
539809467b48Spatrick   }
539909467b48Spatrick 
540009467b48Spatrick   // Expand the GV portion.
540109467b48Spatrick   if (F.BaseGV) {
540209467b48Spatrick     // Flush the operand list to suppress SCEVExpander hoisting.
540309467b48Spatrick     if (!Ops.empty()) {
540473471bf0Spatrick       Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), IntTy);
540509467b48Spatrick       Ops.clear();
540609467b48Spatrick       Ops.push_back(SE.getUnknown(FullV));
540709467b48Spatrick     }
540809467b48Spatrick     Ops.push_back(SE.getUnknown(F.BaseGV));
540909467b48Spatrick   }
541009467b48Spatrick 
541109467b48Spatrick   // Flush the operand list to suppress SCEVExpander hoisting of both folded and
541209467b48Spatrick   // unfolded offsets. LSR assumes they both live next to their uses.
541309467b48Spatrick   if (!Ops.empty()) {
541409467b48Spatrick     Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
541509467b48Spatrick     Ops.clear();
541609467b48Spatrick     Ops.push_back(SE.getUnknown(FullV));
541709467b48Spatrick   }
541809467b48Spatrick 
541909467b48Spatrick   // Expand the immediate portion.
542009467b48Spatrick   int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
542109467b48Spatrick   if (Offset != 0) {
542209467b48Spatrick     if (LU.Kind == LSRUse::ICmpZero) {
542309467b48Spatrick       // The other interesting way of "folding" with an ICmpZero is to use a
542409467b48Spatrick       // negated immediate.
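      // For example (illustrative): (%a + 16) == 0 becomes %a == -16, so
      // the immediate moves into the icmp's constant operand rather than
      // costing a separate add.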
542509467b48Spatrick       if (!ICmpScaledV)
542609467b48Spatrick         ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
542709467b48Spatrick       else {
542809467b48Spatrick         Ops.push_back(SE.getUnknown(ICmpScaledV));
542909467b48Spatrick         ICmpScaledV = ConstantInt::get(IntTy, Offset);
543009467b48Spatrick       }
543109467b48Spatrick     } else {
543209467b48Spatrick       // Just add the immediate values. These again are expected to be matched
543309467b48Spatrick       // as part of the address.
543409467b48Spatrick       Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
543509467b48Spatrick     }
543609467b48Spatrick   }
543709467b48Spatrick 
543809467b48Spatrick   // Expand the unfolded offset portion.
543909467b48Spatrick   int64_t UnfoldedOffset = F.UnfoldedOffset;
544009467b48Spatrick   if (UnfoldedOffset != 0) {
544109467b48Spatrick     // Just add the immediate values.
544209467b48Spatrick     Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
544309467b48Spatrick                                                        UnfoldedOffset)));
544409467b48Spatrick   }
544509467b48Spatrick 
544609467b48Spatrick   // Emit instructions summing all the operands.
544709467b48Spatrick   const SCEV *FullS = Ops.empty() ?
544809467b48Spatrick                       SE.getConstant(IntTy, 0) :
544909467b48Spatrick                       SE.getAddExpr(Ops);
545009467b48Spatrick   Value *FullV = Rewriter.expandCodeFor(FullS, Ty);
545109467b48Spatrick 
545209467b48Spatrick   // We're done expanding now, so reset the rewriter.
545309467b48Spatrick   Rewriter.clearPostInc();
545409467b48Spatrick 
545509467b48Spatrick   // An ICmpZero Formula represents an ICmp which we're handling as a
545609467b48Spatrick   // comparison against zero. Now that we've expanded an expression for that
545709467b48Spatrick   // form, update the ICmp's other operand.
545809467b48Spatrick   if (LU.Kind == LSRUse::ICmpZero) {
545909467b48Spatrick     ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
5460097a140dSpatrick     if (auto *OperandIsInstr = dyn_cast<Instruction>(CI->getOperand(1)))
5461097a140dSpatrick       DeadInsts.emplace_back(OperandIsInstr);
546209467b48Spatrick     assert(!F.BaseGV && "ICmp does not support folding a global value and "
546309467b48Spatrick                            "a scale at the same time!");
546409467b48Spatrick     if (F.Scale == -1) {
546509467b48Spatrick       if (ICmpScaledV->getType() != OpTy) {
546609467b48Spatrick         Instruction *Cast =
546709467b48Spatrick           CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
546809467b48Spatrick                                                    OpTy, false),
546909467b48Spatrick                            ICmpScaledV, OpTy, "tmp", CI);
547009467b48Spatrick         ICmpScaledV = Cast;
547109467b48Spatrick       }
547209467b48Spatrick       CI->setOperand(1, ICmpScaledV);
547309467b48Spatrick     } else {
547409467b48Spatrick       // A scale of 1 means that the scale has been expanded as part of the
547509467b48Spatrick       // base regs.
547609467b48Spatrick       assert((F.Scale == 0 || F.Scale == 1) &&
547709467b48Spatrick              "ICmp does not support folding a global value and "
547809467b48Spatrick              "a scale at the same time!");
547909467b48Spatrick       Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
548009467b48Spatrick                                            -(uint64_t)Offset);
548109467b48Spatrick       if (C->getType() != OpTy)
548209467b48Spatrick         C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
548309467b48Spatrick                                                           OpTy, false),
548409467b48Spatrick                                   C, OpTy);
548509467b48Spatrick 
548609467b48Spatrick       CI->setOperand(1, C);
548709467b48Spatrick     }
548809467b48Spatrick   }
548909467b48Spatrick 
549009467b48Spatrick   return FullV;
549109467b48Spatrick }
549209467b48Spatrick 
549309467b48Spatrick /// Helper for Rewrite. PHI nodes are special because the use of their operands
549409467b48Spatrick /// effectively happens in their predecessor blocks, so the expression may need
549509467b48Spatrick /// to be expanded in multiple places.
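/// For example (illustrative): for
///   %x = phi [ %v, %pred1 ], [ %v, %pred2 ]
/// rewriting %v requires one expansion before the terminator of %pred1 and
/// another before the terminator of %pred2 (or of split critical-edge
/// blocks), with a per-block map ensuring each block is expanded only once.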
549609467b48Spatrick void LSRInstance::RewriteForPHI(
549709467b48Spatrick     PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
5498*d415bd75Srobert     SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
549909467b48Spatrick   DenseMap<BasicBlock *, Value *> Inserted;
550009467b48Spatrick   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
550109467b48Spatrick     if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
550209467b48Spatrick       bool needUpdateFixups = false;
550309467b48Spatrick       BasicBlock *BB = PN->getIncomingBlock(i);
550409467b48Spatrick 
550509467b48Spatrick       // If this is a critical edge, split the edge so that we do not insert
550609467b48Spatrick       // the code on all predecessor/successor paths.  We do this unless this
550709467b48Spatrick       // is the canonical backedge for this loop, which complicates post-inc
550809467b48Spatrick       // users.
550909467b48Spatrick       if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
551009467b48Spatrick           !isa<IndirectBrInst>(BB->getTerminator()) &&
551109467b48Spatrick           !isa<CatchSwitchInst>(BB->getTerminator())) {
551209467b48Spatrick         BasicBlock *Parent = PN->getParent();
551309467b48Spatrick         Loop *PNLoop = LI.getLoopFor(Parent);
551409467b48Spatrick         if (!PNLoop || Parent != PNLoop->getHeader()) {
551509467b48Spatrick           // Split the critical edge.
551609467b48Spatrick           BasicBlock *NewBB = nullptr;
551709467b48Spatrick           if (!Parent->isLandingPad()) {
551873471bf0Spatrick             NewBB =
551973471bf0Spatrick                 SplitCriticalEdge(BB, Parent,
552073471bf0Spatrick                                   CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
552109467b48Spatrick                                       .setMergeIdenticalEdges()
552209467b48Spatrick                                       .setKeepOneInputPHIs());
552309467b48Spatrick           } else {
552409467b48Spatrick             SmallVector<BasicBlock*, 2> NewBBs;
552509467b48Spatrick             SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI);
552609467b48Spatrick             NewBB = NewBBs[0];
552709467b48Spatrick           }
552809467b48Spatrick           // If NewBB==NULL, then SplitCriticalEdge refused to split because all
552909467b48Spatrick           // phi predecessors are identical. The simple thing to do is skip
553009467b48Spatrick           // splitting in this case rather than complicate the API.
553109467b48Spatrick           if (NewBB) {
553209467b48Spatrick             // If PN is outside of the loop and BB is in the loop, we want to
553309467b48Spatrick             // move the block to be immediately before the PHI block, not
553409467b48Spatrick             // immediately after BB.
553509467b48Spatrick             if (L->contains(BB) && !L->contains(PN))
553609467b48Spatrick               NewBB->moveBefore(PN->getParent());
553709467b48Spatrick 
553809467b48Spatrick             // Splitting the edge can reduce the number of PHI entries we have.
553909467b48Spatrick             e = PN->getNumIncomingValues();
554009467b48Spatrick             BB = NewBB;
554109467b48Spatrick             i = PN->getBasicBlockIndex(BB);
554209467b48Spatrick 
554309467b48Spatrick             needUpdateFixups = true;
554409467b48Spatrick           }
554509467b48Spatrick         }
554609467b48Spatrick       }
554709467b48Spatrick 
554809467b48Spatrick       std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
554909467b48Spatrick         Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
555009467b48Spatrick       if (!Pair.second)
555109467b48Spatrick         PN->setIncomingValue(i, Pair.first->second);
555209467b48Spatrick       else {
5553*d415bd75Srobert         Value *FullV =
5554*d415bd75Srobert             Expand(LU, LF, F, BB->getTerminator()->getIterator(), DeadInsts);
555509467b48Spatrick 
555609467b48Spatrick         // If this is reuse-by-noop-cast, insert the noop cast.
555709467b48Spatrick         Type *OpTy = LF.OperandValToReplace->getType();
555809467b48Spatrick         if (FullV->getType() != OpTy)
555909467b48Spatrick           FullV =
556009467b48Spatrick             CastInst::Create(CastInst::getCastOpcode(FullV, false,
556109467b48Spatrick                                                      OpTy, false),
556209467b48Spatrick                              FullV, LF.OperandValToReplace->getType(),
556309467b48Spatrick                              "tmp", BB->getTerminator());
556409467b48Spatrick 
556509467b48Spatrick         PN->setIncomingValue(i, FullV);
556609467b48Spatrick         Pair.first->second = FullV;
556709467b48Spatrick       }
556809467b48Spatrick 
556909467b48Spatrick       // If LSR split a critical edge and the phi node has other pending
557009467b48Spatrick       // fixup operands, we need to update those pending fixups. Otherwise
557109467b48Spatrick       // formulae will not be implemented completely and some instructions
557209467b48Spatrick       // will not be eliminated.
557309467b48Spatrick       if (needUpdateFixups) {
557409467b48Spatrick         for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
557509467b48Spatrick           for (LSRFixup &Fixup : Uses[LUIdx].Fixups)
557609467b48Spatrick             // If a fixup is supposed to rewrite some operand in the phi
557709467b48Spatrick             // that was just updated, that operand may have already been
557809467b48Spatrick             // moved to another phi node. Such a fixup requires an update.
557909467b48Spatrick             if (Fixup.UserInst == PN) {
558009467b48Spatrick               // Check if the operand we try to replace still exists in the
558109467b48Spatrick               // original phi.
558209467b48Spatrick               bool foundInOriginalPHI = false;
558309467b48Spatrick               for (const auto &val : PN->incoming_values())
558409467b48Spatrick                 if (val == Fixup.OperandValToReplace) {
558509467b48Spatrick                   foundInOriginalPHI = true;
558609467b48Spatrick                   break;
558709467b48Spatrick                 }
558809467b48Spatrick 
558909467b48Spatrick               // If the fixup operand is still in the original PHI, nothing to do.
559009467b48Spatrick               if (foundInOriginalPHI)
559109467b48Spatrick                 continue;
559209467b48Spatrick 
559309467b48Spatrick               // Otherwise it might have been moved to another PHI and require
559409467b48Spatrick               // an update. If the fixup operand is not found in any of the
559509467b48Spatrick               // incoming blocks, we have already rewritten it - nothing to do.
559609467b48Spatrick               for (const auto &Block : PN->blocks())
559709467b48Spatrick                 for (BasicBlock::iterator I = Block->begin(); isa<PHINode>(I);
559809467b48Spatrick                      ++I) {
559909467b48Spatrick                   PHINode *NewPN = cast<PHINode>(I);
560009467b48Spatrick                   for (const auto &val : NewPN->incoming_values())
560109467b48Spatrick                     if (val == Fixup.OperandValToReplace)
560209467b48Spatrick                       Fixup.UserInst = NewPN;
560309467b48Spatrick                 }
560409467b48Spatrick             }
560509467b48Spatrick       }
560609467b48Spatrick     }
560709467b48Spatrick }
560809467b48Spatrick 
560909467b48Spatrick /// Emit instructions for the leading candidate expression for this LSRUse (this
561009467b48Spatrick /// is called "expanding"), and update the UserInst to reference the newly
561109467b48Spatrick /// expanded value.
561209467b48Spatrick void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
5613*d415bd75Srobert                           const Formula &F,
561409467b48Spatrick                           SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  // First, find an insertion point that dominates UserInst. For PHI nodes,
  // find the nearest block which dominates all the relevant uses.
  if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
    RewriteForPHI(PN, LU, LF, F, DeadInsts);
  } else {
    Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts);

    // If this is reuse-by-noop-cast, insert the noop cast.
    Type *OpTy = LF.OperandValToReplace->getType();
    if (FullV->getType() != OpTy) {
      Instruction *Cast =
        CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
                         FullV, OpTy, "tmp", LF.UserInst);
      FullV = Cast;
    }

    // Update the user. ICmpZero is handled specially here (for now) because
    // Expand may have updated one of the operands of the icmp already, and
    // its new value may happen to be equal to LF.OperandValToReplace, in
    // which case doing replaceUsesOfWith leads to replacing both operands
    // with the same value. TODO: Reorganize this.
    if (LU.Kind == LSRUse::ICmpZero)
      LF.UserInst->setOperand(0, FullV);
    else
      LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
  }

  if (auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))
    DeadInsts.emplace_back(OperandIsInstr);
}

/// Rewrite all the fixup locations with new values, following the chosen
/// solution.
void LSRInstance::ImplementSolution(
    const SmallVectorImpl<const Formula *> &Solution) {
  // Keep track of instructions we may have made dead, so that
  // we can remove them after we are done working.
  SmallVector<WeakTrackingVH, 16> DeadInsts;

  Rewriter.setIVIncInsertPos(L, IVIncInsertPos);

  // Mark phi nodes that terminate chains so the expander tries to reuse them.
  for (const IVChain &Chain : IVChainVec) {
    if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
      Rewriter.setChainedPhi(PN);
  }

  // Expand the new value definitions and update the users.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
    for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
      Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
      Changed = true;
    }

  for (const IVChain &Chain : IVChainVec) {
    GenerateIVChain(Chain, DeadInsts);
    Changed = true;
  }

  for (const WeakVH &IV : Rewriter.getInsertedIVs())
    if (IV && dyn_cast<Instruction>(&*IV)->getParent())
      ScalarEvolutionIVs.push_back(IV);

  // Clean up after ourselves. This must be done before deleting any
  // instructions.
  Rewriter.clear();

  Changed |= RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts,
                                                                  &TLI, MSSAU);

  // In our cost analysis above, we assume that each addrec consumes exactly
  // one register, and arrange to have increments inserted just before the
  // latch to maximize the chance this is true.  However, if we reused
  // existing IVs, we now need to move the increments to match our
  // expectations.  Otherwise, our cost modeling results in us having chosen
  // a non-optimal result for the actual schedule.  (And yes, this scheduling
  // decision does impact later codegen.)
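  //
  // A minimal sketch of the situation this loop repairs (illustrative IR,
  // not taken from a test case):
  //
  //   header:
  //     %iv = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
  //   body:
  //     %iv.next = add i64 %iv, 4   ; increment stranded outside the latch
  //     ...
  //   latch:
  //     br i1 %cond, label %header, label %exit
  //
  // The loop below moves '%iv.next' down to IVIncInsertPos (just before the
  // latch's terminator), provided every use of '%iv.next' is dominated from
  // there, restoring the placement the cost model assumed.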
  for (PHINode &PN : L->getHeader()->phis()) {
    BinaryOperator *BO = nullptr;
    Value *Start = nullptr, *Step = nullptr;
    if (!matchSimpleRecurrence(&PN, BO, Start, Step))
      continue;

    switch (BO->getOpcode()) {
    case Instruction::Sub:
      if (BO->getOperand(0) != &PN)
        // sub is non-commutative - match handling elsewhere in LSR
        continue;
      break;
    case Instruction::Add:
      break;
    default:
      continue;
    }

    if (!isa<Constant>(Step))
      // If not a constant step, might increase register pressure
      // (We assume constants have been canonicalized to RHS)
      continue;

    if (BO->getParent() == IVIncInsertPos->getParent())
      // Only bother moving across blocks. Isel can handle the block-local
      // case.
      continue;

    // Can we legally schedule the increment at the desired point?
    if (!llvm::all_of(BO->uses(),
                      [&](Use &U) { return DT.dominates(IVIncInsertPos, U); }))
      continue;
    BO->moveBefore(IVIncInsertPos);
    Changed = true;
  }

}

LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                         DominatorTree &DT, LoopInfo &LI,
                         const TargetTransformInfo &TTI, AssumptionCache &AC,
                         TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
    : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
      MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0
                            ? PreferredAddresingMode
                            : TTI.getPreferredAddressingMode(L, &SE)),
      Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", false),
      BaselineCost(L, SE, TTI, AMK) {
  // If LoopSimplify form is not available, stay out of trouble.
  if (!L->isLoopSimplifyForm())
    return;

  // If there's no interesting work to be done, bail early.
  if (IU.empty()) return;

  // If there's too much analysis to be done, bail early. We won't be able to
  // model the problem anyway.
  unsigned NumUsers = 0;
  for (const IVStrideUse &U : IU) {
    if (++NumUsers > MaxIVUsers) {
      (void)U;
      LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
                        << "\n");
      return;
    }
    // Bail out if we have a PHI on an EHPad that gets a value from a
    // CatchSwitchInst.  Because the CatchSwitchInst cannot be split, there is
    // no good place to stick any instructions.
    if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
      auto *FirstNonPHI = PN->getParent()->getFirstNonPHI();
      if (isa<FuncletPadInst>(FirstNonPHI) ||
          isa<CatchSwitchInst>(FirstNonPHI))
        for (BasicBlock *PredBB : PN->blocks())
          if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
            return;
    }
  }

  LLVM_DEBUG(dbgs() << "\nLSR on loop ";
             L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
             dbgs() << ":\n");

  // Configure SCEVExpander already now, so the correct mode is used for
  // isSafeToExpand() checks.
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif
  Rewriter.disableCanonicalMode();
  Rewriter.enableLSRMode();

  // First, perform some low-level loop optimizations.
  OptimizeShadowIV();
  OptimizeLoopTermCond();

  // If loop preparation eliminates all interesting IV users, bail.
  if (IU.empty()) return;

  // Skip nested loops until we can model them better with formulae.
  if (!L->isInnermost()) {
    LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
    return;
  }

  // Start collecting data and preparing for the solver.
  // If the number of registers is not the major cost, we cannot benefit from
  // the current profitable chain optimization, which is based on the number
  // of registers.
  // FIXME: add profitable chain optimization for other kinds of major cost,
  // for example the number of instructions.
  if (TTI.isNumRegsMajorCostOfLSR() || StressIVChain)
    CollectChains();
  CollectInterestingTypesAndFactors();
  CollectFixupsAndInitialFormulae();
  CollectLoopInvariantFixupsAndFormulae();

  if (Uses.empty())
    return;

  LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
             print_uses(dbgs()));

  // Now use the reuse data to generate a bunch of interesting ways
  // to formulate the values needed for the uses.
  GenerateAllReuseFormulae();

  FilterOutUndesirableDedicatedRegisters();
  NarrowSearchSpaceUsingHeuristics();

  SmallVector<const Formula *, 8> Solution;
  Solve(Solution);

  // Release memory that is no longer needed.
  Factors.clear();
  Types.clear();
  RegUses.clear();

  if (Solution.empty())
    return;

#ifndef NDEBUG
  // Formulae should be legal.
  for (const LSRUse &LU : Uses) {
    for (const Formula &F : LU.Formulae)
      assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                        F) && "Illegal formula generated!");
  }
#endif

  // Now that we've decided what we want, make it so.
  ImplementSolution(Solution);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
  if (Factors.empty() && Types.empty()) return;

  OS << "LSR has identified the following interesting factors and types: ";
  bool First = true;

  for (int64_t Factor : Factors) {
    if (!First) OS << ", ";
    First = false;
    OS << '*' << Factor;
  }

  for (Type *Ty : Types) {
    if (!First) OS << ", ";
    First = false;
    OS << '(' << *Ty << ')';
  }
  OS << '\n';
}

void LSRInstance::print_fixups(raw_ostream &OS) const {
  OS << "LSR is examining the following fixup sites:\n";
  for (const LSRUse &LU : Uses)
    for (const LSRFixup &LF : LU.Fixups) {
      // Print the indentation to the provided stream, not to dbgs(), so the
      // output is consistent when an explicit stream is passed in.
      OS << "  ";
      LF.print(OS);
      OS << '\n';
    }
}

void LSRInstance::print_uses(raw_ostream &OS) const {
  OS << "LSR is examining the following uses:\n";
  for (const LSRUse &LU : Uses) {
    OS << "  ";
    LU.print(OS);
    OS << '\n';
    for (const Formula &F : LU.Formulae) {
      OS << "    ";
      F.print(OS);
      OS << '\n';
    }
  }
}

void LSRInstance::print(raw_ostream &OS) const {
  print_factors_and_types(OS);
  print_fixups(OS);
  print_uses(OS);
}

LLVM_DUMP_METHOD void LSRInstance::dump() const {
  print(errs()); errs() << '\n';
}
#endif

namespace {

class LoopStrengthReduce : public LoopPass {
public:
  static char ID; // Pass ID, replacement for typeid

  LoopStrengthReduce();

private:
  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
};

} // end anonymous namespace

LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
  initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}

void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
  // We split critical edges, so we change the CFG.  However, we do update
  // many analyses if they are around.
  AU.addPreservedID(LoopSimplifyID);

  AU.addRequired<LoopInfoWrapperPass>();
  AU.addPreserved<LoopInfoWrapperPass>();
  AU.addRequiredID(LoopSimplifyID);
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  // Requiring LoopSimplify a second time here prevents IVUsers from running
  // twice, since LoopSimplify was invalidated by running ScalarEvolution.
  AU.addRequiredID(LoopSimplifyID);
  AU.addRequired<IVUsersWrapperPass>();
  AU.addPreserved<IVUsersWrapperPass>();
  AU.addRequired<TargetTransformInfoWrapperPass>();
  AU.addPreserved<MemorySSAWrapperPass>();
}

namespace {

/// Enables more convenient iteration over a DWARF expression vector.
static iterator_range<llvm::DIExpression::expr_op_iterator>
ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
  llvm::DIExpression::expr_op_iterator Begin =
      llvm::DIExpression::expr_op_iterator(Expr.begin());
  llvm::DIExpression::expr_op_iterator End =
      llvm::DIExpression::expr_op_iterator(Expr.end());
  return {Begin, End};
}

struct SCEVDbgValueBuilder {
  SCEVDbgValueBuilder() = default;
  SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }

  void clone(const SCEVDbgValueBuilder &Base) {
    LocationOps = Base.LocationOps;
    Expr = Base.Expr;
  }

  void clear() {
    LocationOps.clear();
    Expr.clear();
  }

  /// The DIExpression as we translate the SCEV.
  SmallVector<uint64_t, 6> Expr;
  /// The location ops of the DIExpression.
  SmallVector<Value *, 2> LocationOps;

  void pushOperator(uint64_t Op) { Expr.push_back(Op); }
  void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }

  /// Add a DW_OP_LLVM_arg to the expression, followed by the index of the
  /// value in the set of values referenced by the expression.
  void pushLocation(llvm::Value *V) {
    Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg);
    auto *It = llvm::find(LocationOps, V);
    unsigned ArgIndex = 0;
    if (It != LocationOps.end()) {
      ArgIndex = std::distance(LocationOps.begin(), It);
    } else {
      ArgIndex = LocationOps.size();
      LocationOps.push_back(V);
    }
    Expr.push_back(ArgIndex);
  }

  void pushValue(const SCEVUnknown *U) {
    llvm::Value *V = cast<SCEVUnknown>(U)->getValue();
    pushLocation(V);
  }

  bool pushConst(const SCEVConstant *C) {
    if (C->getAPInt().getMinSignedBits() > 64)
      return false;
    Expr.push_back(llvm::dwarf::DW_OP_consts);
    Expr.push_back(C->getAPInt().getSExtValue());
    return true;
  }

  // Iterating the expression as DWARF ops is convenient when updating
  // DWARF_OP_LLVM_args.
  iterator_range<llvm::DIExpression::expr_op_iterator> expr_ops() {
    return ToDwarfOpIter(Expr);
  }

  /// Several SCEV types are sequences of the same arithmetic operator applied
  /// to constants and values that may be extended or truncated.
  bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
                          uint64_t DwarfOp) {
    assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
           "Expected arithmetic SCEV type");
    bool Success = true;
    unsigned EmitOperator = 0;
    for (const auto &Op : CommExpr->operands()) {
      Success &= pushSCEV(Op);

      if (EmitOperator >= 1)
        pushOperator(DwarfOp);
      ++EmitOperator;
    }
    return Success;
  }

  // TODO: Identify and omit noop casts.
  bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
    const llvm::SCEV *Inner = C->getOperand(0);
    const llvm::Type *Type = C->getType();
    uint64_t ToWidth = Type->getIntegerBitWidth();
    bool Success = pushSCEV(Inner);
    uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth,
                          IsSigned ? llvm::dwarf::DW_ATE_signed
                                   : llvm::dwarf::DW_ATE_unsigned};
    for (const auto &Op : CastOps)
      pushOperator(Op);
    return Success;
  }

  // TODO: MinMax - although these haven't been encountered in the test suite.
  bool pushSCEV(const llvm::SCEV *S) {
    bool Success = true;
    if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
      Success &= pushConst(StartInt);

    } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
      if (!U->getValue())
        return false;
      pushLocation(U->getValue());

    } else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
      Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);

    } else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
      Success &= pushSCEV(UDiv->getLHS());
      Success &= pushSCEV(UDiv->getRHS());
      pushOperator(llvm::dwarf::DW_OP_div);

    } else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
      // Assert if a new and unknown SCEVCastExpr type is encountered.
      assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
              isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
             "Unexpected cast type in SCEV.");
      Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));

    } else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
      Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);

    } else if (isa<SCEVAddRecExpr>(S)) {
      // Nested SCEVAddRecExprs are generated by nested loops and are
      // currently unsupported.
      return false;

    } else {
      return false;
    }
    return Success;
  }

  /// Return true if the combination of arithmetic operator and underlying
  /// SCEV constant value is an identity function.
  bool isIdentityFunction(uint64_t Op, const SCEV *S) {
    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
      if (C->getAPInt().getMinSignedBits() > 64)
        return false;
      int64_t I = C->getAPInt().getSExtValue();
      switch (Op) {
      case llvm::dwarf::DW_OP_plus:
      case llvm::dwarf::DW_OP_minus:
        return I == 0;
      case llvm::dwarf::DW_OP_mul:
      case llvm::dwarf::DW_OP_div:
        return I == 1;
      }
    }
    return false;
  }

  /// Convert a SCEV of a value to a DIExpression that is pushed onto the
  /// builder's expression stack. The stack should already contain an
  /// expression for the iteration count, so that it can be multiplied by
  /// the stride and added to the start.
  /// Components of the expression are omitted if they are an identity
  /// function. Chain (non-affine) SCEVs are not supported.
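  ///
  /// Illustrative sketch (hypothetical, not from a test case): for
  /// SAR = {%start,+,%stride}, with the iteration count already on the
  /// stack, this appends roughly
  ///   <stride ops> DW_OP_mul <start ops> DW_OP_plus
  /// so the expression evaluates IterCount * Stride + Start, i.e. the value
  /// the location held in the current iteration.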
  bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
    assert(SAR.isAffine() && "Expected affine SCEV");
    // TODO: Is this check needed?
    if (isa<SCEVAddRecExpr>(SAR.getStart()))
      return false;

    const SCEV *Start = SAR.getStart();
    const SCEV *Stride = SAR.getStepRecurrence(SE);

    // Skip pushing arithmetic noops.
    if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
      if (!pushSCEV(Stride))
        return false;
      pushOperator(llvm::dwarf::DW_OP_mul);
    }
    if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
      if (!pushSCEV(Start))
        return false;
      pushOperator(llvm::dwarf::DW_OP_plus);
    }
    return true;
  }

  /// Create an expression that is an offset from a value (usually the IV).
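  /// For example (illustrative; DIExpression::appendOffset chooses the exact
  /// encoding): createOffsetExpr(16, %iv) yields a location list containing
  /// %iv and an expression along the lines of
  ///   DW_OP_LLVM_arg 0, DW_OP_plus_uconst 16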
  void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
    pushLocation(OffsetValue);
    DIExpression::appendOffset(Expr, Offset);
    LLVM_DEBUG(
        dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
               << std::to_string(Offset) << "\n");
  }

  /// Combine a translation of the SCEV and the IV to create an expression
  /// that recovers a location's value.
  /// Returns true if an expression was created.
  bool createIterCountExpr(const SCEV *S,
                           const SCEVDbgValueBuilder &IterationCount,
                           ScalarEvolution &SE) {
    // SCEVs for SSA values are most frequently of the form
    // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
    // This is because %a is a PHI node that is not the IV. However, these
    // SCEVs have not been observed to result in debuginfo-lossy
    // optimisations, so it's not expected that this point will be reached.
    if (!isa<SCEVAddRecExpr>(S))
      return false;

    LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
                      << '\n');

    const auto *Rec = cast<SCEVAddRecExpr>(S);
    if (!Rec->isAffine())
      return false;

    if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize)
      return false;

    // Initialise a new builder with the iteration count expression. In
    // combination with the value's SCEV this enables recovery.
    clone(IterationCount);
    if (!SCEVToValueExpr(*Rec, SE))
      return false;

    return true;
  }

  /// Convert the SCEV addrec of the induction variable into a DIExpression
  /// that computes the iteration count from the IV's current value, i.e.
  /// (IV - start) / stride. The caller should already have pushed the IV's
  /// location onto the expression stack.
  /// Components of the expression are omitted if they are an identity
  /// function.
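  ///
  /// Illustrative sketch (hypothetical): for an IV with SCEV
  /// {%start,+,%stride} whose location is already on the stack as
  /// DW_OP_LLVM_arg 0, this appends roughly
  ///   <start ops> DW_OP_minus <stride ops> DW_OP_div
  /// so the expression evaluates (IV - Start) / Stride, the current
  /// iteration count.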
  bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
                           ScalarEvolution &SE) {
    assert(SAR.isAffine() && "Expected affine SCEV");
    if (isa<SCEVAddRecExpr>(SAR.getStart())) {
      LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV. Unsupported nested AddRec: "
                        << SAR << '\n');
      return false;
    }
    const SCEV *Start = SAR.getStart();
    const SCEV *Stride = SAR.getStepRecurrence(SE);

    // Skip pushing arithmetic noops.
    if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
      if (!pushSCEV(Start))
        return false;
      pushOperator(llvm::dwarf::DW_OP_minus);
    }
    if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
      if (!pushSCEV(Stride))
        return false;
      pushOperator(llvm::dwarf::DW_OP_div);
    }
    return true;
  }

  // Append the current expression and locations to a location list and an
  // expression list. Modify the DW_OP_LLVM_arg indexes to account for
  // the locations already present in the destination list.
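  //
  // Worked example (illustrative): if this builder's LocationOps are
  // [%iv, %a] and DestLocations already holds [%iv, %b], then %a is
  // appended (DestLocations becomes [%iv, %b, %a]) and every
  // 'DW_OP_LLVM_arg 1' in this expression is rewritten to
  // 'DW_OP_LLVM_arg 2' as it is copied into DestExpr.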
  void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
                       SmallVectorImpl<Value *> &DestLocations) {
    assert(!DestLocations.empty() &&
           "Expected the locations vector to contain the IV");
    // The DWARF_OP_LLVM_arg arguments of the expression being appended must
    // be modified to account for the locations already in the destination
    // vector. All builders contain the IV as the first location op.
    assert(!LocationOps.empty() &&
           "Expected the location ops to contain the IV.");
    // DestIndexMap[n] contains the index in DestLocations for the nth
    // location in this SCEVDbgValueBuilder.
    SmallVector<uint64_t, 2> DestIndexMap;
    for (const auto &Op : LocationOps) {
      auto It = find(DestLocations, Op);
      if (It != DestLocations.end()) {
        // Location already exists in DestLocations, reuse existing ArgIndex.
        DestIndexMap.push_back(std::distance(DestLocations.begin(), It));
        continue;
      }
      // Location is not in DestLocations, add it.
      DestIndexMap.push_back(DestLocations.size());
      DestLocations.push_back(Op);
    }

    for (const auto &Op : expr_ops()) {
      if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
        Op.appendToVector(DestExpr);
        continue;
      }

      DestExpr.push_back(dwarf::DW_OP_LLVM_arg);
      // `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV,
      // DestIndexMap[n] contains its new index in DestLocations.
      uint64_t NewIndex = DestIndexMap[Op.getArg(0)];
      DestExpr.push_back(NewIndex);
    }
  }
};

/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs
/// and DIExpression.
struct DVIRecoveryRec {
  DVIRecoveryRec(DbgValueInst *DbgValue)
      : DVI(DbgValue), Expr(DbgValue->getExpression()),
        HadLocationArgList(false) {}

  DbgValueInst *DVI;
  DIExpression *Expr;
  bool HadLocationArgList;
  SmallVector<WeakVH, 2> LocationOps;
  SmallVector<const llvm::SCEV *, 2> SCEVs;
  SmallVector<std::unique_ptr<SCEVDbgValueBuilder>, 2> RecoveryExprs;

  void clear() {
    for (auto &RE : RecoveryExprs)
      RE.reset();
    RecoveryExprs.clear();
  }

  ~DVIRecoveryRec() { clear(); }
};
} // namespace

/// Returns the total number of DW_OP_llvm_arg operands in the expression.
/// This helps in determining if a DIArglist is necessary or can be omitted
/// from the dbg.value.
static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) {
  auto expr_ops = ToDwarfOpIter(Expr);
  unsigned Count = 0;
  for (auto Op : expr_ops)
    if (Op.getOp() == dwarf::DW_OP_LLVM_arg)
      Count++;
  return Count;
}

/// Overwrites DVI with the location and Ops as the DIExpression. This will
/// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg
/// operands, because a DIArglist is not created for the first argument of
/// the dbg.value.
static void updateDVIWithLocation(DbgValueInst &DVI, Value *Location,
                                  SmallVectorImpl<uint64_t> &Ops) {
  assert(
      numLLVMArgOps(Ops) == 0 &&
      "Expected expression that does not contain any DW_OP_llvm_arg operands.");
  DVI.setRawLocation(ValueAsMetadata::get(Location));
  DVI.setExpression(DIExpression::get(DVI.getContext(), Ops));
}

/// Overwrite DVI with locations placed into a DIArglist.
static void updateDVIWithLocations(DbgValueInst &DVI,
                                   SmallVectorImpl<Value *> &Locations,
                                   SmallVectorImpl<uint64_t> &Ops) {
  assert(numLLVMArgOps(Ops) != 0 &&
         "Expected expression that references DIArglist locations using "
         "DW_OP_llvm_arg operands.");
  SmallVector<ValueAsMetadata *, 3> MetadataLocs;
  for (Value *V : Locations)
    MetadataLocs.push_back(ValueAsMetadata::get(V));
  auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
  DVI.setRawLocation(llvm::DIArgList::get(DVI.getContext(), ValArrayRef));
  DVI.setExpression(DIExpression::get(DVI.getContext(), Ops));
}

/// Write the new expression and new location ops for the dbg.value. If
/// possible, reduce the size of the dbg.value intrinsic by omitting the
/// DIArglist. It can be omitted if:
/// 1. There is only a single location, referenced by a single DW_OP_llvm_arg.
/// 2. The DW_OP_LLVM_arg is the first operand in the expression.
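///
/// For example (illustrative): a dbg.value with the single location %x and
/// expression
///   DW_OP_LLVM_arg 0, DW_OP_plus_uconst 8, DW_OP_stack_value
/// is rewritten to use %x directly with the shortened expression
///   DW_OP_plus_uconst 8, DW_OP_stack_value
/// whereas any expression with two or more DW_OP_LLVM_arg operands keeps a
/// DIArglist.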
static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec,
                               SmallVectorImpl<Value *> &NewLocationOps,
                               SmallVectorImpl<uint64_t> &NewExpr) {
  unsigned NumLLVMArgs = numLLVMArgOps(NewExpr);
  if (NumLLVMArgs == 0) {
    // Location assumed to be on the stack.
    updateDVIWithLocation(*DVIRec.DVI, NewLocationOps[0], NewExpr);
  } else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) {
    // There is only a single DW_OP_llvm_arg at the start of the expression,
    // so it can be omitted along with DIArglist.
    assert(NewExpr[1] == 0 &&
           "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
    llvm::SmallVector<uint64_t, 6> ShortenedOps(llvm::drop_begin(NewExpr, 2));
    updateDVIWithLocation(*DVIRec.DVI, NewLocationOps[0], ShortenedOps);
  } else {
    // Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary.
    updateDVIWithLocations(*DVIRec.DVI, NewLocationOps, NewExpr);
  }

  // If the DIExpression was previously empty then add the stack terminator.
  // Non-empty expressions have only had elements inserted into them and so
  // the terminator should already be present, e.g. stack_value or fragment.
  DIExpression *SalvageExpr = DVIRec.DVI->getExpression();
  if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {
    SalvageExpr = DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value});
    DVIRec.DVI->setExpression(SalvageExpr);
  }
}

/// Cached location ops may be erased during LSR, in which case an undef is
/// required when restoring from the cache. The type of that location is no
/// longer available, so just use int8. The undef will be replaced by one or
/// more locations later when a SCEVDbgValueBuilder selects alternative
/// locations to use for the salvage.
static Value *getValueOrUndef(WeakVH &VH, LLVMContext &C) {
  return (VH) ? VH : UndefValue::get(llvm::Type::getInt8Ty(C));
}

/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased
/// values.
static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
  LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
                    << "scev-salvage: post-LSR: " << *DVIRec.DVI << '\n');
  assert(DVIRec.Expr && "Expected an expression");
  DVIRec.DVI->setExpression(DVIRec.Expr);

  // Even a single location-op may be inside a DIArgList and referenced with
  // DW_OP_LLVM_arg, which is valid only with a DIArgList.
  if (!DVIRec.HadLocationArgList) {
    assert(DVIRec.LocationOps.size() == 1 &&
           "Unexpected number of location ops.");
    // LSR's unsuccessful salvage attempt may have added DIArgList, which in
    // this case was not present before, so force the location back to a
    // single uncontained Value.
    Value *CachedValue =
        getValueOrUndef(DVIRec.LocationOps[0], DVIRec.DVI->getContext());
    DVIRec.DVI->setRawLocation(ValueAsMetadata::get(CachedValue));
  } else {
    SmallVector<ValueAsMetadata *, 3> MetadataLocs;
    for (WeakVH VH : DVIRec.LocationOps) {
      Value *CachedValue = getValueOrUndef(VH, DVIRec.DVI->getContext());
      MetadataLocs.push_back(ValueAsMetadata::get(CachedValue));
    }
    auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
    DVIRec.DVI->setRawLocation(
        llvm::DIArgList::get(DVIRec.DVI->getContext(), ValArrayRef));
  }
  LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DVIRec.DVI << '\n');
}

static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
                       llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec,
                       const SCEV *SCEVInductionVar,
                       SCEVDbgValueBuilder IterCountExpr) {
  // Only attempt to salvage dbg.values whose location was killed (made
  // undef) by LSR; other dbg.values are still valid and need no recovery.
  if (!DVIRec.DVI->isKillLocation())
    return false;

  // LSR may have caused several changes to the dbg.value in the failed
  // salvage attempt. So restore the DIExpression, the location ops and also
  // the location ops format, which is always DIArglist for multiple ops, but
  // only sometimes for a single op.
  restorePreTransformState(DVIRec);

  // LocationOpIndexMap[i] will store the post-LSR location index of
  // the non-optimised out location at pre-LSR index i.
  SmallVector<int64_t, 2> LocationOpIndexMap;
  LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);
  SmallVector<Value *, 2> NewLocationOps;
  NewLocationOps.push_back(LSRInductionVar);

  for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
    WeakVH VH = DVIRec.LocationOps[i];
    // Place the locations not optimised out in the list first, avoiding
    // inserts later. The map is used to update the DIExpression's
    // DW_OP_LLVM_arg arguments as the expression is updated.
    if (VH && !isa<UndefValue>(VH)) {
      NewLocationOps.push_back(VH);
      LocationOpIndexMap[i] = NewLocationOps.size() - 1;
      LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i
                        << " now at index " << LocationOpIndexMap[i] << "\n");
      continue;
    }

    // It's possible that a value referred to in the SCEV may have been
    // optimised out by LSR.
    if (SE.containsErasedValue(DVIRec.SCEVs[i]) ||
        SE.containsUndefs(DVIRec.SCEVs[i])) {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
                        << " refers to a location that is now undef or erased. "
                           "Salvage abandoned.\n");
      return false;
    }

    LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
                      << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");

    DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
    SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();

    // Create an offset-based salvage expression if possible, as it requires
    // fewer DWARF ops than an iteration count-based expression.
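    // Illustrative sketch (hypothetical values): if SCEV(location) is
    // {(%start + 16),+,%stride} and SCEV(IV) is {%start,+,%stride}, the
    // constant difference is 16, and the cheap salvage expression is simply
    //   DW_OP_LLVM_arg 0, DW_OP_plus_uconst 16
    // against the post-LSR induction variable.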
    if (std::optional<APInt> Offset =
            SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) {
      if (Offset->getMinSignedBits() <= 64)
        SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar);
    } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
                                                 SE))
      return false;
  }

  // Merge the DbgValueBuilder generated expressions and the original
  // DIExpression, place the result into a new vector.
  SmallVector<uint64_t, 3> NewExpr;
  if (DVIRec.Expr->getNumElements() == 0) {
    assert(DVIRec.RecoveryExprs.size() == 1 &&
           "Expected only a single recovery expression for an empty "
           "DIExpression.");
    assert(DVIRec.RecoveryExprs[0] &&
           "Expected a SCEVDbgSalvageBuilder for location 0");
    SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
    B->appendToVectors(NewExpr, NewLocationOps);
  }
  for (const auto &Op : DVIRec.Expr->expr_ops()) {
    // Most Ops needn't be updated.
    if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
      Op.appendToVector(NewExpr);
      continue;
    }

    uint64_t LocationArgIndex = Op.getArg(0);
    SCEVDbgValueBuilder *DbgBuilder =
        DVIRec.RecoveryExprs[LocationArgIndex].get();
    // The location doesn't have a SCEVDbgValueBuilder, so LSR did not
    // optimise it away. So just translate the argument to the updated
    // location index.
    if (!DbgBuilder) {
      NewExpr.push_back(dwarf::DW_OP_LLVM_arg);
      assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
             "Expected a positive index for the location-op position.");
      NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
      continue;
    }
    // The location has a recovery expression.
    DbgBuilder->appendToVectors(NewExpr, NewLocationOps);
  }

  UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr);
  LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DVI << "\n");
  return true;
}

/// Obtain an expression for the iteration count, then attempt to salvage the
/// dbg.value intrinsics.
static void
DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
                          llvm::PHINode *LSRInductionVar,
                          SmallVector<std::unique_ptr<DVIRecoveryRec>, 2>
                              &DVIToUpdate) {
  if (DVIToUpdate.empty())
    return;

  const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
  assert(SCEVInductionVar &&
         "Anticipated a SCEV for the post-LSR induction variable");

  if (const SCEVAddRecExpr *IVAddRec =
          dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
    if (!IVAddRec->isAffine())
      return;

    // Prevent translation using excessive resources.
    if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)
      return;

    // The iteration count is required to recover location values.
    SCEVDbgValueBuilder IterCountExpr;
    IterCountExpr.pushLocation(LSRInductionVar);
    if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
      return;

    LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
                      << '\n');

    for (auto &DVIRec : DVIToUpdate) {
      SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
                 IterCountExpr);
    }
  }
}

/// Identify and cache salvageable DVI locations and expressions along with
/// the corresponding SCEV(s). Also ensure that the DVI is not deleted between
/// caching and salvaging.
static void DbgGatherSalvagableDVI(
    Loop *L, ScalarEvolution &SE,
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
    SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
  for (const auto &B : L->getBlocks()) {
    for (auto &I : *B) {
      auto DVI = dyn_cast<DbgValueInst>(&I);
      if (!DVI)
        continue;
      // Ensure that if any location op is undef, the dbg.value is not
      // cached.
      if (DVI->isKillLocation())
        continue;

      // Check that the location op SCEVs are suitable for translation to
      // DIExpression.
      const auto &HasTranslatableLocationOps =
          [&](const DbgValueInst *DVI) -> bool {
        for (const auto LocOp : DVI->location_ops()) {
          if (!LocOp)
            return false;

          if (!SE.isSCEVable(LocOp->getType()))
            return false;

          const SCEV *S = SE.getSCEV(LocOp);
          if (SE.containsUndefs(S))
            return false;
        }
        return true;
      };

      if (!HasTranslatableLocationOps(DVI))
        continue;

      std::unique_ptr<DVIRecoveryRec> NewRec =
          std::make_unique<DVIRecoveryRec>(DVI);
      // Each location Op may need a SCEVDbgValueBuilder in order to recover
      // it. Pre-allocating a vector will enable quick lookups of the builder
      // later during the salvage.
      NewRec->RecoveryExprs.resize(DVI->getNumVariableLocationOps());
      for (const auto LocOp : DVI->location_ops()) {
        NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
        NewRec->LocationOps.push_back(LocOp);
        NewRec->HadLocationArgList = DVI->hasArgList();
      }
      SalvageableDVISCEVs.push_back(std::move(NewRec));
      DVIHandles.insert(DVI);
    }
  }
}

/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback,
/// any PHI from the loop header is usable, but it may have less chance of
/// surviving subsequent transforms.
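///
/// For example, a canonical IV such as (hypothetical IR)
///
///   %iv = phi i64 [ 0, %preheader ], [ %iv.next, %latch ]
///
/// whose SCEV is the affine AddRec {0,+,1}<%loop> satisfies IsSuitableIV
/// below.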
static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
                                           const LSRInstance &LSR) {

  auto IsSuitableIV = [&](PHINode *P) {
    if (!SE.isSCEVable(P->getType()))
      return false;
    if (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(P)))
      return Rec->isAffine() && !SE.containsUndefs(SE.getSCEV(P));
    return false;
  };

  // For now, just pick the first IV that was generated and inserted by
  // ScalarEvolution. Ideally pick an IV that is unlikely to be optimized away
  // by subsequent transforms.
  for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
    if (!IV)
      continue;

    // There should only be PHI node IVs.
    PHINode *P = cast<PHINode>(&*IV);

    if (IsSuitableIV(P))
      return P;
  }

  for (PHINode &P : L.getHeader()->phis()) {
    if (IsSuitableIV(&P))
      return &P;
  }
  return nullptr;
}

static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *>>
canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
                      const LoopInfo &LI) {
  if (!L->isInnermost()) {
    LLVM_DEBUG(dbgs() << "Cannot fold on non-innermost loop\n");
    return std::nullopt;
  }
  // Only inspect loops with a simple structure.
  if (!L->isLoopSimplifyForm()) {
    LLVM_DEBUG(dbgs() << "Cannot fold on non-simple loop\n");
    return std::nullopt;
  }

  if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
    LLVM_DEBUG(dbgs() << "Cannot fold on loop-variant backedge-taken count\n");
    return std::nullopt;
  }

  BasicBlock *LoopLatch = L->getLoopLatch();

  // TODO: Can we do something for greater than and less than?
  // The terminating condition is foldable when it is an eq/ne icmp.
  BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
  if (BI->isUnconditional())
    return std::nullopt;
  Value *TermCond = BI->getCondition();
  if (!isa<ICmpInst>(TermCond) || !cast<ICmpInst>(TermCond)->isEquality()) {
    LLVM_DEBUG(dbgs() << "Cannot fold on branching condition that is not an "
                         "ICmpInst::eq / ICmpInst::ne\n");
    return std::nullopt;
  }
  if (!TermCond->hasOneUse()) {
    LLVM_DEBUG(
        dbgs()
        << "Cannot replace terminating condition with more than one use\n");
    return std::nullopt;
  }

  // For `IsToFold`, a primary IV can be replaced by another affine AddRec when
  // it is only used by the terminating condition. To check for this, we may
  // need to traverse a chain of single uses until we can examine the final
  // usage.
  //         *----------------------*
  //   *---->|  LoopHeader:         |
  //   |     |  PrimaryIV = phi ... |
  //   |     *----------------------*
  //   |              |
  //   |              |
  //   |           chain of
  //   |          single use
  // used by          |
  //  phi             |
  //   |            Value
  //   |          /       \
  //   |     chain of     chain of
  //   |    single use     single use
  //   |      /               \
  //   |     /                 \
  //   *- Value                Value --> used by terminating condition
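  //
  // For example (hypothetical IR), %i below reaches, through single-use
  // chains, exactly one phi (via %i.next feeding back into %i) and the
  // terminating compare, so IsToFold(%i) holds:
  //
  //   %i = phi i64 [ 0, %preheader ], [ %i.next, %latch ]
  //   ...
  //   %i.next = add i64 %i, 1
  //   %c = icmp eq i64 %i.next, %n
  //   br i1 %c, label %exit, label %header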
  auto IsToFold = [&](PHINode &PN) -> bool {
    Value *V = &PN;

    while (V->getNumUses() == 1)
      V = *V->user_begin();

    if (V->getNumUses() != 2)
      return false;

    Value *VToPN = nullptr;
    Value *VToTermCond = nullptr;
    for (User *U : V->users()) {
      while (U->getNumUses() == 1) {
        if (isa<PHINode>(U))
          VToPN = U;
        if (U == TermCond)
          VToTermCond = U;
        U = *U->user_begin();
      }
    }
    return VToPN && VToTermCond;
  };

  // If this is an IV with which we could replace the terminating condition,
  // return the final value of the alternative IV on the last iteration.
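  // Conceptually, for an affine AddRec {Start,+,Step}, this computes
  //   TermValue = Start + Step * (BECount + 1),
  // with truncations/zero-extensions inserted where the operand types differ.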
  auto getAlternateIVEnd = [&](PHINode &PN) -> const SCEV * {
    // FIXME: This does not properly account for overflow.
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
    const SCEV *BECount = SE.getBackedgeTakenCount(L);
    const SCEV *TermValueS = SE.getAddExpr(
        AddRec->getOperand(0),
        SE.getTruncateOrZeroExtend(
            SE.getMulExpr(
                AddRec->getOperand(1),
                SE.getTruncateOrZeroExtend(
                    SE.getAddExpr(BECount, SE.getOne(BECount->getType())),
                    AddRec->getOperand(1)->getType())),
            AddRec->getOperand(0)->getType()));
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
    if (!Expander.isSafeToExpand(TermValueS)) {
      LLVM_DEBUG(
          dbgs() << "Is not safe to expand terminating value for phi node "
                 << PN << "\n");
      return nullptr;
    }
    return TermValueS;
  };

  PHINode *ToFold = nullptr;
  PHINode *ToHelpFold = nullptr;
  const SCEV *TermValueS = nullptr;

  for (PHINode &PN : L->getHeader()->phis()) {
    if (!SE.isSCEVable(PN.getType())) {
      LLVM_DEBUG(dbgs() << "IV of phi '" << PN
                        << "' is not SCEV-able, not qualified for the "
                           "terminating condition folding.\n");
      continue;
    }
    const SCEV *S = SE.getSCEV(&PN);
    const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S);
    // Only speculate on affine AddRecs.
    if (!AddRec || !AddRec->isAffine()) {
      LLVM_DEBUG(dbgs() << "SCEV of phi '" << PN
                        << "' is not an affine add recurrence, not qualified "
                           "for the terminating condition folding.\n");
      continue;
    }

    if (IsToFold(PN))
      ToFold = &PN;
    else if (auto P = getAlternateIVEnd(PN)) {
      ToHelpFold = &PN;
      TermValueS = P;
    }
  }

  LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs()
                 << "Cannot find other AddRec IV to help folding\n";);

  LLVM_DEBUG(if (ToFold && ToHelpFold) dbgs()
             << "\nFound loop that can fold terminating condition\n"
             << "  BECount (SCEV): " << *SE.getBackedgeTakenCount(L) << "\n"
             << "  TermCond: " << *TermCond << "\n"
             << "  BranchInst: " << *BI << "\n"
             << "  ToFold: " << *ToFold << "\n"
             << "  ToHelpFold: " << *ToHelpFold << "\n");

  if (!ToFold || !ToHelpFold)
    return std::nullopt;
  return std::make_tuple(ToFold, ToHelpFold, TermValueS);
}

static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                               DominatorTree &DT, LoopInfo &LI,
                               const TargetTransformInfo &TTI,
                               AssumptionCache &AC, TargetLibraryInfo &TLI,
                               MemorySSA *MSSA) {

  // Debug preservation: before we start removing anything, identify which
  // DVIs meet the salvageable criteria and store their DIExpressions and
  // SCEVs.
  SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> SalvageableDVIRecords;
  SmallSet<AssertingVH<DbgValueInst>, 2> DVIHandles;
  DbgGatherSalvageableDVI(L, SE, SalvageableDVIRecords, DVIHandles);

  bool Changed = false;
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

  // Run the main LSR transformation.
  const LSRInstance &Reducer =
      LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
  Changed |= Reducer.getChanged();

  // Remove any extra phis created by processing inner loops.
  Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
  if (EnablePhiElim && L->isLoopSimplifyForm()) {
    SmallVector<WeakTrackingVH, 16> DeadInsts;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    SCEVExpander Rewriter(SE, DL, "lsr", false);
#ifndef NDEBUG
    Rewriter.setDebugType(DEBUG_TYPE);
#endif
    unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
    if (numFolded) {
      Changed = true;
      RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
                                                           MSSAU.get());
      DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
    }
  }
  // LSR may at times remove all uses of an induction variable from a loop;
  // the only remaining use is then the PHI in the exit block. When this is
  // the case, if the exit value of the IV can be calculated using SCEV, we
  // can replace the exit block PHI with the final value of the IV and skip
  // the updates in each loop iteration.
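  //
  // For example (hypothetical IR), when the only remaining use of an IV is
  // feeding an LCSSA phi in the exit block:
  //
  //   exit:
  //     %iv.lcssa = phi i64 [ %iv.next, %latch ]
  //
  // rewriteLoopExitValues can replace %iv.lcssa with an expression for the
  // IV's final value computed outside the loop, letting the per-iteration
  // updates die.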
  if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
    SmallVector<WeakTrackingVH, 16> DeadInsts;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    SCEVExpander Rewriter(SE, DL, "lsr", true);
    int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT,
                                         UnusedIndVarInLoop, DeadInsts);
    if (Rewrites) {
      Changed = true;
      RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
                                                           MSSAU.get());
      DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
    }
  }

  if (AllowTerminatingConditionFoldingAfterLSR) {
    if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI)) {
      auto [ToFold, ToHelpFold, TermValueS] = *Opt;

      Changed = true;
      NumTermFold++;

      BasicBlock *LoopPreheader = L->getLoopPreheader();
      BasicBlock *LoopLatch = L->getLoopLatch();

      (void)ToFold;
      LLVM_DEBUG(dbgs() << "To fold phi-node:\n"
                        << *ToFold << "\n"
                        << "New term-cond phi-node:\n"
                        << *ToHelpFold << "\n");

      Value *StartValue = ToHelpFold->getIncomingValueForBlock(LoopPreheader);
      (void)StartValue;
      Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch);

      // SCEVExpander for both use in preheader and latch.
      const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
      SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
      SCEVExpanderCleaner ExpCleaner(Expander);

      assert(Expander.isSafeToExpand(TermValueS) &&
             "Terminating value was checked safe in canFoldTermCondOfLoop");

      // Create the new terminating value in the loop preheader.
      Value *TermValue = Expander.expandCodeFor(
          TermValueS, ToHelpFold->getType(), LoopPreheader->getTerminator());

      LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n"
                        << *StartValue << "\n"
                        << "Terminating value of new term-cond phi-node:\n"
                        << *TermValue << "\n");

      // Create the new terminating condition at the loop latch.
      BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
      ICmpInst *OldTermCond = cast<ICmpInst>(BI->getCondition());
      IRBuilder<> LatchBuilder(LoopLatch->getTerminator());
      // FIXME: We are adding a use of an IV here without accounting for
      // poison safety. This is incorrect.
      Value *NewTermCond = LatchBuilder.CreateICmp(
          OldTermCond->getPredicate(), LoopValue, TermValue,
          "lsr_fold_term_cond.replaced_term_cond");

      LLVM_DEBUG(dbgs() << "Old term-cond:\n"
                        << *OldTermCond << "\n"
                        << "New term-cond:\n"
                        << *NewTermCond << "\n");

      BI->setCondition(NewTermCond);

      OldTermCond->eraseFromParent();
      DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());

      ExpCleaner.markResultUsed();
    }
  }

  if (SalvageableDVIRecords.empty())
    return Changed;

  // Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
  // expressions composed using the derived iteration count.
  // TODO: Allow for multiple IV references for nested AddRecSCEVs.
  for (const auto &L : LI) {
    if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))
      DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords);
    else {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
                           "could not be identified.\n");
    }
  }

  for (auto &Rec : SalvageableDVIRecords)
    Rec->clear();
  SalvageableDVIRecords.clear();
  DVIHandles.clear();
  return Changed;
}

bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
  if (skipLoop(L))
    return false;

  auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
      *L->getHeader()->getParent());
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
      *L->getHeader()->getParent());
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
      *L->getHeader()->getParent());
  auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
  MemorySSA *MSSA = nullptr;
  if (MSSAAnalysis)
    MSSA = &MSSAAnalysis->getMSSA();
  return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA);
}

PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
                                              LoopStandardAnalysisResults &AR,
                                              LPMUpdater &) {
  if (!ReduceLoopStrength(&L, AM.getResult<IVUsersAnalysis>(L, AR), AR.SE,
                          AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI, AR.MSSA))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

char LoopStrengthReduce::ID = 0;

INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                      "Loop Strength Reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
                    "Loop Strength Reduction", false, false)

Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); }