10b57cec5SDimitry Andric //===- StraightLineStrengthReduce.cpp - -----------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements straight-line strength reduction (SLSR). Unlike loop
100b57cec5SDimitry Andric // strength reduction, this algorithm is designed to reduce arithmetic
110b57cec5SDimitry Andric // redundancy in straight-line code instead of loops. It has proven to be
120b57cec5SDimitry Andric // effective in simplifying arithmetic statements derived from an unrolled loop.
130b57cec5SDimitry Andric // It can also simplify the logic of SeparateConstOffsetFromGEP.
140b57cec5SDimitry Andric //
150b57cec5SDimitry Andric // There are many optimizations we can perform in the domain of SLSR. This file
160b57cec5SDimitry Andric // for now contains only an initial step. Specifically, we look for strength
170b57cec5SDimitry Andric // reduction candidates in the following forms:
180b57cec5SDimitry Andric //
190b57cec5SDimitry Andric // Form 1: B + i * S
200b57cec5SDimitry Andric // Form 2: (B + i) * S
210b57cec5SDimitry Andric // Form 3: &B[i * S]
220b57cec5SDimitry Andric //
230b57cec5SDimitry Andric // where S is an integer variable, and i is a constant integer. If we found two
240b57cec5SDimitry Andric // candidates S1 and S2 in the same form and S1 dominates S2, we may rewrite S2
250b57cec5SDimitry Andric // in a simpler way with respect to S1. For example,
260b57cec5SDimitry Andric //
270b57cec5SDimitry Andric // S1: X = B + i * S
280b57cec5SDimitry Andric // S2: Y = B + i' * S   => X + (i' - i) * S
290b57cec5SDimitry Andric //
300b57cec5SDimitry Andric // S1: X = (B + i) * S
310b57cec5SDimitry Andric // S2: Y = (B + i') * S => X + (i' - i) * S
320b57cec5SDimitry Andric //
330b57cec5SDimitry Andric // S1: X = &B[i * S]
340b57cec5SDimitry Andric // S2: Y = &B[i' * S]   => &X[(i' - i) * S]
350b57cec5SDimitry Andric //
360b57cec5SDimitry Andric // Note: (i' - i) * S is folded to the extent possible.
370b57cec5SDimitry Andric //
380b57cec5SDimitry Andric // This rewriting is in general a good idea. The code patterns we focus on
390b57cec5SDimitry Andric // usually come from loop unrolling, so (i' - i) * S is likely the same
400b57cec5SDimitry Andric // across iterations and can be reused. When that happens, the optimized form
410b57cec5SDimitry Andric // takes only one add starting from the second iteration.
420b57cec5SDimitry Andric //
430b57cec5SDimitry Andric // When such rewriting is possible, we call S1 a "basis" of S2. When S2 has
440b57cec5SDimitry Andric // multiple bases, we choose to rewrite S2 with respect to its "immediate"
450b57cec5SDimitry Andric // basis, the basis that is the closest ancestor in the dominator tree.
460b57cec5SDimitry Andric //
470b57cec5SDimitry Andric // TODO:
480b57cec5SDimitry Andric //
490b57cec5SDimitry Andric // - Floating point arithmetics when fast math is enabled.
500b57cec5SDimitry Andric //
510b57cec5SDimitry Andric // - SLSR may decrease ILP at the architecture level. Targets that are very
520b57cec5SDimitry Andric //   sensitive to ILP may want to disable it. Having SLSR to consider ILP is
530b57cec5SDimitry Andric //   left as future work.
540b57cec5SDimitry Andric //
550b57cec5SDimitry Andric // - When (i' - i) is constant but i and i' are not, we could still perform
560b57cec5SDimitry Andric //   SLSR.
570b57cec5SDimitry Andric 
58e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
590b57cec5SDimitry Andric #include "llvm/ADT/APInt.h"
600b57cec5SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h"
610b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
620b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
630b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
640b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
650b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
660b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
670b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
680b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
690b57cec5SDimitry Andric #include "llvm/IR/GetElementPtrTypeIterator.h"
700b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
710b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
720b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
730b57cec5SDimitry Andric #include "llvm/IR/Module.h"
740b57cec5SDimitry Andric #include "llvm/IR/Operator.h"
750b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h"
760b57cec5SDimitry Andric #include "llvm/IR/Type.h"
770b57cec5SDimitry Andric #include "llvm/IR/Value.h"
78480093f4SDimitry Andric #include "llvm/InitializePasses.h"
790b57cec5SDimitry Andric #include "llvm/Pass.h"
800b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
810b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
820b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h"
83480093f4SDimitry Andric #include "llvm/Transforms/Utils/Local.h"
840b57cec5SDimitry Andric #include <cassert>
850b57cec5SDimitry Andric #include <cstdint>
860b57cec5SDimitry Andric #include <limits>
870b57cec5SDimitry Andric #include <list>
880b57cec5SDimitry Andric #include <vector>
890b57cec5SDimitry Andric 
900b57cec5SDimitry Andric using namespace llvm;
910b57cec5SDimitry Andric using namespace PatternMatch;
920b57cec5SDimitry Andric 
930b57cec5SDimitry Andric static const unsigned UnknownAddressSpace =
940b57cec5SDimitry Andric     std::numeric_limits<unsigned>::max();
950b57cec5SDimitry Andric 
960b57cec5SDimitry Andric namespace {
970b57cec5SDimitry Andric 
98e8d8bef9SDimitry Andric class StraightLineStrengthReduceLegacyPass : public FunctionPass {
99e8d8bef9SDimitry Andric   const DataLayout *DL = nullptr;
100e8d8bef9SDimitry Andric 
1010b57cec5SDimitry Andric public:
102e8d8bef9SDimitry Andric   static char ID;
103e8d8bef9SDimitry Andric 
StraightLineStrengthReduceLegacyPass()104e8d8bef9SDimitry Andric   StraightLineStrengthReduceLegacyPass() : FunctionPass(ID) {
105e8d8bef9SDimitry Andric     initializeStraightLineStrengthReduceLegacyPassPass(
106e8d8bef9SDimitry Andric         *PassRegistry::getPassRegistry());
107e8d8bef9SDimitry Andric   }
108e8d8bef9SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const109e8d8bef9SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
110e8d8bef9SDimitry Andric     AU.addRequired<DominatorTreeWrapperPass>();
111e8d8bef9SDimitry Andric     AU.addRequired<ScalarEvolutionWrapperPass>();
112e8d8bef9SDimitry Andric     AU.addRequired<TargetTransformInfoWrapperPass>();
113e8d8bef9SDimitry Andric     // We do not modify the shape of the CFG.
114e8d8bef9SDimitry Andric     AU.setPreservesCFG();
115e8d8bef9SDimitry Andric   }
116e8d8bef9SDimitry Andric 
doInitialization(Module & M)117e8d8bef9SDimitry Andric   bool doInitialization(Module &M) override {
118e8d8bef9SDimitry Andric     DL = &M.getDataLayout();
119e8d8bef9SDimitry Andric     return false;
120e8d8bef9SDimitry Andric   }
121e8d8bef9SDimitry Andric 
122e8d8bef9SDimitry Andric   bool runOnFunction(Function &F) override;
123e8d8bef9SDimitry Andric };
124e8d8bef9SDimitry Andric 
125e8d8bef9SDimitry Andric class StraightLineStrengthReduce {
126e8d8bef9SDimitry Andric public:
StraightLineStrengthReduce(const DataLayout * DL,DominatorTree * DT,ScalarEvolution * SE,TargetTransformInfo * TTI)127e8d8bef9SDimitry Andric   StraightLineStrengthReduce(const DataLayout *DL, DominatorTree *DT,
128e8d8bef9SDimitry Andric                              ScalarEvolution *SE, TargetTransformInfo *TTI)
129e8d8bef9SDimitry Andric       : DL(DL), DT(DT), SE(SE), TTI(TTI) {}
130e8d8bef9SDimitry Andric 
1310b57cec5SDimitry Andric   // SLSR candidate. Such a candidate must be in one of the forms described in
1320b57cec5SDimitry Andric   // the header comments.
1330b57cec5SDimitry Andric   struct Candidate {
1340b57cec5SDimitry Andric     enum Kind {
1350b57cec5SDimitry Andric       Invalid, // reserved for the default constructor
1360b57cec5SDimitry Andric       Add,     // B + i * S
1370b57cec5SDimitry Andric       Mul,     // (B + i) * S
1380b57cec5SDimitry Andric       GEP,     // &B[..][i * S][..]
1390b57cec5SDimitry Andric     };
1400b57cec5SDimitry Andric 
1410b57cec5SDimitry Andric     Candidate() = default;
Candidate__anon368811100111::StraightLineStrengthReduce::Candidate1420b57cec5SDimitry Andric     Candidate(Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
1430b57cec5SDimitry Andric               Instruction *I)
1440b57cec5SDimitry Andric         : CandidateKind(CT), Base(B), Index(Idx), Stride(S), Ins(I) {}
1450b57cec5SDimitry Andric 
1460b57cec5SDimitry Andric     Kind CandidateKind = Invalid;
1470b57cec5SDimitry Andric 
1480b57cec5SDimitry Andric     const SCEV *Base = nullptr;
1490b57cec5SDimitry Andric 
1500b57cec5SDimitry Andric     // Note that Index and Stride of a GEP candidate do not necessarily have the
1510b57cec5SDimitry Andric     // same integer type. In that case, during rewriting, Stride will be
1520b57cec5SDimitry Andric     // sign-extended or truncated to Index's type.
1530b57cec5SDimitry Andric     ConstantInt *Index = nullptr;
1540b57cec5SDimitry Andric 
1550b57cec5SDimitry Andric     Value *Stride = nullptr;
1560b57cec5SDimitry Andric 
1570b57cec5SDimitry Andric     // The instruction this candidate corresponds to. It helps us to rewrite a
1580b57cec5SDimitry Andric     // candidate with respect to its immediate basis. Note that one instruction
1590b57cec5SDimitry Andric     // can correspond to multiple candidates depending on how you associate the
1600b57cec5SDimitry Andric     // expression. For instance,
1610b57cec5SDimitry Andric     //
1620b57cec5SDimitry Andric     // (a + 1) * (b + 2)
1630b57cec5SDimitry Andric     //
1640b57cec5SDimitry Andric     // can be treated as
1650b57cec5SDimitry Andric     //
1660b57cec5SDimitry Andric     // <Base: a, Index: 1, Stride: b + 2>
1670b57cec5SDimitry Andric     //
1680b57cec5SDimitry Andric     // or
1690b57cec5SDimitry Andric     //
1700b57cec5SDimitry Andric     // <Base: b, Index: 2, Stride: a + 1>
1710b57cec5SDimitry Andric     Instruction *Ins = nullptr;
1720b57cec5SDimitry Andric 
1730b57cec5SDimitry Andric     // Points to the immediate basis of this candidate, or nullptr if we cannot
1740b57cec5SDimitry Andric     // find any basis for this candidate.
1750b57cec5SDimitry Andric     Candidate *Basis = nullptr;
1760b57cec5SDimitry Andric   };
1770b57cec5SDimitry Andric 
178e8d8bef9SDimitry Andric   bool runOnFunction(Function &F);
1790b57cec5SDimitry Andric 
1800b57cec5SDimitry Andric private:
1810b57cec5SDimitry Andric   // Returns true if Basis is a basis for C, i.e., Basis dominates C and they
1820b57cec5SDimitry Andric   // share the same base and stride.
1830b57cec5SDimitry Andric   bool isBasisFor(const Candidate &Basis, const Candidate &C);
1840b57cec5SDimitry Andric 
1850b57cec5SDimitry Andric   // Returns whether the candidate can be folded into an addressing mode.
1860b57cec5SDimitry Andric   bool isFoldable(const Candidate &C, TargetTransformInfo *TTI,
1870b57cec5SDimitry Andric                   const DataLayout *DL);
1880b57cec5SDimitry Andric 
1890b57cec5SDimitry Andric   // Returns true if C is already in a simplest form and not worth being
1900b57cec5SDimitry Andric   // rewritten.
1910b57cec5SDimitry Andric   bool isSimplestForm(const Candidate &C);
1920b57cec5SDimitry Andric 
1930b57cec5SDimitry Andric   // Checks whether I is in a candidate form. If so, adds all the matching forms
1940b57cec5SDimitry Andric   // to Candidates, and tries to find the immediate basis for each of them.
1950b57cec5SDimitry Andric   void allocateCandidatesAndFindBasis(Instruction *I);
1960b57cec5SDimitry Andric 
1970b57cec5SDimitry Andric   // Allocate candidates and find bases for Add instructions.
1980b57cec5SDimitry Andric   void allocateCandidatesAndFindBasisForAdd(Instruction *I);
1990b57cec5SDimitry Andric 
2000b57cec5SDimitry Andric   // Given I = LHS + RHS, factors RHS into i * S and makes (LHS + i * S) a
2010b57cec5SDimitry Andric   // candidate.
2020b57cec5SDimitry Andric   void allocateCandidatesAndFindBasisForAdd(Value *LHS, Value *RHS,
2030b57cec5SDimitry Andric                                             Instruction *I);
2040b57cec5SDimitry Andric   // Allocate candidates and find bases for Mul instructions.
2050b57cec5SDimitry Andric   void allocateCandidatesAndFindBasisForMul(Instruction *I);
2060b57cec5SDimitry Andric 
2070b57cec5SDimitry Andric   // Splits LHS into Base + Index and, if succeeds, calls
2080b57cec5SDimitry Andric   // allocateCandidatesAndFindBasis.
2090b57cec5SDimitry Andric   void allocateCandidatesAndFindBasisForMul(Value *LHS, Value *RHS,
2100b57cec5SDimitry Andric                                             Instruction *I);
2110b57cec5SDimitry Andric 
2120b57cec5SDimitry Andric   // Allocate candidates and find bases for GetElementPtr instructions.
2130b57cec5SDimitry Andric   void allocateCandidatesAndFindBasisForGEP(GetElementPtrInst *GEP);
2140b57cec5SDimitry Andric 
2150b57cec5SDimitry Andric   // A helper function that scales Idx with ElementSize before invoking
2160b57cec5SDimitry Andric   // allocateCandidatesAndFindBasis.
2170b57cec5SDimitry Andric   void allocateCandidatesAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx,
2180b57cec5SDimitry Andric                                             Value *S, uint64_t ElementSize,
2190b57cec5SDimitry Andric                                             Instruction *I);
2200b57cec5SDimitry Andric 
2210b57cec5SDimitry Andric   // Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate
2220b57cec5SDimitry Andric   // basis.
2230b57cec5SDimitry Andric   void allocateCandidatesAndFindBasis(Candidate::Kind CT, const SCEV *B,
2240b57cec5SDimitry Andric                                       ConstantInt *Idx, Value *S,
2250b57cec5SDimitry Andric                                       Instruction *I);
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric   // Rewrites candidate C with respect to Basis.
2280b57cec5SDimitry Andric   void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis);
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric   // A helper function that factors ArrayIdx to a product of a stride and a
2310b57cec5SDimitry Andric   // constant index, and invokes allocateCandidatesAndFindBasis with the
2320b57cec5SDimitry Andric   // factorings.
2330b57cec5SDimitry Andric   void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize,
2340b57cec5SDimitry Andric                         GetElementPtrInst *GEP);
2350b57cec5SDimitry Andric 
236297eecfbSDimitry Andric   // Emit code that computes the "bump" from Basis to C.
2370b57cec5SDimitry Andric   static Value *emitBump(const Candidate &Basis, const Candidate &C,
238297eecfbSDimitry Andric                          IRBuilder<> &Builder, const DataLayout *DL);
2390b57cec5SDimitry Andric 
2400b57cec5SDimitry Andric   const DataLayout *DL = nullptr;
2410b57cec5SDimitry Andric   DominatorTree *DT = nullptr;
2420b57cec5SDimitry Andric   ScalarEvolution *SE;
2430b57cec5SDimitry Andric   TargetTransformInfo *TTI = nullptr;
2440b57cec5SDimitry Andric   std::list<Candidate> Candidates;
2450b57cec5SDimitry Andric 
2460b57cec5SDimitry Andric   // Temporarily holds all instructions that are unlinked (but not deleted) by
2470b57cec5SDimitry Andric   // rewriteCandidateWithBasis. These instructions will be actually removed
2480b57cec5SDimitry Andric   // after all rewriting finishes.
2490b57cec5SDimitry Andric   std::vector<Instruction *> UnlinkedInstructions;
2500b57cec5SDimitry Andric };
2510b57cec5SDimitry Andric 
2520b57cec5SDimitry Andric } // end anonymous namespace
2530b57cec5SDimitry Andric 
254e8d8bef9SDimitry Andric char StraightLineStrengthReduceLegacyPass::ID = 0;
2550b57cec5SDimitry Andric 
256e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(StraightLineStrengthReduceLegacyPass, "slsr",
2570b57cec5SDimitry Andric                       "Straight line strength reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)2580b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2590b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2600b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
261e8d8bef9SDimitry Andric INITIALIZE_PASS_END(StraightLineStrengthReduceLegacyPass, "slsr",
2620b57cec5SDimitry Andric                     "Straight line strength reduction", false, false)
2630b57cec5SDimitry Andric 
2640b57cec5SDimitry Andric FunctionPass *llvm::createStraightLineStrengthReducePass() {
265e8d8bef9SDimitry Andric   return new StraightLineStrengthReduceLegacyPass();
2660b57cec5SDimitry Andric }
2670b57cec5SDimitry Andric 
isBasisFor(const Candidate & Basis,const Candidate & C)2680b57cec5SDimitry Andric bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
2690b57cec5SDimitry Andric                                             const Candidate &C) {
2700b57cec5SDimitry Andric   return (Basis.Ins != C.Ins && // skip the same instruction
2710b57cec5SDimitry Andric           // They must have the same type too. Basis.Base == C.Base doesn't
2720b57cec5SDimitry Andric           // guarantee their types are the same (PR23975).
2730b57cec5SDimitry Andric           Basis.Ins->getType() == C.Ins->getType() &&
2740b57cec5SDimitry Andric           // Basis must dominate C in order to rewrite C with respect to Basis.
2750b57cec5SDimitry Andric           DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) &&
2760b57cec5SDimitry Andric           // They share the same base, stride, and candidate kind.
2770b57cec5SDimitry Andric           Basis.Base == C.Base && Basis.Stride == C.Stride &&
2780b57cec5SDimitry Andric           Basis.CandidateKind == C.CandidateKind);
2790b57cec5SDimitry Andric }
2800b57cec5SDimitry Andric 
isGEPFoldable(GetElementPtrInst * GEP,const TargetTransformInfo * TTI)2810b57cec5SDimitry Andric static bool isGEPFoldable(GetElementPtrInst *GEP,
2820b57cec5SDimitry Andric                           const TargetTransformInfo *TTI) {
283e8d8bef9SDimitry Andric   SmallVector<const Value *, 4> Indices(GEP->indices());
2840b57cec5SDimitry Andric   return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
2850b57cec5SDimitry Andric                          Indices) == TargetTransformInfo::TCC_Free;
2860b57cec5SDimitry Andric }
2870b57cec5SDimitry Andric 
2880b57cec5SDimitry Andric // Returns whether (Base + Index * Stride) can be folded to an addressing mode.
isAddFoldable(const SCEV * Base,ConstantInt * Index,Value * Stride,TargetTransformInfo * TTI)2890b57cec5SDimitry Andric static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride,
2900b57cec5SDimitry Andric                           TargetTransformInfo *TTI) {
2910b57cec5SDimitry Andric   // Index->getSExtValue() may crash if Index is wider than 64-bit.
2920b57cec5SDimitry Andric   return Index->getBitWidth() <= 64 &&
2930b57cec5SDimitry Andric          TTI->isLegalAddressingMode(Base->getType(), nullptr, 0, true,
2940b57cec5SDimitry Andric                                     Index->getSExtValue(), UnknownAddressSpace);
2950b57cec5SDimitry Andric }
2960b57cec5SDimitry Andric 
isFoldable(const Candidate & C,TargetTransformInfo * TTI,const DataLayout * DL)2970b57cec5SDimitry Andric bool StraightLineStrengthReduce::isFoldable(const Candidate &C,
2980b57cec5SDimitry Andric                                             TargetTransformInfo *TTI,
2990b57cec5SDimitry Andric                                             const DataLayout *DL) {
3000b57cec5SDimitry Andric   if (C.CandidateKind == Candidate::Add)
3010b57cec5SDimitry Andric     return isAddFoldable(C.Base, C.Index, C.Stride, TTI);
3020b57cec5SDimitry Andric   if (C.CandidateKind == Candidate::GEP)
3030b57cec5SDimitry Andric     return isGEPFoldable(cast<GetElementPtrInst>(C.Ins), TTI);
3040b57cec5SDimitry Andric   return false;
3050b57cec5SDimitry Andric }
3060b57cec5SDimitry Andric 
3070b57cec5SDimitry Andric // Returns true if GEP has zero or one non-zero index.
hasOnlyOneNonZeroIndex(GetElementPtrInst * GEP)3080b57cec5SDimitry Andric static bool hasOnlyOneNonZeroIndex(GetElementPtrInst *GEP) {
3090b57cec5SDimitry Andric   unsigned NumNonZeroIndices = 0;
310fe6060f1SDimitry Andric   for (Use &Idx : GEP->indices()) {
311fe6060f1SDimitry Andric     ConstantInt *ConstIdx = dyn_cast<ConstantInt>(Idx);
3120b57cec5SDimitry Andric     if (ConstIdx == nullptr || !ConstIdx->isZero())
3130b57cec5SDimitry Andric       ++NumNonZeroIndices;
3140b57cec5SDimitry Andric   }
3150b57cec5SDimitry Andric   return NumNonZeroIndices <= 1;
3160b57cec5SDimitry Andric }
3170b57cec5SDimitry Andric 
isSimplestForm(const Candidate & C)3180b57cec5SDimitry Andric bool StraightLineStrengthReduce::isSimplestForm(const Candidate &C) {
3190b57cec5SDimitry Andric   if (C.CandidateKind == Candidate::Add) {
3200b57cec5SDimitry Andric     // B + 1 * S or B + (-1) * S
3210b57cec5SDimitry Andric     return C.Index->isOne() || C.Index->isMinusOne();
3220b57cec5SDimitry Andric   }
3230b57cec5SDimitry Andric   if (C.CandidateKind == Candidate::Mul) {
3240b57cec5SDimitry Andric     // (B + 0) * S
3250b57cec5SDimitry Andric     return C.Index->isZero();
3260b57cec5SDimitry Andric   }
3270b57cec5SDimitry Andric   if (C.CandidateKind == Candidate::GEP) {
3280b57cec5SDimitry Andric     // (char*)B + S or (char*)B - S
3290b57cec5SDimitry Andric     return ((C.Index->isOne() || C.Index->isMinusOne()) &&
3300b57cec5SDimitry Andric             hasOnlyOneNonZeroIndex(cast<GetElementPtrInst>(C.Ins)));
3310b57cec5SDimitry Andric   }
3320b57cec5SDimitry Andric   return false;
3330b57cec5SDimitry Andric }
3340b57cec5SDimitry Andric 
3350b57cec5SDimitry Andric // TODO: We currently implement an algorithm whose time complexity is linear in
3360b57cec5SDimitry Andric // the number of existing candidates. However, we could do better by using
3370b57cec5SDimitry Andric // ScopedHashTable. Specifically, while traversing the dominator tree, we could
3380b57cec5SDimitry Andric // maintain all the candidates that dominate the basic block being traversed in
3390b57cec5SDimitry Andric // a ScopedHashTable. This hash table is indexed by the base and the stride of
3400b57cec5SDimitry Andric // a candidate. Therefore, finding the immediate basis of a candidate boils down
3410b57cec5SDimitry Andric // to one hash-table look up.
allocateCandidatesAndFindBasis(Candidate::Kind CT,const SCEV * B,ConstantInt * Idx,Value * S,Instruction * I)3420b57cec5SDimitry Andric void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
3430b57cec5SDimitry Andric     Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
3440b57cec5SDimitry Andric     Instruction *I) {
3450b57cec5SDimitry Andric   Candidate C(CT, B, Idx, S, I);
3460b57cec5SDimitry Andric   // SLSR can complicate an instruction in two cases:
3470b57cec5SDimitry Andric   //
3480b57cec5SDimitry Andric   // 1. If we can fold I into an addressing mode, computing I is likely free or
3490b57cec5SDimitry Andric   // takes only one instruction.
3500b57cec5SDimitry Andric   //
3510b57cec5SDimitry Andric   // 2. I is already in a simplest form. For example, when
3520b57cec5SDimitry Andric   //      X = B + 8 * S
3530b57cec5SDimitry Andric   //      Y = B + S,
3540b57cec5SDimitry Andric   //    rewriting Y to X - 7 * S is probably a bad idea.
3550b57cec5SDimitry Andric   //
3560b57cec5SDimitry Andric   // In the above cases, we still add I to the candidate list so that I can be
3570b57cec5SDimitry Andric   // the basis of other candidates, but we leave I's basis blank so that I
3580b57cec5SDimitry Andric   // won't be rewritten.
3590b57cec5SDimitry Andric   if (!isFoldable(C, TTI, DL) && !isSimplestForm(C)) {
3600b57cec5SDimitry Andric     // Try to compute the immediate basis of C.
3610b57cec5SDimitry Andric     unsigned NumIterations = 0;
3620b57cec5SDimitry Andric     // Limit the scan radius to avoid running in quadratice time.
3630b57cec5SDimitry Andric     static const unsigned MaxNumIterations = 50;
3640b57cec5SDimitry Andric     for (auto Basis = Candidates.rbegin();
3650b57cec5SDimitry Andric          Basis != Candidates.rend() && NumIterations < MaxNumIterations;
3660b57cec5SDimitry Andric          ++Basis, ++NumIterations) {
3670b57cec5SDimitry Andric       if (isBasisFor(*Basis, C)) {
3680b57cec5SDimitry Andric         C.Basis = &(*Basis);
3690b57cec5SDimitry Andric         break;
3700b57cec5SDimitry Andric       }
3710b57cec5SDimitry Andric     }
3720b57cec5SDimitry Andric   }
3730b57cec5SDimitry Andric   // Regardless of whether we find a basis for C, we need to push C to the
3740b57cec5SDimitry Andric   // candidate list so that it can be the basis of other candidates.
3750b57cec5SDimitry Andric   Candidates.push_back(C);
3760b57cec5SDimitry Andric }
3770b57cec5SDimitry Andric 
allocateCandidatesAndFindBasis(Instruction * I)3780b57cec5SDimitry Andric void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
3790b57cec5SDimitry Andric     Instruction *I) {
3800b57cec5SDimitry Andric   switch (I->getOpcode()) {
3810b57cec5SDimitry Andric   case Instruction::Add:
3820b57cec5SDimitry Andric     allocateCandidatesAndFindBasisForAdd(I);
3830b57cec5SDimitry Andric     break;
3840b57cec5SDimitry Andric   case Instruction::Mul:
3850b57cec5SDimitry Andric     allocateCandidatesAndFindBasisForMul(I);
3860b57cec5SDimitry Andric     break;
3870b57cec5SDimitry Andric   case Instruction::GetElementPtr:
3880b57cec5SDimitry Andric     allocateCandidatesAndFindBasisForGEP(cast<GetElementPtrInst>(I));
3890b57cec5SDimitry Andric     break;
3900b57cec5SDimitry Andric   }
3910b57cec5SDimitry Andric }
3920b57cec5SDimitry Andric 
allocateCandidatesAndFindBasisForAdd(Instruction * I)3930b57cec5SDimitry Andric void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
3940b57cec5SDimitry Andric     Instruction *I) {
3950b57cec5SDimitry Andric   // Try matching B + i * S.
3960b57cec5SDimitry Andric   if (!isa<IntegerType>(I->getType()))
3970b57cec5SDimitry Andric     return;
3980b57cec5SDimitry Andric 
3990b57cec5SDimitry Andric   assert(I->getNumOperands() == 2 && "isn't I an add?");
4000b57cec5SDimitry Andric   Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
4010b57cec5SDimitry Andric   allocateCandidatesAndFindBasisForAdd(LHS, RHS, I);
4020b57cec5SDimitry Andric   if (LHS != RHS)
4030b57cec5SDimitry Andric     allocateCandidatesAndFindBasisForAdd(RHS, LHS, I);
4040b57cec5SDimitry Andric }
4050b57cec5SDimitry Andric 
allocateCandidatesAndFindBasisForAdd(Value * LHS,Value * RHS,Instruction * I)4060b57cec5SDimitry Andric void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
4070b57cec5SDimitry Andric     Value *LHS, Value *RHS, Instruction *I) {
4080b57cec5SDimitry Andric   Value *S = nullptr;
4090b57cec5SDimitry Andric   ConstantInt *Idx = nullptr;
4100b57cec5SDimitry Andric   if (match(RHS, m_Mul(m_Value(S), m_ConstantInt(Idx)))) {
4110b57cec5SDimitry Andric     // I = LHS + RHS = LHS + Idx * S
4120b57cec5SDimitry Andric     allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
4130b57cec5SDimitry Andric   } else if (match(RHS, m_Shl(m_Value(S), m_ConstantInt(Idx)))) {
4140b57cec5SDimitry Andric     // I = LHS + RHS = LHS + (S << Idx) = LHS + S * (1 << Idx)
4150b57cec5SDimitry Andric     APInt One(Idx->getBitWidth(), 1);
4160b57cec5SDimitry Andric     Idx = ConstantInt::get(Idx->getContext(), One << Idx->getValue());
4170b57cec5SDimitry Andric     allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
4180b57cec5SDimitry Andric   } else {
4190b57cec5SDimitry Andric     // At least, I = LHS + 1 * RHS
4200b57cec5SDimitry Andric     ConstantInt *One = ConstantInt::get(cast<IntegerType>(I->getType()), 1);
4210b57cec5SDimitry Andric     allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), One, RHS,
4220b57cec5SDimitry Andric                                    I);
4230b57cec5SDimitry Andric   }
4240b57cec5SDimitry Andric }
4250b57cec5SDimitry Andric 
4260b57cec5SDimitry Andric // Returns true if A matches B + C where C is constant.
matchesAdd(Value * A,Value * & B,ConstantInt * & C)4270b57cec5SDimitry Andric static bool matchesAdd(Value *A, Value *&B, ConstantInt *&C) {
4280b57cec5SDimitry Andric   return (match(A, m_Add(m_Value(B), m_ConstantInt(C))) ||
4290b57cec5SDimitry Andric           match(A, m_Add(m_ConstantInt(C), m_Value(B))));
4300b57cec5SDimitry Andric }
4310b57cec5SDimitry Andric 
4320b57cec5SDimitry Andric // Returns true if A matches B | C where C is constant.
matchesOr(Value * A,Value * & B,ConstantInt * & C)4330b57cec5SDimitry Andric static bool matchesOr(Value *A, Value *&B, ConstantInt *&C) {
4340b57cec5SDimitry Andric   return (match(A, m_Or(m_Value(B), m_ConstantInt(C))) ||
4350b57cec5SDimitry Andric           match(A, m_Or(m_ConstantInt(C), m_Value(B))));
4360b57cec5SDimitry Andric }
4370b57cec5SDimitry Andric 
allocateCandidatesAndFindBasisForMul(Value * LHS,Value * RHS,Instruction * I)4380b57cec5SDimitry Andric void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
4390b57cec5SDimitry Andric     Value *LHS, Value *RHS, Instruction *I) {
4400b57cec5SDimitry Andric   Value *B = nullptr;
4410b57cec5SDimitry Andric   ConstantInt *Idx = nullptr;
4420b57cec5SDimitry Andric   if (matchesAdd(LHS, B, Idx)) {
4430b57cec5SDimitry Andric     // If LHS is in the form of "Base + Index", then I is in the form of
4440b57cec5SDimitry Andric     // "(Base + Index) * RHS".
4450b57cec5SDimitry Andric     allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
4460b57cec5SDimitry Andric   } else if (matchesOr(LHS, B, Idx) && haveNoCommonBitsSet(B, Idx, *DL)) {
4470b57cec5SDimitry Andric     // If LHS is in the form of "Base | Index" and Base and Index have no common
4480b57cec5SDimitry Andric     // bits set, then
4490b57cec5SDimitry Andric     //   Base | Index = Base + Index
4500b57cec5SDimitry Andric     // and I is thus in the form of "(Base + Index) * RHS".
4510b57cec5SDimitry Andric     allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
4520b57cec5SDimitry Andric   } else {
4530b57cec5SDimitry Andric     // Otherwise, at least try the form (LHS + 0) * RHS.
4540b57cec5SDimitry Andric     ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0);
4550b57cec5SDimitry Andric     allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS,
4560b57cec5SDimitry Andric                                    I);
4570b57cec5SDimitry Andric   }
4580b57cec5SDimitry Andric }
4590b57cec5SDimitry Andric 
allocateCandidatesAndFindBasisForMul(Instruction * I)4600b57cec5SDimitry Andric void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
4610b57cec5SDimitry Andric     Instruction *I) {
4620b57cec5SDimitry Andric   // Try matching (B + i) * S.
4630b57cec5SDimitry Andric   // TODO: we could extend SLSR to float and vector types.
4640b57cec5SDimitry Andric   if (!isa<IntegerType>(I->getType()))
4650b57cec5SDimitry Andric     return;
4660b57cec5SDimitry Andric 
4670b57cec5SDimitry Andric   assert(I->getNumOperands() == 2 && "isn't I a mul?");
4680b57cec5SDimitry Andric   Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
4690b57cec5SDimitry Andric   allocateCandidatesAndFindBasisForMul(LHS, RHS, I);
4700b57cec5SDimitry Andric   if (LHS != RHS) {
4710b57cec5SDimitry Andric     // Symmetrically, try to split RHS to Base + Index.
4720b57cec5SDimitry Andric     allocateCandidatesAndFindBasisForMul(RHS, LHS, I);
4730b57cec5SDimitry Andric   }
4740b57cec5SDimitry Andric }
4750b57cec5SDimitry Andric 
allocateCandidatesAndFindBasisForGEP(const SCEV * B,ConstantInt * Idx,Value * S,uint64_t ElementSize,Instruction * I)4760b57cec5SDimitry Andric void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
4770b57cec5SDimitry Andric     const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize,
4780b57cec5SDimitry Andric     Instruction *I) {
4790b57cec5SDimitry Andric   // I = B + sext(Idx *nsw S) * ElementSize
4800b57cec5SDimitry Andric   //   = B + (sext(Idx) * sext(S)) * ElementSize
4810b57cec5SDimitry Andric   //   = B + (sext(Idx) * ElementSize) * sext(S)
4820b57cec5SDimitry Andric   // Casting to IntegerType is safe because we skipped vector GEPs.
48306c3fb27SDimitry Andric   IntegerType *PtrIdxTy = cast<IntegerType>(DL->getIndexType(I->getType()));
4840b57cec5SDimitry Andric   ConstantInt *ScaledIdx = ConstantInt::get(
48506c3fb27SDimitry Andric       PtrIdxTy, Idx->getSExtValue() * (int64_t)ElementSize, true);
4860b57cec5SDimitry Andric   allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);
4870b57cec5SDimitry Andric }
4880b57cec5SDimitry Andric 
factorArrayIndex(Value * ArrayIdx,const SCEV * Base,uint64_t ElementSize,GetElementPtrInst * GEP)4890b57cec5SDimitry Andric void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,
4900b57cec5SDimitry Andric                                                   const SCEV *Base,
4910b57cec5SDimitry Andric                                                   uint64_t ElementSize,
4920b57cec5SDimitry Andric                                                   GetElementPtrInst *GEP) {
4930b57cec5SDimitry Andric   // At least, ArrayIdx = ArrayIdx *nsw 1.
4940b57cec5SDimitry Andric   allocateCandidatesAndFindBasisForGEP(
4950b57cec5SDimitry Andric       Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1),
4960b57cec5SDimitry Andric       ArrayIdx, ElementSize, GEP);
4970b57cec5SDimitry Andric   Value *LHS = nullptr;
4980b57cec5SDimitry Andric   ConstantInt *RHS = nullptr;
4990b57cec5SDimitry Andric   // One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx
5000b57cec5SDimitry Andric   // itself. This would allow us to handle the shl case for free. However,
5010b57cec5SDimitry Andric   // matching SCEVs has two issues:
5020b57cec5SDimitry Andric   //
5030b57cec5SDimitry Andric   // 1. this would complicate rewriting because the rewriting procedure
5040b57cec5SDimitry Andric   // would have to translate SCEVs back to IR instructions. This translation
5050b57cec5SDimitry Andric   // is difficult when LHS is further evaluated to a composite SCEV.
5060b57cec5SDimitry Andric   //
5070b57cec5SDimitry Andric   // 2. ScalarEvolution is designed to be control-flow oblivious. It tends
5080b57cec5SDimitry Andric   // to strip nsw/nuw flags which are critical for SLSR to trace into
5090b57cec5SDimitry Andric   // sext'ed multiplication.
5100b57cec5SDimitry Andric   if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) {
5110b57cec5SDimitry Andric     // SLSR is currently unsafe if i * S may overflow.
5120b57cec5SDimitry Andric     // GEP = Base + sext(LHS *nsw RHS) * ElementSize
5130b57cec5SDimitry Andric     allocateCandidatesAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
5140b57cec5SDimitry Andric   } else if (match(ArrayIdx, m_NSWShl(m_Value(LHS), m_ConstantInt(RHS)))) {
5150b57cec5SDimitry Andric     // GEP = Base + sext(LHS <<nsw RHS) * ElementSize
5160b57cec5SDimitry Andric     //     = Base + sext(LHS *nsw (1 << RHS)) * ElementSize
5170b57cec5SDimitry Andric     APInt One(RHS->getBitWidth(), 1);
5180b57cec5SDimitry Andric     ConstantInt *PowerOf2 =
5190b57cec5SDimitry Andric         ConstantInt::get(RHS->getContext(), One << RHS->getValue());
5200b57cec5SDimitry Andric     allocateCandidatesAndFindBasisForGEP(Base, PowerOf2, LHS, ElementSize, GEP);
5210b57cec5SDimitry Andric   }
5220b57cec5SDimitry Andric }
5230b57cec5SDimitry Andric 
allocateCandidatesAndFindBasisForGEP(GetElementPtrInst * GEP)5240b57cec5SDimitry Andric void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
5250b57cec5SDimitry Andric     GetElementPtrInst *GEP) {
5260b57cec5SDimitry Andric   // TODO: handle vector GEPs
5270b57cec5SDimitry Andric   if (GEP->getType()->isVectorTy())
5280b57cec5SDimitry Andric     return;
5290b57cec5SDimitry Andric 
5300b57cec5SDimitry Andric   SmallVector<const SCEV *, 4> IndexExprs;
531fe6060f1SDimitry Andric   for (Use &Idx : GEP->indices())
532fe6060f1SDimitry Andric     IndexExprs.push_back(SE->getSCEV(Idx));
5330b57cec5SDimitry Andric 
5340b57cec5SDimitry Andric   gep_type_iterator GTI = gep_type_begin(GEP);
5350b57cec5SDimitry Andric   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
5360b57cec5SDimitry Andric     if (GTI.isStruct())
5370b57cec5SDimitry Andric       continue;
5380b57cec5SDimitry Andric 
5390b57cec5SDimitry Andric     const SCEV *OrigIndexExpr = IndexExprs[I - 1];
5400b57cec5SDimitry Andric     IndexExprs[I - 1] = SE->getZero(OrigIndexExpr->getType());
5410b57cec5SDimitry Andric 
5420b57cec5SDimitry Andric     // The base of this candidate is GEP's base plus the offsets of all
5430b57cec5SDimitry Andric     // indices except this current one.
5440b57cec5SDimitry Andric     const SCEV *BaseExpr = SE->getGEPExpr(cast<GEPOperator>(GEP), IndexExprs);
5450b57cec5SDimitry Andric     Value *ArrayIdx = GEP->getOperand(I);
5461db9f3b2SDimitry Andric     uint64_t ElementSize = GTI.getSequentialElementStride(*DL);
5470b57cec5SDimitry Andric     if (ArrayIdx->getType()->getIntegerBitWidth() <=
54806c3fb27SDimitry Andric         DL->getIndexSizeInBits(GEP->getAddressSpace())) {
54906c3fb27SDimitry Andric       // Skip factoring if ArrayIdx is wider than the index size, because
55006c3fb27SDimitry Andric       // ArrayIdx is implicitly truncated to the index size.
5510b57cec5SDimitry Andric       factorArrayIndex(ArrayIdx, BaseExpr, ElementSize, GEP);
5520b57cec5SDimitry Andric     }
5530b57cec5SDimitry Andric     // When ArrayIdx is the sext of a value, we try to factor that value as
5540b57cec5SDimitry Andric     // well.  Handling this case is important because array indices are
55506c3fb27SDimitry Andric     // typically sign-extended to the pointer index size.
5560b57cec5SDimitry Andric     Value *TruncatedArrayIdx = nullptr;
5570b57cec5SDimitry Andric     if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx))) &&
5580b57cec5SDimitry Andric         TruncatedArrayIdx->getType()->getIntegerBitWidth() <=
55906c3fb27SDimitry Andric             DL->getIndexSizeInBits(GEP->getAddressSpace())) {
5600b57cec5SDimitry Andric       // Skip factoring if TruncatedArrayIdx is wider than the pointer size,
5610b57cec5SDimitry Andric       // because TruncatedArrayIdx is implicitly truncated to the pointer size.
5620b57cec5SDimitry Andric       factorArrayIndex(TruncatedArrayIdx, BaseExpr, ElementSize, GEP);
5630b57cec5SDimitry Andric     }
5640b57cec5SDimitry Andric 
5650b57cec5SDimitry Andric     IndexExprs[I - 1] = OrigIndexExpr;
5660b57cec5SDimitry Andric   }
5670b57cec5SDimitry Andric }
5680b57cec5SDimitry Andric 
5690b57cec5SDimitry Andric // A helper function that unifies the bitwidth of A and B.
unifyBitWidth(APInt & A,APInt & B)5700b57cec5SDimitry Andric static void unifyBitWidth(APInt &A, APInt &B) {
5710b57cec5SDimitry Andric   if (A.getBitWidth() < B.getBitWidth())
5720b57cec5SDimitry Andric     A = A.sext(B.getBitWidth());
5730b57cec5SDimitry Andric   else if (A.getBitWidth() > B.getBitWidth())
5740b57cec5SDimitry Andric     B = B.sext(A.getBitWidth());
5750b57cec5SDimitry Andric }
5760b57cec5SDimitry Andric 
emitBump(const Candidate & Basis,const Candidate & C,IRBuilder<> & Builder,const DataLayout * DL)5770b57cec5SDimitry Andric Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
5780b57cec5SDimitry Andric                                             const Candidate &C,
5790b57cec5SDimitry Andric                                             IRBuilder<> &Builder,
580297eecfbSDimitry Andric                                             const DataLayout *DL) {
5810b57cec5SDimitry Andric   APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue();
5820b57cec5SDimitry Andric   unifyBitWidth(Idx, BasisIdx);
5830b57cec5SDimitry Andric   APInt IndexOffset = Idx - BasisIdx;
5840b57cec5SDimitry Andric 
5850b57cec5SDimitry Andric   // Compute Bump = C - Basis = (i' - i) * S.
5860b57cec5SDimitry Andric   // Common case 1: if (i' - i) is 1, Bump = S.
5870b57cec5SDimitry Andric   if (IndexOffset == 1)
5880b57cec5SDimitry Andric     return C.Stride;
5890b57cec5SDimitry Andric   // Common case 2: if (i' - i) is -1, Bump = -S.
590349cc55cSDimitry Andric   if (IndexOffset.isAllOnes())
5910b57cec5SDimitry Andric     return Builder.CreateNeg(C.Stride);
5920b57cec5SDimitry Andric 
5930b57cec5SDimitry Andric   // Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may
5940b57cec5SDimitry Andric   // have different bit widths.
5950b57cec5SDimitry Andric   IntegerType *DeltaType =
5960b57cec5SDimitry Andric       IntegerType::get(Basis.Ins->getContext(), IndexOffset.getBitWidth());
5970b57cec5SDimitry Andric   Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, DeltaType);
5980b57cec5SDimitry Andric   if (IndexOffset.isPowerOf2()) {
5990b57cec5SDimitry Andric     // If (i' - i) is a power of 2, Bump = sext/trunc(S) << log(i' - i).
6000b57cec5SDimitry Andric     ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2());
6010b57cec5SDimitry Andric     return Builder.CreateShl(ExtendedStride, Exponent);
6020b57cec5SDimitry Andric   }
603349cc55cSDimitry Andric   if (IndexOffset.isNegatedPowerOf2()) {
6040b57cec5SDimitry Andric     // If (i - i') is a power of 2, Bump = -sext/trunc(S) << log(i' - i).
6050b57cec5SDimitry Andric     ConstantInt *Exponent =
6060b57cec5SDimitry Andric         ConstantInt::get(DeltaType, (-IndexOffset).logBase2());
6070b57cec5SDimitry Andric     return Builder.CreateNeg(Builder.CreateShl(ExtendedStride, Exponent));
6080b57cec5SDimitry Andric   }
6090b57cec5SDimitry Andric   Constant *Delta = ConstantInt::get(DeltaType, IndexOffset);
6100b57cec5SDimitry Andric   return Builder.CreateMul(ExtendedStride, Delta);
6110b57cec5SDimitry Andric }
6120b57cec5SDimitry Andric 
rewriteCandidateWithBasis(const Candidate & C,const Candidate & Basis)6130b57cec5SDimitry Andric void StraightLineStrengthReduce::rewriteCandidateWithBasis(
6140b57cec5SDimitry Andric     const Candidate &C, const Candidate &Basis) {
6150b57cec5SDimitry Andric   assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base &&
6160b57cec5SDimitry Andric          C.Stride == Basis.Stride);
6170b57cec5SDimitry Andric   // We run rewriteCandidateWithBasis on all candidates in a post-order, so the
6180b57cec5SDimitry Andric   // basis of a candidate cannot be unlinked before the candidate.
6190b57cec5SDimitry Andric   assert(Basis.Ins->getParent() != nullptr && "the basis is unlinked");
6200b57cec5SDimitry Andric 
6210b57cec5SDimitry Andric   // An instruction can correspond to multiple candidates. Therefore, instead of
6220b57cec5SDimitry Andric   // simply deleting an instruction when we rewrite it, we mark its parent as
6230b57cec5SDimitry Andric   // nullptr (i.e. unlink it) so that we can skip the candidates whose
6240b57cec5SDimitry Andric   // instruction is already rewritten.
6250b57cec5SDimitry Andric   if (!C.Ins->getParent())
6260b57cec5SDimitry Andric     return;
6270b57cec5SDimitry Andric 
6280b57cec5SDimitry Andric   IRBuilder<> Builder(C.Ins);
629297eecfbSDimitry Andric   Value *Bump = emitBump(Basis, C, Builder, DL);
6300b57cec5SDimitry Andric   Value *Reduced = nullptr; // equivalent to but weaker than C.Ins
6310b57cec5SDimitry Andric   switch (C.CandidateKind) {
6320b57cec5SDimitry Andric   case Candidate::Add:
6330b57cec5SDimitry Andric   case Candidate::Mul: {
6340b57cec5SDimitry Andric     // C = Basis + Bump
6350b57cec5SDimitry Andric     Value *NegBump;
6360b57cec5SDimitry Andric     if (match(Bump, m_Neg(m_Value(NegBump)))) {
6370b57cec5SDimitry Andric       // If Bump is a neg instruction, emit C = Basis - (-Bump).
6380b57cec5SDimitry Andric       Reduced = Builder.CreateSub(Basis.Ins, NegBump);
6390b57cec5SDimitry Andric       // We only use the negative argument of Bump, and Bump itself may be
6400b57cec5SDimitry Andric       // trivially dead.
6410b57cec5SDimitry Andric       RecursivelyDeleteTriviallyDeadInstructions(Bump);
6420b57cec5SDimitry Andric     } else {
6430b57cec5SDimitry Andric       // It's tempting to preserve nsw on Bump and/or Reduced. However, it's
6440b57cec5SDimitry Andric       // usually unsound, e.g.,
6450b57cec5SDimitry Andric       //
6460b57cec5SDimitry Andric       // X = (-2 +nsw 1) *nsw INT_MAX
6470b57cec5SDimitry Andric       // Y = (-2 +nsw 3) *nsw INT_MAX
6480b57cec5SDimitry Andric       //   =>
6490b57cec5SDimitry Andric       // Y = X + 2 * INT_MAX
6500b57cec5SDimitry Andric       //
6510b57cec5SDimitry Andric       // Neither + and * in the resultant expression are nsw.
6520b57cec5SDimitry Andric       Reduced = Builder.CreateAdd(Basis.Ins, Bump);
6530b57cec5SDimitry Andric     }
6540b57cec5SDimitry Andric     break;
6550b57cec5SDimitry Andric   }
656297eecfbSDimitry Andric   case Candidate::GEP: {
6570b57cec5SDimitry Andric     bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();
6580b57cec5SDimitry Andric     // C = (char *)Basis + Bump
6597a6dacacSDimitry Andric     Reduced = Builder.CreatePtrAdd(Basis.Ins, Bump, "", InBounds);
6600b57cec5SDimitry Andric     break;
6610b57cec5SDimitry Andric   }
6620b57cec5SDimitry Andric   default:
6630b57cec5SDimitry Andric     llvm_unreachable("C.CandidateKind is invalid");
6640b57cec5SDimitry Andric   };
6650b57cec5SDimitry Andric   Reduced->takeName(C.Ins);
6660b57cec5SDimitry Andric   C.Ins->replaceAllUsesWith(Reduced);
6670b57cec5SDimitry Andric   // Unlink C.Ins so that we can skip other candidates also corresponding to
6680b57cec5SDimitry Andric   // C.Ins. The actual deletion is postponed to the end of runOnFunction.
6690b57cec5SDimitry Andric   C.Ins->removeFromParent();
6700b57cec5SDimitry Andric   UnlinkedInstructions.push_back(C.Ins);
6710b57cec5SDimitry Andric }
6720b57cec5SDimitry Andric 
runOnFunction(Function & F)673e8d8bef9SDimitry Andric bool StraightLineStrengthReduceLegacyPass::runOnFunction(Function &F) {
6740b57cec5SDimitry Andric   if (skipFunction(F))
6750b57cec5SDimitry Andric     return false;
6760b57cec5SDimitry Andric 
677e8d8bef9SDimitry Andric   auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
678e8d8bef9SDimitry Andric   auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
679e8d8bef9SDimitry Andric   auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
680e8d8bef9SDimitry Andric   return StraightLineStrengthReduce(DL, DT, SE, TTI).runOnFunction(F);
681e8d8bef9SDimitry Andric }
682e8d8bef9SDimitry Andric 
runOnFunction(Function & F)683e8d8bef9SDimitry Andric bool StraightLineStrengthReduce::runOnFunction(Function &F) {
6840b57cec5SDimitry Andric   // Traverse the dominator tree in the depth-first order. This order makes sure
6850b57cec5SDimitry Andric   // all bases of a candidate are in Candidates when we process it.
6860b57cec5SDimitry Andric   for (const auto Node : depth_first(DT))
6870b57cec5SDimitry Andric     for (auto &I : *(Node->getBlock()))
6880b57cec5SDimitry Andric       allocateCandidatesAndFindBasis(&I);
6890b57cec5SDimitry Andric 
6900b57cec5SDimitry Andric   // Rewrite candidates in the reverse depth-first order. This order makes sure
6910b57cec5SDimitry Andric   // a candidate being rewritten is not a basis for any other candidate.
6920b57cec5SDimitry Andric   while (!Candidates.empty()) {
6930b57cec5SDimitry Andric     const Candidate &C = Candidates.back();
6940b57cec5SDimitry Andric     if (C.Basis != nullptr) {
6950b57cec5SDimitry Andric       rewriteCandidateWithBasis(C, *C.Basis);
6960b57cec5SDimitry Andric     }
6970b57cec5SDimitry Andric     Candidates.pop_back();
6980b57cec5SDimitry Andric   }
6990b57cec5SDimitry Andric 
7000b57cec5SDimitry Andric   // Delete all unlink instructions.
7010b57cec5SDimitry Andric   for (auto *UnlinkedInst : UnlinkedInstructions) {
7020b57cec5SDimitry Andric     for (unsigned I = 0, E = UnlinkedInst->getNumOperands(); I != E; ++I) {
7030b57cec5SDimitry Andric       Value *Op = UnlinkedInst->getOperand(I);
7040b57cec5SDimitry Andric       UnlinkedInst->setOperand(I, nullptr);
7050b57cec5SDimitry Andric       RecursivelyDeleteTriviallyDeadInstructions(Op);
7060b57cec5SDimitry Andric     }
7070b57cec5SDimitry Andric     UnlinkedInst->deleteValue();
7080b57cec5SDimitry Andric   }
7090b57cec5SDimitry Andric   bool Ret = !UnlinkedInstructions.empty();
7100b57cec5SDimitry Andric   UnlinkedInstructions.clear();
7110b57cec5SDimitry Andric   return Ret;
7120b57cec5SDimitry Andric }
713e8d8bef9SDimitry Andric 
714e8d8bef9SDimitry Andric namespace llvm {
715e8d8bef9SDimitry Andric 
716e8d8bef9SDimitry Andric PreservedAnalyses
run(Function & F,FunctionAnalysisManager & AM)717e8d8bef9SDimitry Andric StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) {
718e8d8bef9SDimitry Andric   const DataLayout *DL = &F.getParent()->getDataLayout();
719e8d8bef9SDimitry Andric   auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
720e8d8bef9SDimitry Andric   auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
721e8d8bef9SDimitry Andric   auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
722e8d8bef9SDimitry Andric 
723e8d8bef9SDimitry Andric   if (!StraightLineStrengthReduce(DL, DT, SE, TTI).runOnFunction(F))
724e8d8bef9SDimitry Andric     return PreservedAnalyses::all();
725e8d8bef9SDimitry Andric 
726e8d8bef9SDimitry Andric   PreservedAnalyses PA;
727e8d8bef9SDimitry Andric   PA.preserveSet<CFGAnalyses>();
728e8d8bef9SDimitry Andric   PA.preserve<DominatorTreeAnalysis>();
729e8d8bef9SDimitry Andric   PA.preserve<ScalarEvolutionAnalysis>();
730e8d8bef9SDimitry Andric   PA.preserve<TargetIRAnalysis>();
731e8d8bef9SDimitry Andric   return PA;
732e8d8bef9SDimitry Andric }
733e8d8bef9SDimitry Andric 
734e8d8bef9SDimitry Andric } // namespace llvm
735