10b57cec5SDimitry Andric //===- NaryReassociate.h - Reassociate n-ary expressions --------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This pass reassociates n-ary add expressions and eliminates the redundancy 100b57cec5SDimitry Andric // exposed by the reassociation. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric // A motivating example: 130b57cec5SDimitry Andric // 140b57cec5SDimitry Andric // void foo(int a, int b) { 150b57cec5SDimitry Andric // bar(a + b); 160b57cec5SDimitry Andric // bar((a + 2) + b); 170b57cec5SDimitry Andric // } 180b57cec5SDimitry Andric // 190b57cec5SDimitry Andric // An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify 200b57cec5SDimitry Andric // the above code to 210b57cec5SDimitry Andric // 220b57cec5SDimitry Andric // int t = a + b; 230b57cec5SDimitry Andric // bar(t); 240b57cec5SDimitry Andric // bar(t + 2); 250b57cec5SDimitry Andric // 260b57cec5SDimitry Andric // However, the Reassociate pass is unable to do that because it processes each 270b57cec5SDimitry Andric // instruction individually and believes (a + 2) + b is the best form according 280b57cec5SDimitry Andric // to its rank system. 290b57cec5SDimitry Andric // 300b57cec5SDimitry Andric // To address this limitation, NaryReassociate reassociates an expression in a 310b57cec5SDimitry Andric // form that reuses existing instructions. As a result, NaryReassociate can 320b57cec5SDimitry Andric // reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that 330b57cec5SDimitry Andric // (a + b) is computed before. 340b57cec5SDimitry Andric // 350b57cec5SDimitry Andric // NaryReassociate works as follows. For every instruction in the form of (a + 360b57cec5SDimitry Andric // b) + c, it checks whether a + c or b + c is already computed by a dominating 370b57cec5SDimitry Andric // instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b + 380b57cec5SDimitry Andric // c) + a and removes the redundancy accordingly. To efficiently look up whether 390b57cec5SDimitry Andric // an expression is computed before, we store each instruction seen and its SCEV 400b57cec5SDimitry Andric // into an SCEV-to-instruction map. 410b57cec5SDimitry Andric // 420b57cec5SDimitry Andric // Although the algorithm pattern-matches only ternary additions, it 430b57cec5SDimitry Andric // automatically handles many >3-ary expressions by walking through the function 440b57cec5SDimitry Andric // in the depth-first order. For example, given 450b57cec5SDimitry Andric // 460b57cec5SDimitry Andric // (a + c) + d 470b57cec5SDimitry Andric // ((a + b) + c) + d 480b57cec5SDimitry Andric // 490b57cec5SDimitry Andric // NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites 500b57cec5SDimitry Andric // ((a + c) + b) + d into ((a + c) + d) + b. 510b57cec5SDimitry Andric // 520b57cec5SDimitry Andric // Finally, the above dominator-based algorithm may need to be run multiple 530b57cec5SDimitry Andric // iterations before emitting optimal code. One source of this need is that we 540b57cec5SDimitry Andric // only split an operand when it is used only once. The above algorithm can 550b57cec5SDimitry Andric // eliminate an instruction and decrease the usage count of its operands. As a 560b57cec5SDimitry Andric // result, an instruction that previously had multiple uses may become a 570b57cec5SDimitry Andric // single-use instruction and thus eligible for split consideration. For 580b57cec5SDimitry Andric // example, 590b57cec5SDimitry Andric // 600b57cec5SDimitry Andric // ac = a + c 610b57cec5SDimitry Andric // ab = a + b 620b57cec5SDimitry Andric // abc = ab + c 630b57cec5SDimitry Andric // ab2 = ab + b 640b57cec5SDimitry Andric // ab2c = ab2 + c 650b57cec5SDimitry Andric // 660b57cec5SDimitry Andric // In the first iteration, we cannot reassociate abc to ac+b because ab is used 670b57cec5SDimitry Andric // twice. However, we can reassociate ab2c to abc+b in the first iteration. As a 680b57cec5SDimitry Andric // result, ab2 becomes dead and ab will be used only once in the second 690b57cec5SDimitry Andric // iteration. 700b57cec5SDimitry Andric // 710b57cec5SDimitry Andric // Limitations and TODO items: 720b57cec5SDimitry Andric // 730b57cec5SDimitry Andric // 1) We only considers n-ary adds and muls for now. This should be extended 740b57cec5SDimitry Andric // and generalized. 750b57cec5SDimitry Andric // 760b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric #ifndef LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H 790b57cec5SDimitry Andric #define LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H 800b57cec5SDimitry Andric 810b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 820b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 830b57cec5SDimitry Andric #include "llvm/IR/PassManager.h" 840b57cec5SDimitry Andric #include "llvm/IR/ValueHandle.h" 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric namespace llvm { 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric class AssumptionCache; 890b57cec5SDimitry Andric class BinaryOperator; 900b57cec5SDimitry Andric class DataLayout; 910b57cec5SDimitry Andric class DominatorTree; 920b57cec5SDimitry Andric class Function; 930b57cec5SDimitry Andric class GetElementPtrInst; 940b57cec5SDimitry Andric class Instruction; 950b57cec5SDimitry Andric class ScalarEvolution; 960b57cec5SDimitry Andric class SCEV; 970b57cec5SDimitry Andric class TargetLibraryInfo; 980b57cec5SDimitry Andric class TargetTransformInfo; 990b57cec5SDimitry Andric class Type; 1000b57cec5SDimitry Andric class Value; 1010b57cec5SDimitry Andric 1020b57cec5SDimitry Andric class NaryReassociatePass : public PassInfoMixin<NaryReassociatePass> { 1030b57cec5SDimitry Andric public: 1040b57cec5SDimitry Andric PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 1050b57cec5SDimitry Andric 1060b57cec5SDimitry Andric // Glue for old PM. 1070b57cec5SDimitry Andric bool runImpl(Function &F, AssumptionCache *AC_, DominatorTree *DT_, 1080b57cec5SDimitry Andric ScalarEvolution *SE_, TargetLibraryInfo *TLI_, 1090b57cec5SDimitry Andric TargetTransformInfo *TTI_); 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric private: 1120b57cec5SDimitry Andric // Runs only one iteration of the dominator-based algorithm. See the header 1130b57cec5SDimitry Andric // comments for why we need multiple iterations. 1140b57cec5SDimitry Andric bool doOneIteration(Function &F); 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric // Reassociates I for better CSE. 1170b57cec5SDimitry Andric Instruction *tryReassociate(Instruction *I, const SCEV *&OrigSCEV); 1180b57cec5SDimitry Andric 1190b57cec5SDimitry Andric // Reassociate GEP for better CSE. 1200b57cec5SDimitry Andric Instruction *tryReassociateGEP(GetElementPtrInst *GEP); 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric // Try splitting GEP at the I-th index and see whether either part can be 1230b57cec5SDimitry Andric // CSE'ed. This is a helper function for tryReassociateGEP. 1240b57cec5SDimitry Andric // 1250b57cec5SDimitry Andric // \p IndexedType The element type indexed by GEP's I-th index. This is 1260b57cec5SDimitry Andric // equivalent to 1270b57cec5SDimitry Andric // GEP->getIndexedType(GEP->getPointerOperand(), 0-th index, 1280b57cec5SDimitry Andric // ..., i-th index). 1290b57cec5SDimitry Andric GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP, 1300b57cec5SDimitry Andric unsigned I, Type *IndexedType); 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric // Given GEP's I-th index = LHS + RHS, see whether &Base[..][LHS][..] or 1330b57cec5SDimitry Andric // &Base[..][RHS][..] can be CSE'ed and rewrite GEP accordingly. 1340b57cec5SDimitry Andric GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP, 1350b57cec5SDimitry Andric unsigned I, Value *LHS, 1360b57cec5SDimitry Andric Value *RHS, Type *IndexedType); 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric // Reassociate binary operators for better CSE. 1390b57cec5SDimitry Andric Instruction *tryReassociateBinaryOp(BinaryOperator *I); 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly 1420b57cec5SDimitry Andric // passed. 1430b57cec5SDimitry Andric Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS, 1440b57cec5SDimitry Andric BinaryOperator *I); 1450b57cec5SDimitry Andric // Rewrites I to (LHS op RHS) if LHS is computed already. 1460b57cec5SDimitry Andric Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS, 1470b57cec5SDimitry Andric BinaryOperator *I); 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric // Tries to match Op1 and Op2 by using V. 1500b57cec5SDimitry Andric bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2); 1510b57cec5SDimitry Andric 1520b57cec5SDimitry Andric // Gets SCEV for (LHS op RHS). 1530b57cec5SDimitry Andric const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS, 1540b57cec5SDimitry Andric const SCEV *RHS); 1550b57cec5SDimitry Andric 1560b57cec5SDimitry Andric // Returns the closest dominator of \c Dominatee that computes 1570b57cec5SDimitry Andric // \c CandidateExpr. Returns null if not found. 1580b57cec5SDimitry Andric Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr, 1590b57cec5SDimitry Andric Instruction *Dominatee); 1600b57cec5SDimitry Andric 1610b57cec5SDimitry Andric // Try to match \p I as signed/unsigned Min/Max and reassociate it. \p 1620b57cec5SDimitry Andric // OrigSCEV is set if \I matches Min/Max regardless whether resassociation is 1630b57cec5SDimitry Andric // done or not. If reassociation was successful newly generated instruction is 1640b57cec5SDimitry Andric // returned, otherwise nullptr. 1650b57cec5SDimitry Andric template <typename PredT> 1660b57cec5SDimitry Andric Instruction *matchAndReassociateMinOrMax(Instruction *I, 1670b57cec5SDimitry Andric const SCEV *&OrigSCEV); 1680b57cec5SDimitry Andric 1690b57cec5SDimitry Andric // Reassociate Min/Max. 1700b57cec5SDimitry Andric template <typename MaxMinT> 1710b57cec5SDimitry Andric Value *tryReassociateMinOrMax(Instruction *I, MaxMinT MaxMinMatch, Value *LHS, 1720b57cec5SDimitry Andric Value *RHS); 1730b57cec5SDimitry Andric 1740b57cec5SDimitry Andric // GetElementPtrInst implicitly sign-extends an index if the index is shorter 1750b57cec5SDimitry Andric // than the pointer size. This function returns whether Index is shorter than 1760b57cec5SDimitry Andric // GEP's pointer size, i.e., whether Index needs to be sign-extended in order 1770b57cec5SDimitry Andric // to be an index of GEP. 1780b57cec5SDimitry Andric bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP); 1790b57cec5SDimitry Andric 1800b57cec5SDimitry Andric AssumptionCache *AC; 1810b57cec5SDimitry Andric const DataLayout *DL; 1820b57cec5SDimitry Andric DominatorTree *DT; 1830b57cec5SDimitry Andric ScalarEvolution *SE; 1840b57cec5SDimitry Andric TargetLibraryInfo *TLI; 1850b57cec5SDimitry Andric TargetTransformInfo *TTI; 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric // A lookup table quickly telling which instructions compute the given SCEV. 1880b57cec5SDimitry Andric // Note that there can be multiple instructions at different locations 189 // computing to the same SCEV, so we map a SCEV to an instruction list. For 190 // example, 191 // 192 // if (p1) 193 // foo(a + b); 194 // if (p2) 195 // bar(a + b); 196 DenseMap<const SCEV *, SmallVector<WeakTrackingVH, 2>> SeenExprs; 197 }; 198 199 } // end namespace llvm 200 201 #endif // LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H 202