//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Post-legalization combines on generic MachineInstrs.
///
/// The combines here must preserve instruction legality.
///
/// Lowering combines (e.g. pseudo matching) should be handled by
/// AArch64PostLegalizerLowering.
///
/// Combines which don't rely on instruction legality should go in the
/// AArch64PreLegalizerCombiner.
///
//===----------------------------------------------------------------------===//
21097a140dSpatrick 
22097a140dSpatrick #include "AArch64TargetMachine.h"
23*d415bd75Srobert #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
24097a140dSpatrick #include "llvm/CodeGen/GlobalISel/Combiner.h"
25097a140dSpatrick #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
26097a140dSpatrick #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
2773471bf0Spatrick #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
28097a140dSpatrick #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
29*d415bd75Srobert #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
30097a140dSpatrick #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
3173471bf0Spatrick #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
3273471bf0Spatrick #include "llvm/CodeGen/GlobalISel/Utils.h"
33097a140dSpatrick #include "llvm/CodeGen/MachineDominators.h"
34097a140dSpatrick #include "llvm/CodeGen/MachineFunctionPass.h"
3573471bf0Spatrick #include "llvm/CodeGen/MachineRegisterInfo.h"
3673471bf0Spatrick #include "llvm/CodeGen/TargetOpcodes.h"
37097a140dSpatrick #include "llvm/CodeGen/TargetPassConfig.h"
38097a140dSpatrick #include "llvm/Support/Debug.h"
39097a140dSpatrick 
40097a140dSpatrick #define DEBUG_TYPE "aarch64-postlegalizer-combiner"
41097a140dSpatrick 
42097a140dSpatrick using namespace llvm;
43097a140dSpatrick using namespace MIPatternMatch;
44097a140dSpatrick 
4573471bf0Spatrick /// This combine tries do what performExtractVectorEltCombine does in SDAG.
4673471bf0Spatrick /// Rewrite for pairwise fadd pattern
4773471bf0Spatrick ///   (s32 (g_extract_vector_elt
4873471bf0Spatrick ///           (g_fadd (vXs32 Other)
4973471bf0Spatrick ///                  (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0))
5073471bf0Spatrick /// ->
5173471bf0Spatrick ///   (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)
5273471bf0Spatrick ///              (g_extract_vector_elt (vXs32 Other) 1))
matchExtractVecEltPairwiseAdd(MachineInstr & MI,MachineRegisterInfo & MRI,std::tuple<unsigned,LLT,Register> & MatchInfo)5373471bf0Spatrick bool matchExtractVecEltPairwiseAdd(
5473471bf0Spatrick     MachineInstr &MI, MachineRegisterInfo &MRI,
5573471bf0Spatrick     std::tuple<unsigned, LLT, Register> &MatchInfo) {
5673471bf0Spatrick   Register Src1 = MI.getOperand(1).getReg();
5773471bf0Spatrick   Register Src2 = MI.getOperand(2).getReg();
5873471bf0Spatrick   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5973471bf0Spatrick 
60*d415bd75Srobert   auto Cst = getIConstantVRegValWithLookThrough(Src2, MRI);
6173471bf0Spatrick   if (!Cst || Cst->Value != 0)
6273471bf0Spatrick     return false;
6373471bf0Spatrick   // SDAG also checks for FullFP16, but this looks to be beneficial anyway.
6473471bf0Spatrick 
6573471bf0Spatrick   // Now check for an fadd operation. TODO: expand this for integer add?
6673471bf0Spatrick   auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI);
6773471bf0Spatrick   if (!FAddMI)
6873471bf0Spatrick     return false;
6973471bf0Spatrick 
7073471bf0Spatrick   // If we add support for integer add, must restrict these types to just s64.
7173471bf0Spatrick   unsigned DstSize = DstTy.getSizeInBits();
7273471bf0Spatrick   if (DstSize != 16 && DstSize != 32 && DstSize != 64)
7373471bf0Spatrick     return false;
7473471bf0Spatrick 
7573471bf0Spatrick   Register Src1Op1 = FAddMI->getOperand(1).getReg();
7673471bf0Spatrick   Register Src1Op2 = FAddMI->getOperand(2).getReg();
7773471bf0Spatrick   MachineInstr *Shuffle =
7873471bf0Spatrick       getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI);
7973471bf0Spatrick   MachineInstr *Other = MRI.getVRegDef(Src1Op1);
8073471bf0Spatrick   if (!Shuffle) {
8173471bf0Spatrick     Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI);
8273471bf0Spatrick     Other = MRI.getVRegDef(Src1Op2);
8373471bf0Spatrick   }
8473471bf0Spatrick 
8573471bf0Spatrick   // We're looking for a shuffle that moves the second element to index 0.
8673471bf0Spatrick   if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 &&
8773471bf0Spatrick       Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) {
8873471bf0Spatrick     std::get<0>(MatchInfo) = TargetOpcode::G_FADD;
8973471bf0Spatrick     std::get<1>(MatchInfo) = DstTy;
9073471bf0Spatrick     std::get<2>(MatchInfo) = Other->getOperand(0).getReg();
9173471bf0Spatrick     return true;
9273471bf0Spatrick   }
9373471bf0Spatrick   return false;
9473471bf0Spatrick }
9573471bf0Spatrick 
applyExtractVecEltPairwiseAdd(MachineInstr & MI,MachineRegisterInfo & MRI,MachineIRBuilder & B,std::tuple<unsigned,LLT,Register> & MatchInfo)9673471bf0Spatrick bool applyExtractVecEltPairwiseAdd(
9773471bf0Spatrick     MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
9873471bf0Spatrick     std::tuple<unsigned, LLT, Register> &MatchInfo) {
9973471bf0Spatrick   unsigned Opc = std::get<0>(MatchInfo);
10073471bf0Spatrick   assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
10173471bf0Spatrick   // We want to generate two extracts of elements 0 and 1, and add them.
10273471bf0Spatrick   LLT Ty = std::get<1>(MatchInfo);
10373471bf0Spatrick   Register Src = std::get<2>(MatchInfo);
10473471bf0Spatrick   LLT s64 = LLT::scalar(64);
10573471bf0Spatrick   B.setInstrAndDebugLoc(MI);
10673471bf0Spatrick   auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0));
10773471bf0Spatrick   auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1));
10873471bf0Spatrick   B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1});
10973471bf0Spatrick   MI.eraseFromParent();
11073471bf0Spatrick   return true;
11173471bf0Spatrick }
11273471bf0Spatrick 
isSignExtended(Register R,MachineRegisterInfo & MRI)11373471bf0Spatrick static bool isSignExtended(Register R, MachineRegisterInfo &MRI) {
11473471bf0Spatrick   // TODO: check if extended build vector as well.
11573471bf0Spatrick   unsigned Opc = MRI.getVRegDef(R)->getOpcode();
11673471bf0Spatrick   return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG;
11773471bf0Spatrick }
11873471bf0Spatrick 
isZeroExtended(Register R,MachineRegisterInfo & MRI)11973471bf0Spatrick static bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
12073471bf0Spatrick   // TODO: check if extended build vector as well.
12173471bf0Spatrick   return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
12273471bf0Spatrick }
12373471bf0Spatrick 
matchAArch64MulConstCombine(MachineInstr & MI,MachineRegisterInfo & MRI,std::function<void (MachineIRBuilder & B,Register DstReg)> & ApplyFn)12473471bf0Spatrick bool matchAArch64MulConstCombine(
12573471bf0Spatrick     MachineInstr &MI, MachineRegisterInfo &MRI,
12673471bf0Spatrick     std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
12773471bf0Spatrick   assert(MI.getOpcode() == TargetOpcode::G_MUL);
12873471bf0Spatrick   Register LHS = MI.getOperand(1).getReg();
12973471bf0Spatrick   Register RHS = MI.getOperand(2).getReg();
13073471bf0Spatrick   Register Dst = MI.getOperand(0).getReg();
13173471bf0Spatrick   const LLT Ty = MRI.getType(LHS);
13273471bf0Spatrick 
13373471bf0Spatrick   // The below optimizations require a constant RHS.
134*d415bd75Srobert   auto Const = getIConstantVRegValWithLookThrough(RHS, MRI);
13573471bf0Spatrick   if (!Const)
13673471bf0Spatrick     return false;
13773471bf0Spatrick 
138*d415bd75Srobert   APInt ConstValue = Const->Value.sext(Ty.getSizeInBits());
13973471bf0Spatrick   // The following code is ported from AArch64ISelLowering.
14073471bf0Spatrick   // Multiplication of a power of two plus/minus one can be done more
14173471bf0Spatrick   // cheaply as as shift+add/sub. For now, this is true unilaterally. If
14273471bf0Spatrick   // future CPUs have a cheaper MADD instruction, this may need to be
14373471bf0Spatrick   // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
14473471bf0Spatrick   // 64-bit is 5 cycles, so this is always a win.
14573471bf0Spatrick   // More aggressively, some multiplications N0 * C can be lowered to
14673471bf0Spatrick   // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
14773471bf0Spatrick   // e.g. 6=3*2=(2+1)*2.
14873471bf0Spatrick   // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
14973471bf0Spatrick   // which equals to (1+2)*16-(1+2).
15073471bf0Spatrick   // TrailingZeroes is used to test if the mul can be lowered to
15173471bf0Spatrick   // shift+add+shift.
15273471bf0Spatrick   unsigned TrailingZeroes = ConstValue.countTrailingZeros();
15373471bf0Spatrick   if (TrailingZeroes) {
15473471bf0Spatrick     // Conservatively do not lower to shift+add+shift if the mul might be
15573471bf0Spatrick     // folded into smul or umul.
15673471bf0Spatrick     if (MRI.hasOneNonDBGUse(LHS) &&
15773471bf0Spatrick         (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI)))
15873471bf0Spatrick       return false;
15973471bf0Spatrick     // Conservatively do not lower to shift+add+shift if the mul might be
16073471bf0Spatrick     // folded into madd or msub.
16173471bf0Spatrick     if (MRI.hasOneNonDBGUse(Dst)) {
16273471bf0Spatrick       MachineInstr &UseMI = *MRI.use_instr_begin(Dst);
16373471bf0Spatrick       unsigned UseOpc = UseMI.getOpcode();
16473471bf0Spatrick       if (UseOpc == TargetOpcode::G_ADD || UseOpc == TargetOpcode::G_PTR_ADD ||
16573471bf0Spatrick           UseOpc == TargetOpcode::G_SUB)
16673471bf0Spatrick         return false;
16773471bf0Spatrick     }
16873471bf0Spatrick   }
16973471bf0Spatrick   // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
17073471bf0Spatrick   // and shift+add+shift.
17173471bf0Spatrick   APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
17273471bf0Spatrick 
17373471bf0Spatrick   unsigned ShiftAmt, AddSubOpc;
17473471bf0Spatrick   // Is the shifted value the LHS operand of the add/sub?
17573471bf0Spatrick   bool ShiftValUseIsLHS = true;
17673471bf0Spatrick   // Do we need to negate the result?
17773471bf0Spatrick   bool NegateResult = false;
17873471bf0Spatrick 
17973471bf0Spatrick   if (ConstValue.isNonNegative()) {
18073471bf0Spatrick     // (mul x, 2^N + 1) => (add (shl x, N), x)
18173471bf0Spatrick     // (mul x, 2^N - 1) => (sub (shl x, N), x)
18273471bf0Spatrick     // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
18373471bf0Spatrick     APInt SCVMinus1 = ShiftedConstValue - 1;
18473471bf0Spatrick     APInt CVPlus1 = ConstValue + 1;
18573471bf0Spatrick     if (SCVMinus1.isPowerOf2()) {
18673471bf0Spatrick       ShiftAmt = SCVMinus1.logBase2();
18773471bf0Spatrick       AddSubOpc = TargetOpcode::G_ADD;
18873471bf0Spatrick     } else if (CVPlus1.isPowerOf2()) {
18973471bf0Spatrick       ShiftAmt = CVPlus1.logBase2();
19073471bf0Spatrick       AddSubOpc = TargetOpcode::G_SUB;
19173471bf0Spatrick     } else
19273471bf0Spatrick       return false;
19373471bf0Spatrick   } else {
19473471bf0Spatrick     // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
19573471bf0Spatrick     // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
19673471bf0Spatrick     APInt CVNegPlus1 = -ConstValue + 1;
19773471bf0Spatrick     APInt CVNegMinus1 = -ConstValue - 1;
19873471bf0Spatrick     if (CVNegPlus1.isPowerOf2()) {
19973471bf0Spatrick       ShiftAmt = CVNegPlus1.logBase2();
20073471bf0Spatrick       AddSubOpc = TargetOpcode::G_SUB;
20173471bf0Spatrick       ShiftValUseIsLHS = false;
20273471bf0Spatrick     } else if (CVNegMinus1.isPowerOf2()) {
20373471bf0Spatrick       ShiftAmt = CVNegMinus1.logBase2();
20473471bf0Spatrick       AddSubOpc = TargetOpcode::G_ADD;
20573471bf0Spatrick       NegateResult = true;
20673471bf0Spatrick     } else
20773471bf0Spatrick       return false;
20873471bf0Spatrick   }
20973471bf0Spatrick 
21073471bf0Spatrick   if (NegateResult && TrailingZeroes)
21173471bf0Spatrick     return false;
21273471bf0Spatrick 
21373471bf0Spatrick   ApplyFn = [=](MachineIRBuilder &B, Register DstReg) {
21473471bf0Spatrick     auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt);
21573471bf0Spatrick     auto ShiftedVal = B.buildShl(Ty, LHS, Shift);
21673471bf0Spatrick 
21773471bf0Spatrick     Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS;
21873471bf0Spatrick     Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0);
21973471bf0Spatrick     auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS});
22073471bf0Spatrick     assert(!(NegateResult && TrailingZeroes) &&
22173471bf0Spatrick            "NegateResult and TrailingZeroes cannot both be true for now.");
22273471bf0Spatrick     // Negate the result.
22373471bf0Spatrick     if (NegateResult) {
22473471bf0Spatrick       B.buildSub(DstReg, B.buildConstant(Ty, 0), Res);
22573471bf0Spatrick       return;
22673471bf0Spatrick     }
22773471bf0Spatrick     // Shift the result.
22873471bf0Spatrick     if (TrailingZeroes) {
22973471bf0Spatrick       B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes));
23073471bf0Spatrick       return;
23173471bf0Spatrick     }
23273471bf0Spatrick     B.buildCopy(DstReg, Res.getReg(0));
233097a140dSpatrick   };
234097a140dSpatrick   return true;
235097a140dSpatrick }
236097a140dSpatrick 
applyAArch64MulConstCombine(MachineInstr & MI,MachineRegisterInfo & MRI,MachineIRBuilder & B,std::function<void (MachineIRBuilder & B,Register DstReg)> & ApplyFn)23773471bf0Spatrick bool applyAArch64MulConstCombine(
23873471bf0Spatrick     MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
23973471bf0Spatrick     std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
24073471bf0Spatrick   B.setInstrAndDebugLoc(MI);
24173471bf0Spatrick   ApplyFn(B, MI.getOperand(0).getReg());
242097a140dSpatrick   MI.eraseFromParent();
243097a140dSpatrick   return true;
244097a140dSpatrick }
245097a140dSpatrick 
24673471bf0Spatrick /// Try to fold a G_MERGE_VALUES of 2 s32 sources, where the second source
24773471bf0Spatrick /// is a zero, into a G_ZEXT of the first.
matchFoldMergeToZext(MachineInstr & MI,MachineRegisterInfo & MRI)24873471bf0Spatrick bool matchFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI) {
24973471bf0Spatrick   auto &Merge = cast<GMerge>(MI);
25073471bf0Spatrick   LLT SrcTy = MRI.getType(Merge.getSourceReg(0));
25173471bf0Spatrick   if (SrcTy != LLT::scalar(32) || Merge.getNumSources() != 2)
25273471bf0Spatrick     return false;
25373471bf0Spatrick   return mi_match(Merge.getSourceReg(1), MRI, m_SpecificICst(0));
25473471bf0Spatrick }
25573471bf0Spatrick 
applyFoldMergeToZext(MachineInstr & MI,MachineRegisterInfo & MRI,MachineIRBuilder & B,GISelChangeObserver & Observer)25673471bf0Spatrick void applyFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI,
25773471bf0Spatrick                           MachineIRBuilder &B, GISelChangeObserver &Observer) {
25873471bf0Spatrick   // Mutate %d(s64) = G_MERGE_VALUES %a(s32), 0(s32)
25973471bf0Spatrick   //  ->
26073471bf0Spatrick   // %d(s64) = G_ZEXT %a(s32)
26173471bf0Spatrick   Observer.changingInstr(MI);
26273471bf0Spatrick   MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
263*d415bd75Srobert   MI.removeOperand(2);
26473471bf0Spatrick   Observer.changedInstr(MI);
265097a140dSpatrick }
266097a140dSpatrick 
267*d415bd75Srobert /// \returns True if a G_ANYEXT instruction \p MI should be mutated to a G_ZEXT
268*d415bd75Srobert /// instruction.
matchMutateAnyExtToZExt(MachineInstr & MI,MachineRegisterInfo & MRI)269*d415bd75Srobert static bool matchMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI) {
270*d415bd75Srobert   // If this is coming from a scalar compare then we can use a G_ZEXT instead of
271*d415bd75Srobert   // a G_ANYEXT:
272*d415bd75Srobert   //
273*d415bd75Srobert   // %cmp:_(s32) = G_[I|F]CMP ... <-- produces 0/1.
274*d415bd75Srobert   // %ext:_(s64) = G_ANYEXT %cmp(s32)
275*d415bd75Srobert   //
276*d415bd75Srobert   // By doing this, we can leverage more KnownBits combines.
277*d415bd75Srobert   assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
278*d415bd75Srobert   Register Dst = MI.getOperand(0).getReg();
279*d415bd75Srobert   Register Src = MI.getOperand(1).getReg();
280*d415bd75Srobert   return MRI.getType(Dst).isScalar() &&
281*d415bd75Srobert          mi_match(Src, MRI,
282*d415bd75Srobert                   m_any_of(m_GICmp(m_Pred(), m_Reg(), m_Reg()),
283*d415bd75Srobert                            m_GFCmp(m_Pred(), m_Reg(), m_Reg())));
284*d415bd75Srobert }
285*d415bd75Srobert 
applyMutateAnyExtToZExt(MachineInstr & MI,MachineRegisterInfo & MRI,MachineIRBuilder & B,GISelChangeObserver & Observer)286*d415bd75Srobert static void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
287*d415bd75Srobert                               MachineIRBuilder &B,
288*d415bd75Srobert                               GISelChangeObserver &Observer) {
289*d415bd75Srobert   Observer.changingInstr(MI);
290*d415bd75Srobert   MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
291*d415bd75Srobert   Observer.changedInstr(MI);
292*d415bd75Srobert }
293*d415bd75Srobert 
294*d415bd75Srobert /// Match a 128b store of zero and split it into two 64 bit stores, for
295*d415bd75Srobert /// size/performance reasons.
matchSplitStoreZero128(MachineInstr & MI,MachineRegisterInfo & MRI)296*d415bd75Srobert static bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
297*d415bd75Srobert   GStore &Store = cast<GStore>(MI);
298*d415bd75Srobert   if (!Store.isSimple())
299*d415bd75Srobert     return false;
300*d415bd75Srobert   LLT ValTy = MRI.getType(Store.getValueReg());
301*d415bd75Srobert   if (!ValTy.isVector() || ValTy.getSizeInBits() != 128)
302*d415bd75Srobert     return false;
303*d415bd75Srobert   if (ValTy.getSizeInBits() != Store.getMemSizeInBits())
304*d415bd75Srobert     return false; // Don't split truncating stores.
305*d415bd75Srobert   if (!MRI.hasOneNonDBGUse(Store.getValueReg()))
306*d415bd75Srobert     return false;
307*d415bd75Srobert   auto MaybeCst = isConstantOrConstantSplatVector(
308*d415bd75Srobert       *MRI.getVRegDef(Store.getValueReg()), MRI);
309*d415bd75Srobert   return MaybeCst && MaybeCst->isZero();
310*d415bd75Srobert }
311*d415bd75Srobert 
applySplitStoreZero128(MachineInstr & MI,MachineRegisterInfo & MRI,MachineIRBuilder & B,GISelChangeObserver & Observer)312*d415bd75Srobert static void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
313*d415bd75Srobert                                    MachineIRBuilder &B,
314*d415bd75Srobert                                    GISelChangeObserver &Observer) {
315*d415bd75Srobert   B.setInstrAndDebugLoc(MI);
316*d415bd75Srobert   GStore &Store = cast<GStore>(MI);
317*d415bd75Srobert   assert(MRI.getType(Store.getValueReg()).isVector() &&
318*d415bd75Srobert          "Expected a vector store value");
319*d415bd75Srobert   LLT NewTy = LLT::scalar(64);
320*d415bd75Srobert   Register PtrReg = Store.getPointerReg();
321*d415bd75Srobert   auto Zero = B.buildConstant(NewTy, 0);
322*d415bd75Srobert   auto HighPtr = B.buildPtrAdd(MRI.getType(PtrReg), PtrReg,
323*d415bd75Srobert                                B.buildConstant(LLT::scalar(64), 8));
324*d415bd75Srobert   auto &MF = *MI.getMF();
325*d415bd75Srobert   auto *LowMMO = MF.getMachineMemOperand(&Store.getMMO(), 0, NewTy);
326*d415bd75Srobert   auto *HighMMO = MF.getMachineMemOperand(&Store.getMMO(), 8, NewTy);
327*d415bd75Srobert   B.buildStore(Zero, PtrReg, *LowMMO);
328*d415bd75Srobert   B.buildStore(Zero, HighPtr, *HighMMO);
329*d415bd75Srobert   Store.eraseFromParent();
330*d415bd75Srobert }
331*d415bd75Srobert 
332097a140dSpatrick #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
333097a140dSpatrick #include "AArch64GenPostLegalizeGICombiner.inc"
334097a140dSpatrick #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
335097a140dSpatrick 
336097a140dSpatrick namespace {
337097a140dSpatrick #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
338097a140dSpatrick #include "AArch64GenPostLegalizeGICombiner.inc"
339097a140dSpatrick #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
340097a140dSpatrick 
341097a140dSpatrick class AArch64PostLegalizerCombinerInfo : public CombinerInfo {
342097a140dSpatrick   GISelKnownBits *KB;
343097a140dSpatrick   MachineDominatorTree *MDT;
344097a140dSpatrick 
345097a140dSpatrick public:
346097a140dSpatrick   AArch64GenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
347097a140dSpatrick 
AArch64PostLegalizerCombinerInfo(bool EnableOpt,bool OptSize,bool MinSize,GISelKnownBits * KB,MachineDominatorTree * MDT)348097a140dSpatrick   AArch64PostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
349097a140dSpatrick                                    GISelKnownBits *KB,
350097a140dSpatrick                                    MachineDominatorTree *MDT)
351097a140dSpatrick       : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
352097a140dSpatrick                      /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
353097a140dSpatrick         KB(KB), MDT(MDT) {
354097a140dSpatrick     if (!GeneratedRuleCfg.parseCommandLineOption())
355097a140dSpatrick       report_fatal_error("Invalid rule identifier");
356097a140dSpatrick   }
357097a140dSpatrick 
358*d415bd75Srobert   bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
359097a140dSpatrick                MachineIRBuilder &B) const override;
360097a140dSpatrick };
361097a140dSpatrick 
combine(GISelChangeObserver & Observer,MachineInstr & MI,MachineIRBuilder & B) const362097a140dSpatrick bool AArch64PostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
363097a140dSpatrick                                                MachineInstr &MI,
364097a140dSpatrick                                                MachineIRBuilder &B) const {
365097a140dSpatrick   const auto *LI =
366097a140dSpatrick       MI.getParent()->getParent()->getSubtarget().getLegalizerInfo();
367*d415bd75Srobert   CombinerHelper Helper(Observer, B, /*IsPreLegalize*/ false, KB, MDT, LI);
368097a140dSpatrick   AArch64GenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg);
369097a140dSpatrick   return Generated.tryCombineAll(Observer, MI, B, Helper);
370097a140dSpatrick }
371097a140dSpatrick 
372097a140dSpatrick #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
373097a140dSpatrick #include "AArch64GenPostLegalizeGICombiner.inc"
374097a140dSpatrick #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
375097a140dSpatrick 
376097a140dSpatrick class AArch64PostLegalizerCombiner : public MachineFunctionPass {
377097a140dSpatrick public:
378097a140dSpatrick   static char ID;
379097a140dSpatrick 
380097a140dSpatrick   AArch64PostLegalizerCombiner(bool IsOptNone = false);
381097a140dSpatrick 
getPassName() const382097a140dSpatrick   StringRef getPassName() const override {
383097a140dSpatrick     return "AArch64PostLegalizerCombiner";
384097a140dSpatrick   }
385097a140dSpatrick 
386097a140dSpatrick   bool runOnMachineFunction(MachineFunction &MF) override;
387097a140dSpatrick   void getAnalysisUsage(AnalysisUsage &AU) const override;
388097a140dSpatrick 
389097a140dSpatrick private:
390097a140dSpatrick   bool IsOptNone;
391097a140dSpatrick };
392097a140dSpatrick } // end anonymous namespace
393097a140dSpatrick 
getAnalysisUsage(AnalysisUsage & AU) const394097a140dSpatrick void AArch64PostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
395097a140dSpatrick   AU.addRequired<TargetPassConfig>();
396097a140dSpatrick   AU.setPreservesCFG();
397097a140dSpatrick   getSelectionDAGFallbackAnalysisUsage(AU);
398097a140dSpatrick   AU.addRequired<GISelKnownBitsAnalysis>();
399097a140dSpatrick   AU.addPreserved<GISelKnownBitsAnalysis>();
400097a140dSpatrick   if (!IsOptNone) {
401097a140dSpatrick     AU.addRequired<MachineDominatorTree>();
402097a140dSpatrick     AU.addPreserved<MachineDominatorTree>();
40373471bf0Spatrick     AU.addRequired<GISelCSEAnalysisWrapperPass>();
40473471bf0Spatrick     AU.addPreserved<GISelCSEAnalysisWrapperPass>();
405097a140dSpatrick   }
406097a140dSpatrick   MachineFunctionPass::getAnalysisUsage(AU);
407097a140dSpatrick }
408097a140dSpatrick 
AArch64PostLegalizerCombiner(bool IsOptNone)409097a140dSpatrick AArch64PostLegalizerCombiner::AArch64PostLegalizerCombiner(bool IsOptNone)
410097a140dSpatrick     : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
411097a140dSpatrick   initializeAArch64PostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
412097a140dSpatrick }
413097a140dSpatrick 
runOnMachineFunction(MachineFunction & MF)414097a140dSpatrick bool AArch64PostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
415097a140dSpatrick   if (MF.getProperties().hasProperty(
416097a140dSpatrick           MachineFunctionProperties::Property::FailedISel))
417097a140dSpatrick     return false;
418097a140dSpatrick   assert(MF.getProperties().hasProperty(
419097a140dSpatrick              MachineFunctionProperties::Property::Legalized) &&
420097a140dSpatrick          "Expected a legalized function?");
421097a140dSpatrick   auto *TPC = &getAnalysis<TargetPassConfig>();
422097a140dSpatrick   const Function &F = MF.getFunction();
423097a140dSpatrick   bool EnableOpt =
424097a140dSpatrick       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
425097a140dSpatrick   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
426097a140dSpatrick   MachineDominatorTree *MDT =
427097a140dSpatrick       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
428097a140dSpatrick   AArch64PostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
429097a140dSpatrick                                           F.hasMinSize(), KB, MDT);
43073471bf0Spatrick   GISelCSEAnalysisWrapper &Wrapper =
43173471bf0Spatrick       getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
43273471bf0Spatrick   auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
433097a140dSpatrick   Combiner C(PCInfo, TPC);
43473471bf0Spatrick   return C.combineMachineInstrs(MF, CSEInfo);
435097a140dSpatrick }
436097a140dSpatrick 
437097a140dSpatrick char AArch64PostLegalizerCombiner::ID = 0;
438097a140dSpatrick INITIALIZE_PASS_BEGIN(AArch64PostLegalizerCombiner, DEBUG_TYPE,
439097a140dSpatrick                       "Combine AArch64 MachineInstrs after legalization", false,
440097a140dSpatrick                       false)
441097a140dSpatrick INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
442097a140dSpatrick INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
443097a140dSpatrick INITIALIZE_PASS_END(AArch64PostLegalizerCombiner, DEBUG_TYPE,
444097a140dSpatrick                     "Combine AArch64 MachineInstrs after legalization", false,
445097a140dSpatrick                     false)
446097a140dSpatrick 
447097a140dSpatrick namespace llvm {
createAArch64PostLegalizerCombiner(bool IsOptNone)44873471bf0Spatrick FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) {
449097a140dSpatrick   return new AArch64PostLegalizerCombiner(IsOptNone);
450097a140dSpatrick }
451097a140dSpatrick } // end namespace llvm
452