1e8d8bef9SDimitry Andric //=== AArch64PostSelectOptimize.cpp ---------------------------------------===// 2e8d8bef9SDimitry Andric // 3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e8d8bef9SDimitry Andric // 7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 8e8d8bef9SDimitry Andric // 9e8d8bef9SDimitry Andric // This pass does post-instruction-selection optimizations in the GlobalISel 10e8d8bef9SDimitry Andric // pipeline, before the rest of codegen runs. 11e8d8bef9SDimitry Andric // 12e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 13e8d8bef9SDimitry Andric 14e8d8bef9SDimitry Andric #include "AArch64.h" 15e8d8bef9SDimitry Andric #include "AArch64TargetMachine.h" 16e8d8bef9SDimitry Andric #include "MCTargetDesc/AArch64MCTargetDesc.h" 17fe6060f1SDimitry Andric #include "llvm/CodeGen/GlobalISel/Utils.h" 18e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 19e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 20e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 21e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 22e8d8bef9SDimitry Andric #include "llvm/Support/Debug.h" 23e8d8bef9SDimitry Andric 24e8d8bef9SDimitry Andric #define DEBUG_TYPE "aarch64-post-select-optimize" 25e8d8bef9SDimitry Andric 26e8d8bef9SDimitry Andric using namespace llvm; 27e8d8bef9SDimitry Andric 28e8d8bef9SDimitry Andric namespace { 29e8d8bef9SDimitry Andric class AArch64PostSelectOptimize : public MachineFunctionPass { 30e8d8bef9SDimitry Andric public: 31e8d8bef9SDimitry Andric static char ID; 32e8d8bef9SDimitry Andric 33e8d8bef9SDimitry Andric AArch64PostSelectOptimize(); 34e8d8bef9SDimitry Andric 35e8d8bef9SDimitry Andric StringRef getPassName() const override { 36e8d8bef9SDimitry Andric return "AArch64 Post Select Optimizer"; 37e8d8bef9SDimitry Andric } 38e8d8bef9SDimitry Andric 39e8d8bef9SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 40e8d8bef9SDimitry Andric 41e8d8bef9SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 42e8d8bef9SDimitry Andric 43e8d8bef9SDimitry Andric private: 44e8d8bef9SDimitry Andric bool optimizeNZCVDefs(MachineBasicBlock &MBB); 45e8d8bef9SDimitry Andric }; 46e8d8bef9SDimitry Andric } // end anonymous namespace 47e8d8bef9SDimitry Andric 48e8d8bef9SDimitry Andric void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const { 49e8d8bef9SDimitry Andric AU.addRequired<TargetPassConfig>(); 50e8d8bef9SDimitry Andric AU.setPreservesCFG(); 51e8d8bef9SDimitry Andric getSelectionDAGFallbackAnalysisUsage(AU); 52e8d8bef9SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 53e8d8bef9SDimitry Andric } 54e8d8bef9SDimitry Andric 55e8d8bef9SDimitry Andric AArch64PostSelectOptimize::AArch64PostSelectOptimize() 56e8d8bef9SDimitry Andric : MachineFunctionPass(ID) { 57e8d8bef9SDimitry Andric initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry()); 58e8d8bef9SDimitry Andric } 59e8d8bef9SDimitry Andric 60e8d8bef9SDimitry Andric unsigned getNonFlagSettingVariant(unsigned Opc) { 61e8d8bef9SDimitry Andric switch (Opc) { 62e8d8bef9SDimitry Andric default: 63e8d8bef9SDimitry Andric return 0; 64e8d8bef9SDimitry Andric case AArch64::SUBSXrr: 65e8d8bef9SDimitry Andric return AArch64::SUBXrr; 66e8d8bef9SDimitry Andric case AArch64::SUBSWrr: 67e8d8bef9SDimitry Andric return AArch64::SUBWrr; 68e8d8bef9SDimitry Andric case AArch64::SUBSXrs: 69e8d8bef9SDimitry Andric return AArch64::SUBXrs; 70e8d8bef9SDimitry Andric case AArch64::SUBSXri: 71e8d8bef9SDimitry Andric return AArch64::SUBXri; 72e8d8bef9SDimitry Andric case AArch64::SUBSWri: 73e8d8bef9SDimitry Andric return AArch64::SUBWri; 74e8d8bef9SDimitry Andric } 75e8d8bef9SDimitry Andric } 76e8d8bef9SDimitry Andric 77e8d8bef9SDimitry Andric bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) { 78e8d8bef9SDimitry Andric // Consider the following code: 79e8d8bef9SDimitry Andric // FCMPSrr %0, %1, implicit-def $nzcv 80e8d8bef9SDimitry Andric // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv 81e8d8bef9SDimitry Andric // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv 82e8d8bef9SDimitry Andric // FCMPSrr %0, %1, implicit-def $nzcv 83e8d8bef9SDimitry Andric // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv 84e8d8bef9SDimitry Andric // This kind of code where we have 2 FCMPs each feeding a CSEL can happen 85e8d8bef9SDimitry Andric // when we have a single IR fcmp being used by two selects. During selection, 86e8d8bef9SDimitry Andric // to ensure that there can be no clobbering of nzcv between the fcmp and the 87e8d8bef9SDimitry Andric // csel, we have to generate an fcmp immediately before each csel is 88e8d8bef9SDimitry Andric // selected. 89e8d8bef9SDimitry Andric // However, often we can essentially CSE these together later in MachineCSE. 90e8d8bef9SDimitry Andric // This doesn't work though if there are unrelated flag-setting instructions 91e8d8bef9SDimitry Andric // in between the two FCMPs. In this case, the SUBS defines NZCV 92e8d8bef9SDimitry Andric // but it doesn't have any users, being overwritten by the second FCMP. 93e8d8bef9SDimitry Andric // 94e8d8bef9SDimitry Andric // Our solution here is to try to convert flag setting operations between 95e8d8bef9SDimitry Andric // a interval of identical FCMPs, so that CSE will be able to eliminate one. 96e8d8bef9SDimitry Andric bool Changed = false; 97fe6060f1SDimitry Andric auto &MF = *MBB.getParent(); 98fe6060f1SDimitry Andric auto &Subtarget = MF.getSubtarget(); 99fe6060f1SDimitry Andric const auto &TII = Subtarget.getInstrInfo(); 100fe6060f1SDimitry Andric auto TRI = Subtarget.getRegisterInfo(); 101fe6060f1SDimitry Andric auto RBI = Subtarget.getRegBankInfo(); 102fe6060f1SDimitry Andric auto &MRI = MF.getRegInfo(); 103e8d8bef9SDimitry Andric 104e8d8bef9SDimitry Andric // The first step is to find the first and last FCMPs. If we have found 105e8d8bef9SDimitry Andric // at least two, then set the limit of the bottom-up walk to the first FCMP 106e8d8bef9SDimitry Andric // found since we're only interested in dealing with instructions between 107e8d8bef9SDimitry Andric // them. 108e8d8bef9SDimitry Andric MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr; 109e8d8bef9SDimitry Andric for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) { 110e8d8bef9SDimitry Andric if (MI.getOpcode() == AArch64::FCMPSrr || 111e8d8bef9SDimitry Andric MI.getOpcode() == AArch64::FCMPDrr) { 112e8d8bef9SDimitry Andric if (!FirstCmp) 113e8d8bef9SDimitry Andric FirstCmp = &MI; 114e8d8bef9SDimitry Andric else 115e8d8bef9SDimitry Andric LastCmp = &MI; 116e8d8bef9SDimitry Andric } 117e8d8bef9SDimitry Andric } 118e8d8bef9SDimitry Andric 119e8d8bef9SDimitry Andric // In addition to converting flag-setting ops in fcmp ranges into non-flag 120e8d8bef9SDimitry Andric // setting ops, across the whole basic block we also detect when nzcv 121e8d8bef9SDimitry Andric // implicit-defs are dead, and mark them as dead. Peephole optimizations need 122e8d8bef9SDimitry Andric // this information later. 123e8d8bef9SDimitry Andric 124e8d8bef9SDimitry Andric LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo()); 125e8d8bef9SDimitry Andric LRU.addLiveOuts(MBB); 126e8d8bef9SDimitry Andric bool NZCVDead = LRU.available(AArch64::NZCV); 127e8d8bef9SDimitry Andric bool InsideCmpRange = false; 128e8d8bef9SDimitry Andric for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) { 129e8d8bef9SDimitry Andric LRU.stepBackward(II); 130e8d8bef9SDimitry Andric 131e8d8bef9SDimitry Andric if (LastCmp) { // There's a range present in this block. 132e8d8bef9SDimitry Andric // If we're inside an fcmp range, look for begin instruction. 133e8d8bef9SDimitry Andric if (InsideCmpRange && &II == FirstCmp) 134e8d8bef9SDimitry Andric InsideCmpRange = false; 135e8d8bef9SDimitry Andric else if (&II == LastCmp) 136e8d8bef9SDimitry Andric InsideCmpRange = true; 137e8d8bef9SDimitry Andric } 138e8d8bef9SDimitry Andric 139e8d8bef9SDimitry Andric // Did this instruction define NZCV? 140e8d8bef9SDimitry Andric bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV); 141e8d8bef9SDimitry Andric if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) { 142e8d8bef9SDimitry Andric // If we have a def and NZCV is dead, then we may convert this op. 143e8d8bef9SDimitry Andric unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode()); 144e8d8bef9SDimitry Andric int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV); 145e8d8bef9SDimitry Andric if (DeadNZCVIdx != -1) { 146e8d8bef9SDimitry Andric // If we're inside an fcmp range, then convert flag setting ops. 147e8d8bef9SDimitry Andric if (InsideCmpRange && NewOpc) { 148e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting " 149e8d8bef9SDimitry Andric "op in fcmp range: " 150e8d8bef9SDimitry Andric << II); 151e8d8bef9SDimitry Andric II.setDesc(TII->get(NewOpc)); 152*81ad6265SDimitry Andric II.removeOperand(DeadNZCVIdx); 153fe6060f1SDimitry Andric // Changing the opcode can result in differing regclass requirements, 154fe6060f1SDimitry Andric // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp. 155fe6060f1SDimitry Andric // Constrain the regclasses, possibly introducing a copy. 156fe6060f1SDimitry Andric constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(), 157fe6060f1SDimitry Andric II.getOperand(0), 0); 158e8d8bef9SDimitry Andric Changed |= true; 159e8d8bef9SDimitry Andric } else { 160e8d8bef9SDimitry Andric // Otherwise, we just set the nzcv imp-def operand to be dead, so the 161e8d8bef9SDimitry Andric // peephole optimizations can optimize them further. 162e8d8bef9SDimitry Andric II.getOperand(DeadNZCVIdx).setIsDead(); 163e8d8bef9SDimitry Andric } 164e8d8bef9SDimitry Andric } 165e8d8bef9SDimitry Andric } 166e8d8bef9SDimitry Andric 167e8d8bef9SDimitry Andric NZCVDead = NZCVDeadAtCurrInstr; 168e8d8bef9SDimitry Andric } 169e8d8bef9SDimitry Andric return Changed; 170e8d8bef9SDimitry Andric } 171e8d8bef9SDimitry Andric 172e8d8bef9SDimitry Andric bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) { 173e8d8bef9SDimitry Andric if (MF.getProperties().hasProperty( 174e8d8bef9SDimitry Andric MachineFunctionProperties::Property::FailedISel)) 175e8d8bef9SDimitry Andric return false; 176e8d8bef9SDimitry Andric assert(MF.getProperties().hasProperty( 177e8d8bef9SDimitry Andric MachineFunctionProperties::Property::Selected) && 178e8d8bef9SDimitry Andric "Expected a selected MF"); 179e8d8bef9SDimitry Andric 180e8d8bef9SDimitry Andric bool Changed = false; 181e8d8bef9SDimitry Andric for (auto &BB : MF) 182e8d8bef9SDimitry Andric Changed |= optimizeNZCVDefs(BB); 183fe6060f1SDimitry Andric return Changed; 184e8d8bef9SDimitry Andric } 185e8d8bef9SDimitry Andric 186e8d8bef9SDimitry Andric char AArch64PostSelectOptimize::ID = 0; 187e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE, 188e8d8bef9SDimitry Andric "Optimize AArch64 selected instructions", 189e8d8bef9SDimitry Andric false, false) 190e8d8bef9SDimitry Andric INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE, 191e8d8bef9SDimitry Andric "Optimize AArch64 selected instructions", false, 192e8d8bef9SDimitry Andric false) 193e8d8bef9SDimitry Andric 194e8d8bef9SDimitry Andric namespace llvm { 195e8d8bef9SDimitry Andric FunctionPass *createAArch64PostSelectOptimize() { 196e8d8bef9SDimitry Andric return new AArch64PostSelectOptimize(); 197e8d8bef9SDimitry Andric } 198e8d8bef9SDimitry Andric } // end namespace llvm 199