106f32e7eSjoerg //===- CriticalAntiDepBreaker.cpp - Anti-dep breaker ----------------------===//
206f32e7eSjoerg //
306f32e7eSjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
406f32e7eSjoerg // See https://llvm.org/LICENSE.txt for license information.
506f32e7eSjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
606f32e7eSjoerg //
706f32e7eSjoerg //===----------------------------------------------------------------------===//
806f32e7eSjoerg //
906f32e7eSjoerg // This file implements the CriticalAntiDepBreaker class, which
1006f32e7eSjoerg // implements register anti-dependence breaking along a blocks
1106f32e7eSjoerg // critical path during post-RA scheduler.
1206f32e7eSjoerg //
1306f32e7eSjoerg //===----------------------------------------------------------------------===//
1406f32e7eSjoerg
1506f32e7eSjoerg #include "CriticalAntiDepBreaker.h"
1606f32e7eSjoerg #include "llvm/ADT/ArrayRef.h"
1706f32e7eSjoerg #include "llvm/ADT/DenseMap.h"
1806f32e7eSjoerg #include "llvm/ADT/SmallVector.h"
1906f32e7eSjoerg #include "llvm/CodeGen/MachineBasicBlock.h"
2006f32e7eSjoerg #include "llvm/CodeGen/MachineFrameInfo.h"
2106f32e7eSjoerg #include "llvm/CodeGen/MachineFunction.h"
2206f32e7eSjoerg #include "llvm/CodeGen/MachineInstr.h"
2306f32e7eSjoerg #include "llvm/CodeGen/MachineOperand.h"
2406f32e7eSjoerg #include "llvm/CodeGen/MachineRegisterInfo.h"
2506f32e7eSjoerg #include "llvm/CodeGen/RegisterClassInfo.h"
2606f32e7eSjoerg #include "llvm/CodeGen/ScheduleDAG.h"
2706f32e7eSjoerg #include "llvm/CodeGen/TargetInstrInfo.h"
2806f32e7eSjoerg #include "llvm/CodeGen/TargetRegisterInfo.h"
2906f32e7eSjoerg #include "llvm/CodeGen/TargetSubtargetInfo.h"
3006f32e7eSjoerg #include "llvm/MC/MCInstrDesc.h"
3106f32e7eSjoerg #include "llvm/MC/MCRegisterInfo.h"
3206f32e7eSjoerg #include "llvm/Support/Debug.h"
3306f32e7eSjoerg #include "llvm/Support/raw_ostream.h"
3406f32e7eSjoerg #include <cassert>
3506f32e7eSjoerg #include <utility>
3606f32e7eSjoerg
3706f32e7eSjoerg using namespace llvm;
3806f32e7eSjoerg
3906f32e7eSjoerg #define DEBUG_TYPE "post-RA-sched"
4006f32e7eSjoerg
CriticalAntiDepBreaker(MachineFunction & MFi,const RegisterClassInfo & RCI)4106f32e7eSjoerg CriticalAntiDepBreaker::CriticalAntiDepBreaker(MachineFunction &MFi,
4206f32e7eSjoerg const RegisterClassInfo &RCI)
4306f32e7eSjoerg : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()),
4406f32e7eSjoerg TII(MF.getSubtarget().getInstrInfo()),
4506f32e7eSjoerg TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI),
4606f32e7eSjoerg Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0),
4706f32e7eSjoerg DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {}
4806f32e7eSjoerg
4906f32e7eSjoerg CriticalAntiDepBreaker::~CriticalAntiDepBreaker() = default;
5006f32e7eSjoerg
StartBlock(MachineBasicBlock * BB)5106f32e7eSjoerg void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
5206f32e7eSjoerg const unsigned BBSize = BB->size();
5306f32e7eSjoerg for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
5406f32e7eSjoerg // Clear out the register class data.
5506f32e7eSjoerg Classes[i] = nullptr;
5606f32e7eSjoerg
5706f32e7eSjoerg // Initialize the indices to indicate that no registers are live.
5806f32e7eSjoerg KillIndices[i] = ~0u;
5906f32e7eSjoerg DefIndices[i] = BBSize;
6006f32e7eSjoerg }
6106f32e7eSjoerg
6206f32e7eSjoerg // Clear "do not change" set.
6306f32e7eSjoerg KeepRegs.reset();
6406f32e7eSjoerg
6506f32e7eSjoerg bool IsReturnBlock = BB->isReturnBlock();
6606f32e7eSjoerg
6706f32e7eSjoerg // Examine the live-in regs of all successors.
68*da58b97aSjoerg for (const MachineBasicBlock *Succ : BB->successors())
69*da58b97aSjoerg for (const auto &LI : Succ->liveins()) {
7006f32e7eSjoerg for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) {
7106f32e7eSjoerg unsigned Reg = *AI;
7206f32e7eSjoerg Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
7306f32e7eSjoerg KillIndices[Reg] = BBSize;
7406f32e7eSjoerg DefIndices[Reg] = ~0u;
7506f32e7eSjoerg }
7606f32e7eSjoerg }
7706f32e7eSjoerg
7806f32e7eSjoerg // Mark live-out callee-saved registers. In a return block this is
7906f32e7eSjoerg // all callee-saved registers. In non-return this is any
8006f32e7eSjoerg // callee-saved register that is not saved in the prolog.
8106f32e7eSjoerg const MachineFrameInfo &MFI = MF.getFrameInfo();
8206f32e7eSjoerg BitVector Pristine = MFI.getPristineRegs(MF);
8306f32e7eSjoerg for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I;
8406f32e7eSjoerg ++I) {
8506f32e7eSjoerg unsigned Reg = *I;
8606f32e7eSjoerg if (!IsReturnBlock && !Pristine.test(Reg))
8706f32e7eSjoerg continue;
8806f32e7eSjoerg for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
8906f32e7eSjoerg unsigned Reg = *AI;
9006f32e7eSjoerg Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
9106f32e7eSjoerg KillIndices[Reg] = BBSize;
9206f32e7eSjoerg DefIndices[Reg] = ~0u;
9306f32e7eSjoerg }
9406f32e7eSjoerg }
9506f32e7eSjoerg }
9606f32e7eSjoerg
FinishBlock()9706f32e7eSjoerg void CriticalAntiDepBreaker::FinishBlock() {
9806f32e7eSjoerg RegRefs.clear();
9906f32e7eSjoerg KeepRegs.reset();
10006f32e7eSjoerg }
10106f32e7eSjoerg
Observe(MachineInstr & MI,unsigned Count,unsigned InsertPosIndex)10206f32e7eSjoerg void CriticalAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
10306f32e7eSjoerg unsigned InsertPosIndex) {
10406f32e7eSjoerg // Kill instructions can define registers but are really nops, and there might
10506f32e7eSjoerg // be a real definition earlier that needs to be paired with uses dominated by
10606f32e7eSjoerg // this kill.
10706f32e7eSjoerg
10806f32e7eSjoerg // FIXME: It may be possible to remove the isKill() restriction once PR18663
10906f32e7eSjoerg // has been properly fixed. There can be value in processing kills as seen in
11006f32e7eSjoerg // the AggressiveAntiDepBreaker class.
11106f32e7eSjoerg if (MI.isDebugInstr() || MI.isKill())
11206f32e7eSjoerg return;
11306f32e7eSjoerg assert(Count < InsertPosIndex && "Instruction index out of expected range!");
11406f32e7eSjoerg
11506f32e7eSjoerg for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
11606f32e7eSjoerg if (KillIndices[Reg] != ~0u) {
11706f32e7eSjoerg // If Reg is currently live, then mark that it can't be renamed as
11806f32e7eSjoerg // we don't know the extent of its live-range anymore (now that it
11906f32e7eSjoerg // has been scheduled).
12006f32e7eSjoerg Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
12106f32e7eSjoerg KillIndices[Reg] = Count;
12206f32e7eSjoerg } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
12306f32e7eSjoerg // Any register which was defined within the previous scheduling region
12406f32e7eSjoerg // may have been rescheduled and its lifetime may overlap with registers
12506f32e7eSjoerg // in ways not reflected in our current liveness state. For each such
12606f32e7eSjoerg // register, adjust the liveness state to be conservatively correct.
12706f32e7eSjoerg Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
12806f32e7eSjoerg
12906f32e7eSjoerg // Move the def index to the end of the previous region, to reflect
13006f32e7eSjoerg // that the def could theoretically have been scheduled at the end.
13106f32e7eSjoerg DefIndices[Reg] = InsertPosIndex;
13206f32e7eSjoerg }
13306f32e7eSjoerg }
13406f32e7eSjoerg
13506f32e7eSjoerg PrescanInstruction(MI);
13606f32e7eSjoerg ScanInstruction(MI, Count);
13706f32e7eSjoerg }
13806f32e7eSjoerg
13906f32e7eSjoerg /// CriticalPathStep - Return the next SUnit after SU on the bottom-up
14006f32e7eSjoerg /// critical path.
CriticalPathStep(const SUnit * SU)14106f32e7eSjoerg static const SDep *CriticalPathStep(const SUnit *SU) {
14206f32e7eSjoerg const SDep *Next = nullptr;
14306f32e7eSjoerg unsigned NextDepth = 0;
14406f32e7eSjoerg // Find the predecessor edge with the greatest depth.
145*da58b97aSjoerg for (const SDep &P : SU->Preds) {
146*da58b97aSjoerg const SUnit *PredSU = P.getSUnit();
147*da58b97aSjoerg unsigned PredLatency = P.getLatency();
14806f32e7eSjoerg unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
14906f32e7eSjoerg // In the case of a latency tie, prefer an anti-dependency edge over
15006f32e7eSjoerg // other types of edges.
15106f32e7eSjoerg if (NextDepth < PredTotalLatency ||
152*da58b97aSjoerg (NextDepth == PredTotalLatency && P.getKind() == SDep::Anti)) {
15306f32e7eSjoerg NextDepth = PredTotalLatency;
154*da58b97aSjoerg Next = &P;
15506f32e7eSjoerg }
15606f32e7eSjoerg }
15706f32e7eSjoerg return Next;
15806f32e7eSjoerg }
15906f32e7eSjoerg
PrescanInstruction(MachineInstr & MI)16006f32e7eSjoerg void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
16106f32e7eSjoerg // It's not safe to change register allocation for source operands of
16206f32e7eSjoerg // instructions that have special allocation requirements. Also assume all
16306f32e7eSjoerg // registers used in a call must not be changed (ABI).
16406f32e7eSjoerg // FIXME: The issue with predicated instruction is more complex. We are being
16506f32e7eSjoerg // conservative here because the kill markers cannot be trusted after
16606f32e7eSjoerg // if-conversion:
16706f32e7eSjoerg // %r6 = LDR %sp, %reg0, 92, 14, %reg0; mem:LD4[FixedStack14]
16806f32e7eSjoerg // ...
16906f32e7eSjoerg // STR %r0, killed %r6, %reg0, 0, 0, %cpsr; mem:ST4[%395]
17006f32e7eSjoerg // %r6 = LDR %sp, %reg0, 100, 0, %cpsr; mem:LD4[FixedStack12]
17106f32e7eSjoerg // STR %r0, killed %r6, %reg0, 0, 14, %reg0; mem:ST4[%396](align=8)
17206f32e7eSjoerg //
17306f32e7eSjoerg // The first R6 kill is not really a kill since it's killed by a predicated
17406f32e7eSjoerg // instruction which may not be executed. The second R6 def may or may not
17506f32e7eSjoerg // re-define R6 so it's not safe to change it since the last R6 use cannot be
17606f32e7eSjoerg // changed.
17706f32e7eSjoerg bool Special =
17806f32e7eSjoerg MI.isCall() || MI.hasExtraSrcRegAllocReq() || TII->isPredicated(MI);
17906f32e7eSjoerg
18006f32e7eSjoerg // Scan the register operands for this instruction and update
18106f32e7eSjoerg // Classes and RegRefs.
18206f32e7eSjoerg for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
18306f32e7eSjoerg MachineOperand &MO = MI.getOperand(i);
18406f32e7eSjoerg if (!MO.isReg()) continue;
18506f32e7eSjoerg Register Reg = MO.getReg();
18606f32e7eSjoerg if (Reg == 0) continue;
18706f32e7eSjoerg const TargetRegisterClass *NewRC = nullptr;
18806f32e7eSjoerg
18906f32e7eSjoerg if (i < MI.getDesc().getNumOperands())
19006f32e7eSjoerg NewRC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
19106f32e7eSjoerg
19206f32e7eSjoerg // For now, only allow the register to be changed if its register
19306f32e7eSjoerg // class is consistent across all uses.
19406f32e7eSjoerg if (!Classes[Reg] && NewRC)
19506f32e7eSjoerg Classes[Reg] = NewRC;
19606f32e7eSjoerg else if (!NewRC || Classes[Reg] != NewRC)
19706f32e7eSjoerg Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
19806f32e7eSjoerg
19906f32e7eSjoerg // Now check for aliases.
20006f32e7eSjoerg for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
20106f32e7eSjoerg // If an alias of the reg is used during the live range, give up.
20206f32e7eSjoerg // Note that this allows us to skip checking if AntiDepReg
20306f32e7eSjoerg // overlaps with any of the aliases, among other things.
20406f32e7eSjoerg unsigned AliasReg = *AI;
20506f32e7eSjoerg if (Classes[AliasReg]) {
20606f32e7eSjoerg Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
20706f32e7eSjoerg Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
20806f32e7eSjoerg }
20906f32e7eSjoerg }
21006f32e7eSjoerg
21106f32e7eSjoerg // If we're still willing to consider this register, note the reference.
21206f32e7eSjoerg if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
21306f32e7eSjoerg RegRefs.insert(std::make_pair(Reg, &MO));
21406f32e7eSjoerg
21506f32e7eSjoerg // If this reg is tied and live (Classes[Reg] is set to -1), we can't change
21606f32e7eSjoerg // it or any of its sub or super regs. We need to use KeepRegs to mark the
21706f32e7eSjoerg // reg because not all uses of the same reg within an instruction are
21806f32e7eSjoerg // necessarily tagged as tied.
21906f32e7eSjoerg // Example: an x86 "xor %eax, %eax" will have one source operand tied to the
22006f32e7eSjoerg // def register but not the second (see PR20020 for details).
22106f32e7eSjoerg // FIXME: can this check be relaxed to account for undef uses
22206f32e7eSjoerg // of a register? In the above 'xor' example, the uses of %eax are undef, so
22306f32e7eSjoerg // earlier instructions could still replace %eax even though the 'xor'
22406f32e7eSjoerg // itself can't be changed.
22506f32e7eSjoerg if (MI.isRegTiedToUseOperand(i) &&
22606f32e7eSjoerg Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) {
22706f32e7eSjoerg for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
22806f32e7eSjoerg SubRegs.isValid(); ++SubRegs) {
22906f32e7eSjoerg KeepRegs.set(*SubRegs);
23006f32e7eSjoerg }
23106f32e7eSjoerg for (MCSuperRegIterator SuperRegs(Reg, TRI);
23206f32e7eSjoerg SuperRegs.isValid(); ++SuperRegs) {
23306f32e7eSjoerg KeepRegs.set(*SuperRegs);
23406f32e7eSjoerg }
23506f32e7eSjoerg }
23606f32e7eSjoerg
23706f32e7eSjoerg if (MO.isUse() && Special) {
23806f32e7eSjoerg if (!KeepRegs.test(Reg)) {
23906f32e7eSjoerg for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
24006f32e7eSjoerg SubRegs.isValid(); ++SubRegs)
24106f32e7eSjoerg KeepRegs.set(*SubRegs);
24206f32e7eSjoerg }
24306f32e7eSjoerg }
24406f32e7eSjoerg }
24506f32e7eSjoerg }
24606f32e7eSjoerg
ScanInstruction(MachineInstr & MI,unsigned Count)24706f32e7eSjoerg void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
24806f32e7eSjoerg // Update liveness.
24906f32e7eSjoerg // Proceeding upwards, registers that are defed but not used in this
25006f32e7eSjoerg // instruction are now dead.
25106f32e7eSjoerg assert(!MI.isKill() && "Attempting to scan a kill instruction");
25206f32e7eSjoerg
25306f32e7eSjoerg if (!TII->isPredicated(MI)) {
25406f32e7eSjoerg // Predicated defs are modeled as read + write, i.e. similar to two
25506f32e7eSjoerg // address updates.
25606f32e7eSjoerg for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
25706f32e7eSjoerg MachineOperand &MO = MI.getOperand(i);
25806f32e7eSjoerg
259*da58b97aSjoerg if (MO.isRegMask()) {
260*da58b97aSjoerg auto ClobbersPhysRegAndSubRegs = [&](unsigned PhysReg) {
261*da58b97aSjoerg for (MCSubRegIterator SRI(PhysReg, TRI, true); SRI.isValid(); ++SRI)
262*da58b97aSjoerg if (!MO.clobbersPhysReg(*SRI))
263*da58b97aSjoerg return false;
264*da58b97aSjoerg
265*da58b97aSjoerg return true;
266*da58b97aSjoerg };
267*da58b97aSjoerg
268*da58b97aSjoerg for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
269*da58b97aSjoerg if (ClobbersPhysRegAndSubRegs(i)) {
27006f32e7eSjoerg DefIndices[i] = Count;
27106f32e7eSjoerg KillIndices[i] = ~0u;
27206f32e7eSjoerg KeepRegs.reset(i);
27306f32e7eSjoerg Classes[i] = nullptr;
27406f32e7eSjoerg RegRefs.erase(i);
27506f32e7eSjoerg }
276*da58b97aSjoerg }
277*da58b97aSjoerg }
27806f32e7eSjoerg
27906f32e7eSjoerg if (!MO.isReg()) continue;
28006f32e7eSjoerg Register Reg = MO.getReg();
28106f32e7eSjoerg if (Reg == 0) continue;
28206f32e7eSjoerg if (!MO.isDef()) continue;
28306f32e7eSjoerg
28406f32e7eSjoerg // Ignore two-addr defs.
28506f32e7eSjoerg if (MI.isRegTiedToUseOperand(i))
28606f32e7eSjoerg continue;
28706f32e7eSjoerg
28806f32e7eSjoerg // If we've already marked this reg as unchangeable, don't remove
28906f32e7eSjoerg // it or any of its subregs from KeepRegs.
29006f32e7eSjoerg bool Keep = KeepRegs.test(Reg);
29106f32e7eSjoerg
29206f32e7eSjoerg // For the reg itself and all subregs: update the def to current;
29306f32e7eSjoerg // reset the kill state, any restrictions, and references.
29406f32e7eSjoerg for (MCSubRegIterator SRI(Reg, TRI, true); SRI.isValid(); ++SRI) {
29506f32e7eSjoerg unsigned SubregReg = *SRI;
29606f32e7eSjoerg DefIndices[SubregReg] = Count;
29706f32e7eSjoerg KillIndices[SubregReg] = ~0u;
29806f32e7eSjoerg Classes[SubregReg] = nullptr;
29906f32e7eSjoerg RegRefs.erase(SubregReg);
30006f32e7eSjoerg if (!Keep)
30106f32e7eSjoerg KeepRegs.reset(SubregReg);
30206f32e7eSjoerg }
30306f32e7eSjoerg // Conservatively mark super-registers as unusable.
30406f32e7eSjoerg for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
30506f32e7eSjoerg Classes[*SR] = reinterpret_cast<TargetRegisterClass *>(-1);
30606f32e7eSjoerg }
30706f32e7eSjoerg }
30806f32e7eSjoerg for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
30906f32e7eSjoerg MachineOperand &MO = MI.getOperand(i);
31006f32e7eSjoerg if (!MO.isReg()) continue;
31106f32e7eSjoerg Register Reg = MO.getReg();
31206f32e7eSjoerg if (Reg == 0) continue;
31306f32e7eSjoerg if (!MO.isUse()) continue;
31406f32e7eSjoerg
31506f32e7eSjoerg const TargetRegisterClass *NewRC = nullptr;
31606f32e7eSjoerg if (i < MI.getDesc().getNumOperands())
31706f32e7eSjoerg NewRC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
31806f32e7eSjoerg
31906f32e7eSjoerg // For now, only allow the register to be changed if its register
32006f32e7eSjoerg // class is consistent across all uses.
32106f32e7eSjoerg if (!Classes[Reg] && NewRC)
32206f32e7eSjoerg Classes[Reg] = NewRC;
32306f32e7eSjoerg else if (!NewRC || Classes[Reg] != NewRC)
32406f32e7eSjoerg Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
32506f32e7eSjoerg
32606f32e7eSjoerg RegRefs.insert(std::make_pair(Reg, &MO));
32706f32e7eSjoerg
32806f32e7eSjoerg // It wasn't previously live but now it is, this is a kill.
32906f32e7eSjoerg // Repeat for all aliases.
33006f32e7eSjoerg for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
33106f32e7eSjoerg unsigned AliasReg = *AI;
33206f32e7eSjoerg if (KillIndices[AliasReg] == ~0u) {
33306f32e7eSjoerg KillIndices[AliasReg] = Count;
33406f32e7eSjoerg DefIndices[AliasReg] = ~0u;
33506f32e7eSjoerg }
33606f32e7eSjoerg }
33706f32e7eSjoerg }
33806f32e7eSjoerg }
33906f32e7eSjoerg
34006f32e7eSjoerg // Check all machine operands that reference the antidependent register and must
34106f32e7eSjoerg // be replaced by NewReg. Return true if any of their parent instructions may
34206f32e7eSjoerg // clobber the new register.
34306f32e7eSjoerg //
34406f32e7eSjoerg // Note: AntiDepReg may be referenced by a two-address instruction such that
34506f32e7eSjoerg // it's use operand is tied to a def operand. We guard against the case in which
34606f32e7eSjoerg // the two-address instruction also defines NewReg, as may happen with
34706f32e7eSjoerg // pre/postincrement loads. In this case, both the use and def operands are in
34806f32e7eSjoerg // RegRefs because the def is inserted by PrescanInstruction and not erased
34906f32e7eSjoerg // during ScanInstruction. So checking for an instruction with definitions of
35006f32e7eSjoerg // both NewReg and AntiDepReg covers it.
35106f32e7eSjoerg bool
isNewRegClobberedByRefs(RegRefIter RegRefBegin,RegRefIter RegRefEnd,unsigned NewReg)35206f32e7eSjoerg CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
35306f32e7eSjoerg RegRefIter RegRefEnd,
35406f32e7eSjoerg unsigned NewReg) {
35506f32e7eSjoerg for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) {
35606f32e7eSjoerg MachineOperand *RefOper = I->second;
35706f32e7eSjoerg
35806f32e7eSjoerg // Don't allow the instruction defining AntiDepReg to earlyclobber its
35906f32e7eSjoerg // operands, in case they may be assigned to NewReg. In this case antidep
36006f32e7eSjoerg // breaking must fail, but it's too rare to bother optimizing.
36106f32e7eSjoerg if (RefOper->isDef() && RefOper->isEarlyClobber())
36206f32e7eSjoerg return true;
36306f32e7eSjoerg
36406f32e7eSjoerg // Handle cases in which this instruction defines NewReg.
36506f32e7eSjoerg MachineInstr *MI = RefOper->getParent();
36606f32e7eSjoerg for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
36706f32e7eSjoerg const MachineOperand &CheckOper = MI->getOperand(i);
36806f32e7eSjoerg
36906f32e7eSjoerg if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg))
37006f32e7eSjoerg return true;
37106f32e7eSjoerg
37206f32e7eSjoerg if (!CheckOper.isReg() || !CheckOper.isDef() ||
37306f32e7eSjoerg CheckOper.getReg() != NewReg)
37406f32e7eSjoerg continue;
37506f32e7eSjoerg
37606f32e7eSjoerg // Don't allow the instruction to define NewReg and AntiDepReg.
37706f32e7eSjoerg // When AntiDepReg is renamed it will be an illegal op.
37806f32e7eSjoerg if (RefOper->isDef())
37906f32e7eSjoerg return true;
38006f32e7eSjoerg
38106f32e7eSjoerg // Don't allow an instruction using AntiDepReg to be earlyclobbered by
38206f32e7eSjoerg // NewReg.
38306f32e7eSjoerg if (CheckOper.isEarlyClobber())
38406f32e7eSjoerg return true;
38506f32e7eSjoerg
38606f32e7eSjoerg // Don't allow inline asm to define NewReg at all. Who knows what it's
38706f32e7eSjoerg // doing with it.
38806f32e7eSjoerg if (MI->isInlineAsm())
38906f32e7eSjoerg return true;
39006f32e7eSjoerg }
39106f32e7eSjoerg }
39206f32e7eSjoerg return false;
39306f32e7eSjoerg }
39406f32e7eSjoerg
39506f32e7eSjoerg unsigned CriticalAntiDepBreaker::
findSuitableFreeRegister(RegRefIter RegRefBegin,RegRefIter RegRefEnd,unsigned AntiDepReg,unsigned LastNewReg,const TargetRegisterClass * RC,SmallVectorImpl<unsigned> & Forbid)39606f32e7eSjoerg findSuitableFreeRegister(RegRefIter RegRefBegin,
39706f32e7eSjoerg RegRefIter RegRefEnd,
39806f32e7eSjoerg unsigned AntiDepReg,
39906f32e7eSjoerg unsigned LastNewReg,
40006f32e7eSjoerg const TargetRegisterClass *RC,
40106f32e7eSjoerg SmallVectorImpl<unsigned> &Forbid) {
40206f32e7eSjoerg ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC);
40306f32e7eSjoerg for (unsigned i = 0; i != Order.size(); ++i) {
40406f32e7eSjoerg unsigned NewReg = Order[i];
40506f32e7eSjoerg // Don't replace a register with itself.
40606f32e7eSjoerg if (NewReg == AntiDepReg) continue;
40706f32e7eSjoerg // Don't replace a register with one that was recently used to repair
40806f32e7eSjoerg // an anti-dependence with this AntiDepReg, because that would
40906f32e7eSjoerg // re-introduce that anti-dependence.
41006f32e7eSjoerg if (NewReg == LastNewReg) continue;
41106f32e7eSjoerg // If any instructions that define AntiDepReg also define the NewReg, it's
41206f32e7eSjoerg // not suitable. For example, Instruction with multiple definitions can
41306f32e7eSjoerg // result in this condition.
41406f32e7eSjoerg if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue;
41506f32e7eSjoerg // If NewReg is dead and NewReg's most recent def is not before
41606f32e7eSjoerg // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
41706f32e7eSjoerg assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
41806f32e7eSjoerg && "Kill and Def maps aren't consistent for AntiDepReg!");
41906f32e7eSjoerg assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u))
42006f32e7eSjoerg && "Kill and Def maps aren't consistent for NewReg!");
42106f32e7eSjoerg if (KillIndices[NewReg] != ~0u ||
42206f32e7eSjoerg Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
42306f32e7eSjoerg KillIndices[AntiDepReg] > DefIndices[NewReg])
42406f32e7eSjoerg continue;
42506f32e7eSjoerg // If NewReg overlaps any of the forbidden registers, we can't use it.
42606f32e7eSjoerg bool Forbidden = false;
427*da58b97aSjoerg for (unsigned R : Forbid)
428*da58b97aSjoerg if (TRI->regsOverlap(NewReg, R)) {
42906f32e7eSjoerg Forbidden = true;
43006f32e7eSjoerg break;
43106f32e7eSjoerg }
43206f32e7eSjoerg if (Forbidden) continue;
43306f32e7eSjoerg return NewReg;
43406f32e7eSjoerg }
43506f32e7eSjoerg
43606f32e7eSjoerg // No registers are free and available!
43706f32e7eSjoerg return 0;
43806f32e7eSjoerg }
43906f32e7eSjoerg
44006f32e7eSjoerg unsigned CriticalAntiDepBreaker::
BreakAntiDependencies(const std::vector<SUnit> & SUnits,MachineBasicBlock::iterator Begin,MachineBasicBlock::iterator End,unsigned InsertPosIndex,DbgValueVector & DbgValues)44106f32e7eSjoerg BreakAntiDependencies(const std::vector<SUnit> &SUnits,
44206f32e7eSjoerg MachineBasicBlock::iterator Begin,
44306f32e7eSjoerg MachineBasicBlock::iterator End,
44406f32e7eSjoerg unsigned InsertPosIndex,
44506f32e7eSjoerg DbgValueVector &DbgValues) {
44606f32e7eSjoerg // The code below assumes that there is at least one instruction,
44706f32e7eSjoerg // so just duck out immediately if the block is empty.
44806f32e7eSjoerg if (SUnits.empty()) return 0;
44906f32e7eSjoerg
45006f32e7eSjoerg // Keep a map of the MachineInstr*'s back to the SUnit representing them.
45106f32e7eSjoerg // This is used for updating debug information.
45206f32e7eSjoerg //
45306f32e7eSjoerg // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap
45406f32e7eSjoerg DenseMap<MachineInstr *, const SUnit *> MISUnitMap;
45506f32e7eSjoerg
45606f32e7eSjoerg // Find the node at the bottom of the critical path.
45706f32e7eSjoerg const SUnit *Max = nullptr;
45806f32e7eSjoerg for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
45906f32e7eSjoerg const SUnit *SU = &SUnits[i];
46006f32e7eSjoerg MISUnitMap[SU->getInstr()] = SU;
46106f32e7eSjoerg if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
46206f32e7eSjoerg Max = SU;
46306f32e7eSjoerg }
46406f32e7eSjoerg assert(Max && "Failed to find bottom of the critical path");
46506f32e7eSjoerg
46606f32e7eSjoerg #ifndef NDEBUG
46706f32e7eSjoerg {
46806f32e7eSjoerg LLVM_DEBUG(dbgs() << "Critical path has total latency "
46906f32e7eSjoerg << (Max->getDepth() + Max->Latency) << "\n");
47006f32e7eSjoerg LLVM_DEBUG(dbgs() << "Available regs:");
47106f32e7eSjoerg for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
47206f32e7eSjoerg if (KillIndices[Reg] == ~0u)
47306f32e7eSjoerg LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI));
47406f32e7eSjoerg }
47506f32e7eSjoerg LLVM_DEBUG(dbgs() << '\n');
47606f32e7eSjoerg }
47706f32e7eSjoerg #endif
47806f32e7eSjoerg
47906f32e7eSjoerg // Track progress along the critical path through the SUnit graph as we walk
48006f32e7eSjoerg // the instructions.
48106f32e7eSjoerg const SUnit *CriticalPathSU = Max;
48206f32e7eSjoerg MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
48306f32e7eSjoerg
48406f32e7eSjoerg // Consider this pattern:
48506f32e7eSjoerg // A = ...
48606f32e7eSjoerg // ... = A
48706f32e7eSjoerg // A = ...
48806f32e7eSjoerg // ... = A
48906f32e7eSjoerg // A = ...
49006f32e7eSjoerg // ... = A
49106f32e7eSjoerg // A = ...
49206f32e7eSjoerg // ... = A
49306f32e7eSjoerg // There are three anti-dependencies here, and without special care,
49406f32e7eSjoerg // we'd break all of them using the same register:
49506f32e7eSjoerg // A = ...
49606f32e7eSjoerg // ... = A
49706f32e7eSjoerg // B = ...
49806f32e7eSjoerg // ... = B
49906f32e7eSjoerg // B = ...
50006f32e7eSjoerg // ... = B
50106f32e7eSjoerg // B = ...
50206f32e7eSjoerg // ... = B
50306f32e7eSjoerg // because at each anti-dependence, B is the first register that
50406f32e7eSjoerg // isn't A which is free. This re-introduces anti-dependencies
50506f32e7eSjoerg // at all but one of the original anti-dependencies that we were
50606f32e7eSjoerg // trying to break. To avoid this, keep track of the most recent
50706f32e7eSjoerg // register that each register was replaced with, avoid
50806f32e7eSjoerg // using it to repair an anti-dependence on the same register.
50906f32e7eSjoerg // This lets us produce this:
51006f32e7eSjoerg // A = ...
51106f32e7eSjoerg // ... = A
51206f32e7eSjoerg // B = ...
51306f32e7eSjoerg // ... = B
51406f32e7eSjoerg // C = ...
51506f32e7eSjoerg // ... = C
51606f32e7eSjoerg // B = ...
51706f32e7eSjoerg // ... = B
51806f32e7eSjoerg // This still has an anti-dependence on B, but at least it isn't on the
51906f32e7eSjoerg // original critical path.
52006f32e7eSjoerg //
52106f32e7eSjoerg // TODO: If we tracked more than one register here, we could potentially
52206f32e7eSjoerg // fix that remaining critical edge too. This is a little more involved,
52306f32e7eSjoerg // because unlike the most recent register, less recent registers should
52406f32e7eSjoerg // still be considered, though only if no other registers are available.
52506f32e7eSjoerg std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0);
52606f32e7eSjoerg
52706f32e7eSjoerg // Attempt to break anti-dependence edges on the critical path. Walk the
52806f32e7eSjoerg // instructions from the bottom up, tracking information about liveness
52906f32e7eSjoerg // as we go to help determine which registers are available.
53006f32e7eSjoerg unsigned Broken = 0;
53106f32e7eSjoerg unsigned Count = InsertPosIndex - 1;
53206f32e7eSjoerg for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) {
53306f32e7eSjoerg MachineInstr &MI = *--I;
53406f32e7eSjoerg // Kill instructions can define registers but are really nops, and there
53506f32e7eSjoerg // might be a real definition earlier that needs to be paired with uses
53606f32e7eSjoerg // dominated by this kill.
53706f32e7eSjoerg
53806f32e7eSjoerg // FIXME: It may be possible to remove the isKill() restriction once PR18663
53906f32e7eSjoerg // has been properly fixed. There can be value in processing kills as seen
54006f32e7eSjoerg // in the AggressiveAntiDepBreaker class.
54106f32e7eSjoerg if (MI.isDebugInstr() || MI.isKill())
54206f32e7eSjoerg continue;
54306f32e7eSjoerg
54406f32e7eSjoerg // Check if this instruction has a dependence on the critical path that
54506f32e7eSjoerg // is an anti-dependence that we may be able to break. If it is, set
54606f32e7eSjoerg // AntiDepReg to the non-zero register associated with the anti-dependence.
54706f32e7eSjoerg //
54806f32e7eSjoerg // We limit our attention to the critical path as a heuristic to avoid
54906f32e7eSjoerg // breaking anti-dependence edges that aren't going to significantly
55006f32e7eSjoerg // impact the overall schedule. There are a limited number of registers
55106f32e7eSjoerg // and we want to save them for the important edges.
55206f32e7eSjoerg //
55306f32e7eSjoerg // TODO: Instructions with multiple defs could have multiple
55406f32e7eSjoerg // anti-dependencies. The current code here only knows how to break one
55506f32e7eSjoerg // edge per instruction. Note that we'd have to be able to break all of
55606f32e7eSjoerg // the anti-dependencies in an instruction in order to be effective.
55706f32e7eSjoerg unsigned AntiDepReg = 0;
55806f32e7eSjoerg if (&MI == CriticalPathMI) {
55906f32e7eSjoerg if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) {
56006f32e7eSjoerg const SUnit *NextSU = Edge->getSUnit();
56106f32e7eSjoerg
56206f32e7eSjoerg // Only consider anti-dependence edges.
56306f32e7eSjoerg if (Edge->getKind() == SDep::Anti) {
56406f32e7eSjoerg AntiDepReg = Edge->getReg();
56506f32e7eSjoerg assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
56606f32e7eSjoerg if (!MRI.isAllocatable(AntiDepReg))
56706f32e7eSjoerg // Don't break anti-dependencies on non-allocatable registers.
56806f32e7eSjoerg AntiDepReg = 0;
56906f32e7eSjoerg else if (KeepRegs.test(AntiDepReg))
57006f32e7eSjoerg // Don't break anti-dependencies if a use down below requires
57106f32e7eSjoerg // this exact register.
57206f32e7eSjoerg AntiDepReg = 0;
57306f32e7eSjoerg else {
57406f32e7eSjoerg // If the SUnit has other dependencies on the SUnit that it
57506f32e7eSjoerg // anti-depends on, don't bother breaking the anti-dependency
57606f32e7eSjoerg // since those edges would prevent such units from being
57706f32e7eSjoerg // scheduled past each other regardless.
57806f32e7eSjoerg //
57906f32e7eSjoerg // Also, if there are dependencies on other SUnits with the
58006f32e7eSjoerg // same register as the anti-dependency, don't attempt to
58106f32e7eSjoerg // break it.
582*da58b97aSjoerg for (const SDep &P : CriticalPathSU->Preds)
583*da58b97aSjoerg if (P.getSUnit() == NextSU
584*da58b97aSjoerg ? (P.getKind() != SDep::Anti || P.getReg() != AntiDepReg)
585*da58b97aSjoerg : (P.getKind() == SDep::Data &&
586*da58b97aSjoerg P.getReg() == AntiDepReg)) {
58706f32e7eSjoerg AntiDepReg = 0;
58806f32e7eSjoerg break;
58906f32e7eSjoerg }
59006f32e7eSjoerg }
59106f32e7eSjoerg }
59206f32e7eSjoerg CriticalPathSU = NextSU;
59306f32e7eSjoerg CriticalPathMI = CriticalPathSU->getInstr();
59406f32e7eSjoerg } else {
59506f32e7eSjoerg // We've reached the end of the critical path.
59606f32e7eSjoerg CriticalPathSU = nullptr;
59706f32e7eSjoerg CriticalPathMI = nullptr;
59806f32e7eSjoerg }
59906f32e7eSjoerg }
60006f32e7eSjoerg
60106f32e7eSjoerg PrescanInstruction(MI);
60206f32e7eSjoerg
60306f32e7eSjoerg SmallVector<unsigned, 2> ForbidRegs;
60406f32e7eSjoerg
60506f32e7eSjoerg // If MI's defs have a special allocation requirement, don't allow
60606f32e7eSjoerg // any def registers to be changed. Also assume all registers
60706f32e7eSjoerg // defined in a call must not be changed (ABI).
60806f32e7eSjoerg if (MI.isCall() || MI.hasExtraDefRegAllocReq() || TII->isPredicated(MI))
60906f32e7eSjoerg // If this instruction's defs have special allocation requirement, don't
61006f32e7eSjoerg // break this anti-dependency.
61106f32e7eSjoerg AntiDepReg = 0;
61206f32e7eSjoerg else if (AntiDepReg) {
61306f32e7eSjoerg // If this instruction has a use of AntiDepReg, breaking it
61406f32e7eSjoerg // is invalid. If the instruction defines other registers,
61506f32e7eSjoerg // save a list of them so that we don't pick a new register
61606f32e7eSjoerg // that overlaps any of them.
61706f32e7eSjoerg for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
61806f32e7eSjoerg MachineOperand &MO = MI.getOperand(i);
61906f32e7eSjoerg if (!MO.isReg()) continue;
62006f32e7eSjoerg Register Reg = MO.getReg();
62106f32e7eSjoerg if (Reg == 0) continue;
62206f32e7eSjoerg if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) {
62306f32e7eSjoerg AntiDepReg = 0;
62406f32e7eSjoerg break;
62506f32e7eSjoerg }
62606f32e7eSjoerg if (MO.isDef() && Reg != AntiDepReg)
62706f32e7eSjoerg ForbidRegs.push_back(Reg);
62806f32e7eSjoerg }
62906f32e7eSjoerg }
63006f32e7eSjoerg
63106f32e7eSjoerg // Determine AntiDepReg's register class, if it is live and is
63206f32e7eSjoerg // consistently used within a single class.
63306f32e7eSjoerg const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg]
63406f32e7eSjoerg : nullptr;
63506f32e7eSjoerg assert((AntiDepReg == 0 || RC != nullptr) &&
63606f32e7eSjoerg "Register should be live if it's causing an anti-dependence!");
63706f32e7eSjoerg if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
63806f32e7eSjoerg AntiDepReg = 0;
63906f32e7eSjoerg
64006f32e7eSjoerg // Look for a suitable register to use to break the anti-dependence.
64106f32e7eSjoerg //
64206f32e7eSjoerg // TODO: Instead of picking the first free register, consider which might
64306f32e7eSjoerg // be the best.
64406f32e7eSjoerg if (AntiDepReg != 0) {
64506f32e7eSjoerg std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
64606f32e7eSjoerg std::multimap<unsigned, MachineOperand *>::iterator>
64706f32e7eSjoerg Range = RegRefs.equal_range(AntiDepReg);
64806f32e7eSjoerg if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second,
64906f32e7eSjoerg AntiDepReg,
65006f32e7eSjoerg LastNewReg[AntiDepReg],
65106f32e7eSjoerg RC, ForbidRegs)) {
65206f32e7eSjoerg LLVM_DEBUG(dbgs() << "Breaking anti-dependence edge on "
65306f32e7eSjoerg << printReg(AntiDepReg, TRI) << " with "
65406f32e7eSjoerg << RegRefs.count(AntiDepReg) << " references"
65506f32e7eSjoerg << " using " << printReg(NewReg, TRI) << "!\n");
65606f32e7eSjoerg
65706f32e7eSjoerg // Update the references to the old register to refer to the new
65806f32e7eSjoerg // register.
65906f32e7eSjoerg for (std::multimap<unsigned, MachineOperand *>::iterator
66006f32e7eSjoerg Q = Range.first, QE = Range.second; Q != QE; ++Q) {
66106f32e7eSjoerg Q->second->setReg(NewReg);
66206f32e7eSjoerg // If the SU for the instruction being updated has debug information
66306f32e7eSjoerg // related to the anti-dependency register, make sure to update that
66406f32e7eSjoerg // as well.
66506f32e7eSjoerg const SUnit *SU = MISUnitMap[Q->second->getParent()];
66606f32e7eSjoerg if (!SU) continue;
66706f32e7eSjoerg UpdateDbgValues(DbgValues, Q->second->getParent(),
66806f32e7eSjoerg AntiDepReg, NewReg);
66906f32e7eSjoerg }
67006f32e7eSjoerg
67106f32e7eSjoerg // We just went back in time and modified history; the
67206f32e7eSjoerg // liveness information for the anti-dependence reg is now
67306f32e7eSjoerg // inconsistent. Set the state as if it were dead.
67406f32e7eSjoerg Classes[NewReg] = Classes[AntiDepReg];
67506f32e7eSjoerg DefIndices[NewReg] = DefIndices[AntiDepReg];
67606f32e7eSjoerg KillIndices[NewReg] = KillIndices[AntiDepReg];
67706f32e7eSjoerg assert(((KillIndices[NewReg] == ~0u) !=
67806f32e7eSjoerg (DefIndices[NewReg] == ~0u)) &&
67906f32e7eSjoerg "Kill and Def maps aren't consistent for NewReg!");
68006f32e7eSjoerg
68106f32e7eSjoerg Classes[AntiDepReg] = nullptr;
68206f32e7eSjoerg DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
68306f32e7eSjoerg KillIndices[AntiDepReg] = ~0u;
68406f32e7eSjoerg assert(((KillIndices[AntiDepReg] == ~0u) !=
68506f32e7eSjoerg (DefIndices[AntiDepReg] == ~0u)) &&
68606f32e7eSjoerg "Kill and Def maps aren't consistent for AntiDepReg!");
68706f32e7eSjoerg
68806f32e7eSjoerg RegRefs.erase(AntiDepReg);
68906f32e7eSjoerg LastNewReg[AntiDepReg] = NewReg;
69006f32e7eSjoerg ++Broken;
69106f32e7eSjoerg }
69206f32e7eSjoerg }
69306f32e7eSjoerg
69406f32e7eSjoerg ScanInstruction(MI, Count);
69506f32e7eSjoerg }
69606f32e7eSjoerg
69706f32e7eSjoerg return Broken;
69806f32e7eSjoerg }
699*da58b97aSjoerg
700*da58b97aSjoerg AntiDepBreaker *
createCriticalAntiDepBreaker(MachineFunction & MFi,const RegisterClassInfo & RCI)701*da58b97aSjoerg llvm::createCriticalAntiDepBreaker(MachineFunction &MFi,
702*da58b97aSjoerg const RegisterClassInfo &RCI) {
703*da58b97aSjoerg return new CriticalAntiDepBreaker(MFi, RCI);
704*da58b97aSjoerg }
705