//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Any MIMG instructions that use tfe or lwe require an initialization of the
/// result register that will be written in the case of a memory access
/// failure. The required code is also added to tie this init code to the
/// result of the img instruction.
///
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

#define DEBUG_TYPE "si-img-init"

using namespace llvm;

27 namespace {
28
29 class SIAddIMGInit : public MachineFunctionPass {
30 public:
31 static char ID;
32
33 public:
SIAddIMGInit()34 SIAddIMGInit() : MachineFunctionPass(ID) {
35 initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
36 }
37
38 bool runOnMachineFunction(MachineFunction &MF) override;
39
getAnalysisUsage(AnalysisUsage & AU) const40 void getAnalysisUsage(AnalysisUsage &AU) const override {
41 AU.setPreservesCFG();
42 MachineFunctionPass::getAnalysisUsage(AU);
43 }
44 };
45
46 } // End anonymous namespace.
47
48 INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)
49
50 char SIAddIMGInit::ID = 0;
51
52 char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;
53
createSIAddIMGInitPass()54 FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
55
runOnMachineFunction(MachineFunction & MF)56 bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
57 MachineRegisterInfo &MRI = MF.getRegInfo();
58 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
59 const SIInstrInfo *TII = ST.getInstrInfo();
60 const SIRegisterInfo *RI = ST.getRegisterInfo();
61 bool Changed = false;
62
63 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
64 ++BI) {
65 MachineBasicBlock &MBB = *BI;
66 MachineBasicBlock::iterator I, Next;
67 for (I = MBB.begin(); I != MBB.end(); I = Next) {
68 Next = std::next(I);
69 MachineInstr &MI = *I;
70
71 auto Opcode = MI.getOpcode();
72 if (TII->isMIMG(Opcode) && !MI.mayStore()) {
73 MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
74 MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
75 MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
76
77 if (!TFE && !LWE) // intersect_ray
78 continue;
79
80 unsigned TFEVal = TFE->getImm();
81 unsigned LWEVal = LWE->getImm();
82 unsigned D16Val = D16 ? D16->getImm() : 0;
83
84 if (TFEVal || LWEVal) {
85 // At least one of TFE or LWE are non-zero
86 // We have to insert a suitable initialization of the result value and
87 // tie this to the dest of the image instruction.
88
89 const DebugLoc &DL = MI.getDebugLoc();
90
91 int DstIdx =
92 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
93
94 // Calculate which dword we have to initialize to 0.
95 MachineOperand *MO_Dmask =
96 TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
97
98 // check that dmask operand is found.
99 assert(MO_Dmask && "Expected dmask operand in instruction");
100
101 unsigned dmask = MO_Dmask->getImm();
102 // Determine the number of active lanes taking into account the
103 // Gather4 special case
104 unsigned ActiveLanes =
105 TII->isGather4(Opcode) ? 4 : countPopulation(dmask);
106
107 bool Packed = !ST.hasUnpackedD16VMem();
108
109 unsigned InitIdx =
110 D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
111
112 // Abandon attempt if the dst size isn't large enough
113 // - this is in fact an error but this is picked up elsewhere and
114 // reported correctly.
115 uint32_t DstSize =
116 RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
117 if (DstSize < InitIdx)
118 continue;
119
120 // Create a register for the intialization value.
121 Register PrevDst =
122 MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
123 unsigned NewDst = 0; // Final initialized value will be in here
124
125 // If PRTStrictNull feature is enabled (the default) then initialize
126 // all the result registers to 0, otherwise just the error indication
127 // register (VGPRn+1)
128 unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
129 unsigned CurrIdx = ST.usePRTStrictNull() ? 0 : (InitIdx - 1);
130
131 if (DstSize == 1) {
132 // In this case we can just initialize the result directly
133 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
134 .addImm(0);
135 NewDst = PrevDst;
136 } else {
137 BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
138 for (; SizeLeft; SizeLeft--, CurrIdx++) {
139 NewDst =
140 MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
141 // Initialize dword
142 Register SubReg =
143 MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
144 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
145 .addImm(0);
146 // Insert into the super-reg
147 BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
148 .addReg(PrevDst)
149 .addReg(SubReg)
150 .addImm(SIRegisterInfo::getSubRegFromChannel(CurrIdx));
151
152 PrevDst = NewDst;
153 }
154 }
155
156 // Add as an implicit operand
157 MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);
158
159 // Tie the just added implicit operand to the dst
160 MI.tieOperands(DstIdx, MI.getNumOperands() - 1);
161
162 Changed = true;
163 }
164 }
165 }
166 }
167
168 return Changed;
169 }
170