1 //===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// Any MIMG instructions that use tfe or lwe require an initialization of the
/// result register that will be written in the case of a memory access failure.
/// The required code is also added to tie this init code to the result of the
/// img instruction.
14 ///
15 //===----------------------------------------------------------------------===//
16 //
17 
18 #include "AMDGPU.h"
19 #include "GCNSubtarget.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 
23 #define DEBUG_TYPE "si-img-init"
24 
25 using namespace llvm;
26 
27 namespace {
28 
29 class SIAddIMGInit : public MachineFunctionPass {
30 public:
31   static char ID;
32 
33 public:
SIAddIMGInit()34   SIAddIMGInit() : MachineFunctionPass(ID) {
35     initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
36   }
37 
38   bool runOnMachineFunction(MachineFunction &MF) override;
39 
getAnalysisUsage(AnalysisUsage & AU) const40   void getAnalysisUsage(AnalysisUsage &AU) const override {
41     AU.setPreservesCFG();
42     MachineFunctionPass::getAnalysisUsage(AU);
43   }
44 };
45 
46 } // End anonymous namespace.
47 
48 INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)
49 
50 char SIAddIMGInit::ID = 0;
51 
52 char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;
53 
createSIAddIMGInitPass()54 FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
55 
runOnMachineFunction(MachineFunction & MF)56 bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
57   MachineRegisterInfo &MRI = MF.getRegInfo();
58   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
59   const SIInstrInfo *TII = ST.getInstrInfo();
60   const SIRegisterInfo *RI = ST.getRegisterInfo();
61   bool Changed = false;
62 
63   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
64        ++BI) {
65     MachineBasicBlock &MBB = *BI;
66     MachineBasicBlock::iterator I, Next;
67     for (I = MBB.begin(); I != MBB.end(); I = Next) {
68       Next = std::next(I);
69       MachineInstr &MI = *I;
70 
71       auto Opcode = MI.getOpcode();
72       if (TII->isMIMG(Opcode) && !MI.mayStore()) {
73         MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
74         MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
75         MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
76 
77         if (!TFE && !LWE) // intersect_ray
78           continue;
79 
80         unsigned TFEVal = TFE->getImm();
81         unsigned LWEVal = LWE->getImm();
82         unsigned D16Val = D16 ? D16->getImm() : 0;
83 
84         if (TFEVal || LWEVal) {
85           // At least one of TFE or LWE are non-zero
86           // We have to insert a suitable initialization of the result value and
87           // tie this to the dest of the image instruction.
88 
89           const DebugLoc &DL = MI.getDebugLoc();
90 
91           int DstIdx =
92               AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
93 
94           // Calculate which dword we have to initialize to 0.
95           MachineOperand *MO_Dmask =
96               TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
97 
98           // check that dmask operand is found.
99           assert(MO_Dmask && "Expected dmask operand in instruction");
100 
101           unsigned dmask = MO_Dmask->getImm();
102           // Determine the number of active lanes taking into account the
103           // Gather4 special case
104           unsigned ActiveLanes =
105               TII->isGather4(Opcode) ? 4 : countPopulation(dmask);
106 
107           bool Packed = !ST.hasUnpackedD16VMem();
108 
109           unsigned InitIdx =
110               D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
111 
112           // Abandon attempt if the dst size isn't large enough
113           // - this is in fact an error but this is picked up elsewhere and
114           // reported correctly.
115           uint32_t DstSize =
116               RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
117           if (DstSize < InitIdx)
118             continue;
119 
120           // Create a register for the intialization value.
121           Register PrevDst =
122               MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
123           unsigned NewDst = 0; // Final initialized value will be in here
124 
125           // If PRTStrictNull feature is enabled (the default) then initialize
126           // all the result registers to 0, otherwise just the error indication
127           // register (VGPRn+1)
128           unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
129           unsigned CurrIdx = ST.usePRTStrictNull() ? 0 : (InitIdx - 1);
130 
131           if (DstSize == 1) {
132             // In this case we can just initialize the result directly
133             BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
134                 .addImm(0);
135             NewDst = PrevDst;
136           } else {
137             BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
138             for (; SizeLeft; SizeLeft--, CurrIdx++) {
139               NewDst =
140                   MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
141               // Initialize dword
142               Register SubReg =
143                   MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
144               BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
145                   .addImm(0);
146               // Insert into the super-reg
147               BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
148                   .addReg(PrevDst)
149                   .addReg(SubReg)
150                   .addImm(SIRegisterInfo::getSubRegFromChannel(CurrIdx));
151 
152               PrevDst = NewDst;
153             }
154           }
155 
156           // Add as an implicit operand
157           MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);
158 
159           // Tie the just added implicit operand to the dst
160           MI.tieOperands(DstIdx, MI.getNumOperands() - 1);
161 
162           Changed = true;
163         }
164       }
165     }
166   }
167 
168   return Changed;
169 }
170