1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "MCSOptimization.hpp"
10 #include "IGCPassSupport.h"
11 #include "GenISAIntrinsics/GenIntrinsicInst.h"
12 #include "Compiler/CodeGenPublic.h"
13 #include "Compiler/WorkaroundAnalysisPass.h"
14 #include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
15 #include <set>
16 #include "common/LLVMWarningsPush.hpp"
17 #include "llvm/IR/Function.h"
18 #include <llvm/IR/InstVisitor.h>
19 #include <llvm/IR/IRBuilder.h>
20 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
21 #include <llvm/Support/Casting.h>
22 #include "common/LLVMWarningsPop.hpp"
23 #include "common/IGCIRBuilder.h"
24 #include "common/igc_regkeys.hpp"
25 #include "Probe/Assertion.h"
26
27 using namespace llvm;
28 using namespace IGC;
/************************************************************************
This transformation is not safe in general. It can be applied only in these cases:
- We know that the resource is MCS compressed
- We know that no out-of-bounds sample index is accessed
************************************************************************/
34 class MCSOptimization : public FunctionPass, public InstVisitor<MCSOptimization>
35 {
36 public:
MCSOptimization()37 MCSOptimization() : FunctionPass(ID) {}
38 bool runOnFunction(Function& F);
39 void visitCallInst(llvm::CallInst& I);
getAnalysisUsage(llvm::AnalysisUsage & AU) const40 void getAnalysisUsage(llvm::AnalysisUsage& AU) const
41 {
42 AU.addRequired<CodeGenContextWrapper>();
43 }
getPassName() const44 virtual llvm::StringRef getPassName() const
45 {
46 return "MCSOptimization";
47 }
48
49 static char ID;
50 bool m_changed;
51
52 private:
shaderSamplesCompressedSurfaces(CodeGenContext * ctx)53 bool shaderSamplesCompressedSurfaces(CodeGenContext* ctx)
54 {
55 ModuleMetaData* modMD = ctx->getModuleMetaData();
56 for (unsigned int i = 0; i < NUM_SHADER_RESOURCE_VIEW_SIZE; i++)
57 {
58 if (modMD->m_ShaderResourceViewMcsMask[i] != 0)
59 {
60 return true;
61 }
62 }
63 return false;
64 }
65 protected:
66 };
67
68 char MCSOptimization::ID = 0;
69
runOnFunction(Function & F)70 bool MCSOptimization::runOnFunction(Function& F)
71 {
72
73 if (IGC_IS_FLAG_ENABLED(DisableMCSOpt))
74 {
75 return false;
76 }
77 m_changed = false;
78 visit(F);
79 return m_changed;
80 }
81
visitCallInst(llvm::CallInst & I)82 void MCSOptimization::visitCallInst(llvm::CallInst& I)
83 {
84 Function* F = I.getParent()->getParent();
85 IGCIRBuilder<> IRB(F->getContext());
86
87 if (LdmcsInstrinsic * ldMcs = dyn_cast<LdmcsInstrinsic>(&I))
88 {
89 CodeGenContext* ctx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
90
91 {
92 if (!shaderSamplesCompressedSurfaces(ctx))
93 {
94 return;
95 }
96
97 llvm::Value* textureArgValue = ldMcs->getTextureValue();
98 uint textureIndex;
99 if (textureArgValue->getType()->isPointerTy())
100 {
101 uint addrSpace = textureArgValue->getType()->getPointerAddressSpace();
102 uint bufferIndex = 0;
103 bool directIdx = false;
104 DecodeAS4GFXResource(addrSpace, directIdx, bufferIndex);
105 textureIndex = bufferIndex;
106 }
107 else
108 {
109 textureIndex = int_cast<uint>(GetImmediateVal(textureArgValue));
110 }
111
112 const unsigned int shaderResourceViewMcsMaskIndex = textureIndex / BITS_PER_QWORD;
113 const unsigned long long resourceViewMcsMaskElement = ctx->getModuleMetaData()->m_ShaderResourceViewMcsMask[shaderResourceViewMcsMaskIndex];
114 const unsigned int resourceViewMaskTextureBit = textureIndex % BITS_PER_QWORD;
115 IGC_ASSERT_MESSAGE(textureIndex <= 127, "Texture index is incorrectly extracted from ld_mcs");
116
117 unsigned long long resultBit = resourceViewMcsMaskElement >> resourceViewMaskTextureBit;
118 if ((resultBit & 1) == 0)
119 {
120 return;
121 }
122 }
123 ExtractElementInst* EEI = nullptr;
124 for (auto useItr : ldMcs->users())
125 {
126 if (ExtractElementInst * ee1 = dyn_cast<ExtractElementInst>(useItr))
127 {
128 if (ConstantInt * channel = dyn_cast<ConstantInt>(ee1->getOperand(1)))
129 {
130 if (channel->isZero())
131 {
132 EEI = ee1;
133 break;
134 }
135 }
136 }
137 }
138
139 if (EEI != nullptr)
140 {
141 if (EEI->hasOneUse())
142 return; //only one use of EEI -- noOptimization
143
144 LdmsInstrinsic* firstUse = nullptr;
145
146 for (auto it = EEI->getIterator(); it != EEI->getParent()->end(); ++it)
147 {
148 if (LdmsInstrinsic * ldmsIntr = dyn_cast<LdmsInstrinsic>(&*it))
149 {
150 if (ldmsIntr->getOperand(1) == dyn_cast<Value>(EEI))
151 {
152 //first use and in the def's BB
153 firstUse = ldmsIntr;
154 break;
155 }
156 }
157 }
158
159 if (!firstUse)
160 return;
161
162 //collect all blocks where this EEI insts is getting used
163 std::set<BasicBlock*> useBlocks;
164 for (auto BitcastUses = EEI->user_begin(); BitcastUses != EEI->user_end(); BitcastUses++)
165 {
166 Instruction* ldmsInst = dyn_cast<Instruction>(*BitcastUses);
167 if (ldmsInst)
168 {
169 if (dyn_cast<ConstantInt>(ldmsInst->getOperand(0)))
170 {
171 useBlocks.insert(ldmsInst->getParent());
172 }
173 else
174 {
175 return;
176 }
177 }
178 }
179
180 //iterate over useBlocks.
181 //For each useBlock, collect all the ldms insts present within the use block corresponding to this EEI
182 for (auto BB : useBlocks)
183 {
184 std::vector<LdmsInstrinsic*> ldmsInstsToMove;
185 for (auto inst = BB->begin(); inst != BB->end(); inst++)
186 {
187 if (LdmsInstrinsic * ldmsIntr = dyn_cast<LdmsInstrinsic>(inst))
188 {
189 if (ldmsIntr->getOperand(1) == dyn_cast<Value>(EEI))
190 {
191 if (ldmsIntr == firstUse)
192 continue; //don't move the first use into the then block , need it for phi Node
193 ldmsInstsToMove.push_back(ldmsIntr);
194 }
195 }
196 }
197
198 //this is added because clubbing all ld2dms into a single then block
199 //increases register pressure and causes spilling
200 int instClubThreshold = IGC_GET_FLAG_VALUE(ld2dmsInstsClubbingThreshold); //# ld2dms insts that can be moved into the then block
201 //int instClubThreshold = 2;
202 bool allInstsWillBeMoved = false;
203
204 while (!allInstsWillBeMoved)
205 {
206 std::vector<LdmsInstrinsic*> ldmsInstsToClub;
207 //Threshold is more than # of insts that are to be moved. So move all.
208 if (instClubThreshold >= static_cast<int>(ldmsInstsToMove.size()))
209 {
210 ldmsInstsToClub = ldmsInstsToMove;
211 allInstsWillBeMoved = true;
212 }
213 else
214 {
215 //pick the first 0-threshold # of insts and move them only
216 for (int i = 0; i < instClubThreshold; i++)
217 {
218 ldmsInstsToClub.push_back(ldmsInstsToMove[i]);
219 }
220 ldmsInstsToMove.erase(ldmsInstsToMove.begin(), ldmsInstsToMove.begin() + instClubThreshold);
221 }
222
223 //split the block into a new then block
224 BasicBlock* ldmsUseBB = nullptr; //second entry to the phi node
225 BasicBlock* thenBlock = nullptr;
226 IGCLLVM::TerminatorInst* thenBlockTerminator = nullptr;
227 if (ldmsInstsToClub.size() != 0)
228 {
229 LdmsInstrinsic* ldmsUse = ldmsInstsToClub[0];
230 ldmsUseBB = ldmsUse->getParent();
231 IRB.SetInsertPoint(ldmsUse);
232 Value* ValueisMCSNotZero = nullptr;
233 for (unsigned int i = 0; i < ldmsUse->getNumMcsOperands(); i++)
234 {
235 Value* mcs = firstUse->getMcsOperand(i);
236 Value* cnd1 = IRB.CreateICmpNE(mcs, ConstantInt::get(mcs->getType(), 0));
237 if (ValueisMCSNotZero == nullptr)
238 {
239 ValueisMCSNotZero = cnd1;
240 }
241 else
242 {
243 ValueisMCSNotZero = IRB.CreateOr(ValueisMCSNotZero, cnd1);
244 }
245 }
246 thenBlockTerminator = SplitBlockAndInsertIfThen(ValueisMCSNotZero, ldmsUse, false);
247 thenBlock = thenBlockTerminator->getParent();
248 }
249
250 //Move the collected ldms insts into the then block and insert their phi nodes in the successor of the then block
251 if (thenBlockTerminator)
252 {
253 for (auto instToMove : ldmsInstsToClub)
254 {
255 instToMove->moveBefore(thenBlockTerminator);
256 IRB.SetInsertPoint(&*(thenBlockTerminator->getSuccessor(0)->begin()));
257 PHINode* PN = IRB.CreatePHI(instToMove->getType(), 2);
258 instToMove->replaceAllUsesWith(PN);
259 PN->addIncoming(instToMove, thenBlock);
260 PN->addIncoming(firstUse, ldmsUseBB);
261 m_changed = true;
262 }
263 }
264
265 }
266 }
267 m_changed = true;
268 }
269 }
270 }
271
272 namespace IGC {
273 #define PASS_FLAG "optimize ld2ms message assuming resources are always compressed"
274 #define PASS_DESCRIPTION "This is an optimization pass for ld2dms message "
275 #define PASS_CFG_ONLY false
276 #define PASS_ANALYSIS true
IGC_INITIALIZE_PASS_BEGIN(MCSOptimization,PASS_FLAG,PASS_DESCRIPTION,PASS_CFG_ONLY,PASS_ANALYSIS)277 IGC_INITIALIZE_PASS_BEGIN(MCSOptimization, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
278 IGC_INITIALIZE_PASS_END(MCSOptimization, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
279
280 FunctionPass* CreateMCSOptimization()
281 {
282 return new MCSOptimization();
283 }
284 }
285