1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "PullConstantHeuristics.hpp"
10
11 #include "Platform.hpp"
12
13 using namespace llvm;
14 using namespace IGC;
15
16
// Per-instruction cycle weights consumed by estimateShaderLifetime().
static constexpr unsigned EUInstCycleCount = 2;        // weight of one estimated EU instruction
static constexpr unsigned SendInstCycleCount = 190;    // weight of one send message
static constexpr unsigned RTWriteInstCycleCount = 190; // weight of one RTWrite
20
21 char PullConstantHeuristics::ID = 0;
22
23 #define PASS_FLAG "Analyse shader to determine push const threshold"
24 #define PASS_DESCRIPTION "This is an analysis pass for pulling constants for short shaders "
25 #define PASS_CFG_ONLY true
26 #define PASS_ANALYSIS true
IGC_INITIALIZE_PASS_BEGIN(PullConstantHeuristics,PASS_FLAG,PASS_DESCRIPTION,PASS_CFG_ONLY,PASS_ANALYSIS)27 IGC_INITIALIZE_PASS_BEGIN(PullConstantHeuristics, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
28 IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenContextWrapper)
29 IGC_INITIALIZE_PASS_END(PullConstantHeuristics, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
30
31
32 //needs to be fine-tuned after obtaining feedback from visa
33 static unsigned estimateShaderLifetime(unsigned int EUCnt, unsigned SendMsgCnt, unsigned RTWriteCnt)
34 {
35 return EUCnt * EUInstCycleCount + SendMsgCnt * SendInstCycleCount + RTWriteCnt * RTWriteInstCycleCount;
36 }
37
isSendMessage(GenIntrinsicInst * inst)38 static bool isSendMessage(GenIntrinsicInst* inst)
39 {
40 if (isSampleInstruction(inst) || isSampleLoadGather4InfoInstruction(inst))
41 {
42 return true;
43 }
44 return false;
45 }
46
47 //approximating EU insts count - TODO: need a better way to do this
getEUInstEstimate(Instruction * inst)48 static unsigned getEUInstEstimate(Instruction* inst)
49 {
50 //handle ALU, Logical, and load-store insts
51 //Presently we're restricting to shaders with 1 BB only, i.e, short shaders
52 //But this handles branch insts as well (in case of multiple BB support in future)
53 if (inst->getOpcode() <= Instruction::Fence)
54 {
55 return 1;
56 }
57
58 //handling remaining inst types
59 switch (inst->getOpcode())
60 {
61 case Instruction::FCmp:
62 case Instruction::ICmp:
63 case Instruction::Select:
64 case Instruction::Ret:
65 return 1;
66 default:
67 //bitcast insts don't make an EU inst in visa
68 return 0;
69 }
70 }
71
72 //estimate EU, SendMsg and RTWrite insts required by the PS
getInstStats(const Function & F)73 static std::tuple<unsigned, unsigned, unsigned> getInstStats(const Function& F) {
74 unsigned EUInstCnt = 0;
75 unsigned int SendMsgInstCnt = 0;
76 unsigned int RTWriteInstCnt = 0;
77 for (auto BBI = F.getBasicBlockList().begin(); BBI != F.getBasicBlockList().end(); BBI++)
78 {
79 llvm::BasicBlock* BB = const_cast<llvm::BasicBlock*>(&*BBI);
80 for (auto II = BB->begin(); II != BB->end(); II++)
81 {
82 if (llvm::GenIntrinsicInst * pIntrinsic = llvm::dyn_cast<llvm::GenIntrinsicInst>(II))
83 {
84 if (isSendMessage(pIntrinsic))
85 SendMsgInstCnt++;
86 else if (pIntrinsic->getIntrinsicID() == GenISAIntrinsic::GenISA_RTWrite)
87 RTWriteInstCnt++;
88 }
89 else
90 {
91 EUInstCnt += getEUInstEstimate(&*II);
92 }
93 }
94 }
95 return std::make_tuple(EUInstCnt, SendMsgInstCnt, RTWriteInstCnt);
96 }
97
98 //Pixel Shader Dispatch can be bottleneck if
99 // thread_payload_size > max(simd4_sample_instr, simd4_eu_instr / 16, simd4_rt_write * 2, shader_lifetime / 56)
getPSDBottleNeckThreshold(const Function & F)100 unsigned int PullConstantHeuristics::getPSDBottleNeckThreshold(const Function& F)
101 {
102 CodeGenContext* ctx = nullptr;
103 ctx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
104 const unsigned numThreadsPerSubslice = ctx->platform.getMaxNumberThreadPerSubslice(); //read from ctx.platform
105 const unsigned roofLineIPC = 16;
106 const unsigned pixelRate = 2;
107
108 unsigned EstimatedEUInstCnt = 0;
109 unsigned SendMsgInstCnt = 0;
110 unsigned RTWriteInstCnt = 0;
111 std::tie(EstimatedEUInstCnt, SendMsgInstCnt, RTWriteInstCnt) = getInstStats(F);
112
113 unsigned shaderLifetime = estimateShaderLifetime(EstimatedEUInstCnt, SendMsgInstCnt, RTWriteInstCnt);
114
115 //calculate payload size threshold assuming SIMD16 to stop pushing constants
116 unsigned SIMD4_EU_Cnt = EstimatedEUInstCnt * 4;
117 unsigned SIMD4_Sample_Cnt = SendMsgInstCnt * 4;
118 unsigned SIMD4_RT_Write_Cnt = RTWriteInstCnt * 4;
119 unsigned payloadThreshold = std::max(std::max(SIMD4_Sample_Cnt, SIMD4_EU_Cnt / roofLineIPC),
120 std::max(SIMD4_RT_Write_Cnt * pixelRate, shaderLifetime / numThreadsPerSubslice));
121 return payloadThreshold;
122 }
123
124 //currentPayloadSize = payloadHeaderSIMD16 + payloadBarySIMD16 + inputSize;
125 //
getCurrentPayloadSizeEstimate(const Function & F)126 static unsigned getCurrentPayloadSizeEstimate(const Function& F)
127 {
128 unsigned payloadHeaderSIMD16 = 3;
129 unsigned payloadBarySIMD16 = 4;
130 unsigned inputGRFSize = 1;
131
132 //helper variables
133 unsigned maxValueFromInputVec = 0;
134 std::set<unsigned> countOfDifferentBary;
135 for (auto BBI = F.getBasicBlockList().begin(); BBI != F.getBasicBlockList().end(); BBI++)
136 {
137 llvm::BasicBlock* BB = const_cast<llvm::BasicBlock*>(&*BBI);
138 for (auto II = BB->begin(); II != BB->end(); II++)
139 {
140 if (llvm::GenIntrinsicInst * pIntrinsic = llvm::dyn_cast<llvm::GenIntrinsicInst>(II))
141 {
142 if (pIntrinsic->getIntrinsicID() == GenISAIntrinsic::GenISA_DCL_inputVec)
143 {
144 countOfDifferentBary.insert((unsigned)llvm::cast<llvm::ConstantInt>(II->getOperand(1))->getZExtValue());
145 maxValueFromInputVec = std::max(maxValueFromInputVec, (unsigned)llvm::cast<llvm::ConstantInt>(II->getOperand(0))->getZExtValue());
146 }
147 }
148 }
149 }
150 payloadBarySIMD16 = countOfDifferentBary.size() * 4;
151 inputGRFSize = (unsigned)(maxValueFromInputVec / 2) % 2 == 0 ? (maxValueFromInputVec / 2) : ((maxValueFromInputVec / 2) + 1);
152 return payloadHeaderSIMD16 + payloadBarySIMD16 + inputGRFSize;
153 }
154
runOnModule(Module & M)155 bool PullConstantHeuristics::runOnModule(Module& M)
156 {
157 if (IGC_IS_FLAG_ENABLED(DisablePullConstantHeuristics))
158 {
159 return false;
160 }
161 CodeGenContext* ctx = nullptr;
162 ctx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
163 if (ctx->type == ShaderType::PIXEL_SHADER && ctx->platform.hasPSDBottleneck())
164 {
165 for (auto& F : M)
166 {
167 if (F.getBasicBlockList().size() == 1)
168 {
169 BasicBlock* BB = &(*F.begin());
170 if (BB->getInstList().size() < 200)
171 {//short shaders
172 int PSDBottleNeckThreshold = getPSDBottleNeckThreshold(F);
173 int PayloadWithoutConstants = getCurrentPayloadSizeEstimate(F);
174 int maxPayload_Regkey = (IGC_GET_FLAG_VALUE(PayloadSizeThreshold));
175 PSDBottleNeckThreshold = PSDBottleNeckThreshold > maxPayload_Regkey ?
176 maxPayload_Regkey : PSDBottleNeckThreshold;
177
178 int threshold = PSDBottleNeckThreshold - PayloadWithoutConstants;
179 threshold = threshold < 0 ? 0 : threshold;
180 thresholdMap.insert(std::make_pair(&F, threshold));
181 }
182 }
183 }
184
185 }
186 return false;//does not modify IR
187 }
188