1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "Dependencies_G4IR.h"
10 #include "../G4_IR.hpp"
11
12 using namespace vISA;
13
// Kind of memory dependence a caller asks the interference helpers to test for.
// Numbering starts at 1, so no enumerator has the value 0.
enum retDepType
{
    RET_RAW = 1,
    RET_WAW = 2,
    RET_WAR = 3
};
15
16 // Checks for memory interferences created with the "send" instruction for data port.
DoMemoryInterfereSend(G4_InstSend * send1,G4_InstSend * send2,retDepType depT,bool BTIIsRestrict)17 static DepType DoMemoryInterfereSend(G4_InstSend *send1, G4_InstSend *send2, retDepType depT, bool BTIIsRestrict)
18 {
19 // If either instruction is not a send then there cannot be a memory interference.
20 if (!send1 || !send2 || !send1->isSend() || !send2->isSend())
21 {
22 return NODEP;
23 }
24 auto isBarrierOrAtomic = [](const G4_InstSend *i) {
25 return i->getMsgDesc()->isBarrier() || i->getMsgDesc()->isAtomic();
26 };
27 if (isBarrierOrAtomic(send1) || isBarrierOrAtomic(send2)) {
28 return MSG_BARRIER;
29 }
30
31 bool isSend1DataPort = send1->getMsgDesc()->isHDC() || send1->getMsgDesc()->isLSC();
32 bool isSend2DataPort = send2->getMsgDesc()->isHDC() || send2->getMsgDesc()->isLSC();
33
34 SFID funcId1 = send1->getMsgDesc()->getSFID();
35 SFID funcId2 = send2->getMsgDesc()->getSFID();
36
37 if (funcId1 == SFID::SAMPLER || funcId2 == SFID::SAMPLER)
38 {
39 // sampler acess will never have memory conflict
40 return NODEP;
41 }
42
43 #define MSG_DESC_BTI_MASK 0xFF
44 #define RESERVED_BTI_START 240
45 if (isSend1DataPort ^ isSend2DataPort)
46 {
47 // HDC messages will not conflict with other HDC messages (e.g., SAMPLER, URB, RT_WRITE)
48 return NODEP;
49 }
50 else if (isSend1DataPort && isSend2DataPort)
51 {
52 auto hasImmediateBTI = [](G4_InstSend* send, unsigned int &bti){
53 G4_SendDescRaw* msgDesc = send->getMsgDescRaw();
54 if (msgDesc && msgDesc->isLSC() && msgDesc->getLscAddrType() == LSC_ADDR_TYPE_BTI && msgDesc->getSurface() == nullptr) {
55 // LSC messages
56 bti = msgDesc->getExtendedDesc() >> 24;
57 return true;
58 }
59 else if (msgDesc && msgDesc->isHDC() && msgDesc->getSurface() && send->getMsgDescOperand()->isImm())
60 {
61 // HDC messages
62 bti = (unsigned int)send->getMsgDescOperand()->asImm()->getInt() & MSG_DESC_BTI_MASK;
63 return true;
64 }
65 return false;
66 };
67
68 unsigned int bti1 = 0, bti2 = 0;
69 if (send1->getMsgDesc()->isSLM() ^ send2->getMsgDesc()->isSLM())
70 {
71 // SLM may not conflict with other non-SLM messages
72 return NODEP;
73 }
74 else if (hasImmediateBTI(send1, bti1) && hasImmediateBTI(send2, bti2))
75 {
76 auto isBTS = [](uint32_t bti) { return bti < RESERVED_BTI_START; };
77 if (BTIIsRestrict && isBTS(bti1) && isBTS(bti2) && bti1 != bti2)
78 {
79 // different BTI means no conflict for DP messages
80 return NODEP;
81 }
82 }
83 }
84
85 // TODO: We can add more precise memory conflict checks here for special messages
86 // (e.g., URB that have constant offset)
87
88 // scratch RW may only conflict with other scratch RW
89 if (send1->getMsgDesc()->isScratch() != send2->getMsgDesc()->isScratch())
90 {
91 return NODEP;
92 }
93
94 // Determine any relevant memory interferences through data port operations.
95 if (send1->getMsgDesc()->isWrite())
96 {
97 if (depT == RET_RAW && send2->getMsgDesc()->isRead())
98 {
99 return RAW_MEMORY;
100 }
101 else if (depT == RET_WAW && send2->getMsgDesc()->isWrite())
102 {
103 return WAW_MEMORY;
104 }
105 else
106 {
107 return NODEP;
108 }
109 }
110 else if (send1->getMsgDesc()->isRead())
111 {
112 if (depT == RET_WAR && send2->getMsgDesc()->isWrite())
113 {
114 return WAR_MEMORY;
115 }
116
117 else
118 {
119 return NODEP;
120 }
121 }
122
123 else
124 {
125 return NODEP;
126 }
127 }
128
DoMemoryInterfereScratchSend(G4_INST * send1,G4_INST * send2,retDepType depT)129 static DepType DoMemoryInterfereScratchSend(G4_INST *send1, G4_INST *send2, retDepType depT)
130 {
131 // If either instruction is not a send then there cannot be a memory interference.
132 if (!send1 || !send2 || !send1->isSend() || !send2->isSend())
133 {
134 return NODEP;
135 }
136
137 // scratch RW may only conflict with other scratch RW
138 if (send1->getMsgDesc()->isScratch() != send2->getMsgDesc()->isScratch())
139 {
140 return NODEP;
141 }
142
143 // check dependency between scratch block read/write
144 if (send1->getMsgDesc()->isScratch() && send2->getMsgDesc()->isScratch())
145 {
146 bool send1IsRead = send1->getMsgDesc()->isScratchRead(),
147 send2IsRead = send2->getMsgDesc()->isScratchRead();
148 if (send1IsRead && send2IsRead)
149 {
150 return NODEP;
151 }
152 if ((depT == RET_WAR && send1IsRead && !send2IsRead) ||
153 (depT == RET_WAW && !send1IsRead && !send2IsRead) ||
154 (depT == RET_RAW && !send1IsRead && send2IsRead))
155 {
156
157 uint16_t leftOff1 = send1->getMsgDesc()->getOffset();
158 uint16_t leftOff2 = send2->getMsgDesc()->getOffset();
159 auto bytesAccessed = [](const G4_INST *send) {
160 return send->getMsgDesc()->isRead() ?
161 (uint16_t)send->getMsgDesc()->getDstLenBytes() :
162 (uint16_t)send->getMsgDesc()->getSrc1LenBytes();
163 };
164 uint16_t rightOff1 = leftOff1 + bytesAccessed(send1) - 1;
165 uint16_t rightOff2 = leftOff2 + bytesAccessed(send2) - 1;
166 if (leftOff1 > rightOff2 || leftOff2 > rightOff1)
167 {
168 return NODEP;
169 }
170
171 if (send1IsRead && !send2IsRead)
172 {
173 return WAR_MEMORY;
174 }
175 if (!send1IsRead && !send2IsRead)
176 {
177 return WAW_MEMORY;
178 }
179 if (!send1IsRead && send2IsRead)
180 {
181 return RAW_MEMORY;
182 }
183 }
184 return NODEP;
185 }
186 else
187 {
188 return NODEP;
189 }
190 }
191
getDepSend(G4_INST * curInst,G4_INST * liveInst,bool BTIIsRestrict)192 DepType vISA::getDepSend(G4_INST *curInst, G4_INST *liveInst, bool BTIIsRestrict)
193 {
194 for (auto RDEP : { RET_RAW, RET_WAR, RET_WAW })
195 {
196 DepType dep = DoMemoryInterfereSend(curInst->asSendInst(), liveInst->asSendInst(), RDEP, BTIIsRestrict);
197 if (dep != NODEP)
198 return dep;
199 }
200 return NODEP;
201 }
202
getDepScratchSend(G4_INST * curInst,G4_INST * liveInst)203 DepType vISA::getDepScratchSend(G4_INST *curInst, G4_INST *liveInst)
204 {
205 for (auto RDEP : { RET_RAW, RET_WAR, RET_WAW })
206 {
207 DepType dep = DoMemoryInterfereScratchSend(curInst, liveInst, RDEP);
208 if (dep != NODEP)
209 return dep;
210 }
211 return NODEP;
212 }
213
CheckBarrier(G4_INST * inst)214 DepType vISA::CheckBarrier(G4_INST *inst)
215 {
216 if (inst->isOptBarrier() || inst->isAtomicInst())
217 {
218 return OPT_BARRIER;
219 }
220 if (inst->isSend())
221 {
222 if (inst->asSendInst()->isSendc())
223 {
224 // sendc may imply synchronization
225 return SEND_BARRIER;
226 }
227 if (inst->getMsgDesc()->isEOT())
228 {
229 // Send with the EOT message desciptor is a barrier.
230 return SEND_BARRIER;
231 }
232 else if (inst->getMsgDescRaw() && inst->getMsgDescRaw()->isThreadMessage())
233 {
234 return MSG_BARRIER;
235 }
236 }
237 else if (inst->opcode() == G4_wait || inst->isYieldInst())
238 {
239 return MSG_BARRIER;
240 }
241 else if (inst->isFlowControl())
242 {
243 // All control flow instructions are scheduling barriers
244 return CONTROL_FLOW_BARRIER;
245 }
246
247 return NODEP;
248 }
249
250 // Return the dependence type {RAW,WAW,WAR,NODEP} for the given operand numbers
getDepForOpnd(Gen4_Operand_Number cur,Gen4_Operand_Number liv)251 DepType vISA::getDepForOpnd(Gen4_Operand_Number cur, Gen4_Operand_Number liv)
252 {
253 assert(Opnd_dst <= cur && cur < Opnd_total_num && "bad operand #");
254 assert(Opnd_dst <= liv && liv < Opnd_total_num && "bad operand #");
255 static constexpr DepType matrix[Opnd_total_num][Opnd_total_num] = {
256 /*dst, src0, src1, src2, src3, src4, src5, src6, src7, pred, condMod, implAccSrc, implAccDst */
257 /*dst*/ { WAW, RAW, RAW, RAW, RAW, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, RAW, WAW, RAW, WAW },
258 /*src0*/ { WAR, NODEP, NODEP, NODEP, NODEP, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, NODEP, WAR, NODEP, WAR },
259 /*src1*/ { WAR, NODEP, NODEP, NODEP, NODEP, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, NODEP, WAR, NODEP, WAR },
260 /*src2*/ { WAR, NODEP, NODEP, NODEP, NODEP, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, NODEP, WAR, NODEP, WAR },
261 /*src3*/ { WAR, NODEP, NODEP, NODEP, NODEP, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, NODEP, WAR, NODEP, WAR },
262 /*src4*/ { DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX },
263 /*src5*/ { DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX },
264 /*src6*/ { DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX },
265 /*src7*/ { DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX },
266 /*pred*/ { WAR, NODEP, NODEP, NODEP, NODEP, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, NODEP, WAR, NODEP, WAR },
267 /*condMod*/ { WAW, RAW, RAW, RAW, RAW, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, RAW, WAW, RAW, WAW },
268 /*implAccSrc*/ { WAR, NODEP, NODEP, NODEP, NODEP, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, NODEP, WAR, NODEP, WAR },
269 /*implAccDst*/ { WAW, RAW, RAW, RAW, RAW, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, DEPTYPE_MAX, RAW, WAW, RAW, WAW },
270 };
271 assert(matrix[cur][liv] != DEPTYPE_MAX && "undefined dependency");
272 return matrix[cur][liv];
273 }
274