1 /*
2 * Copyright (c) 2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include <cassert>
24 #include <cstddef>
25 #include <cstdio>
26 #include <map>
27 #include <tuple>
28
29 #include "cm_fc_ld.h"
30
31 #include "DepGraph.h"
32
33 using namespace cm::patch;
34
getDepNode(Binary * B,unsigned Off,bool Barrier=false)35 DepNode *DepGraph::getDepNode(Binary *B, unsigned Off, bool Barrier = false) {
36 auto P = std::make_tuple(B, Off, Barrier);
37 auto I = NodeMap.find(P);
38 if (I != NodeMap.end())
39 return I->second;
40 Nodes.push_back(DepNode{B, Off, Barrier});
41 DepNode *N = &Nodes.back();
42 NodeMap[P] = N;
43 return N;
44 }
45
getDepEdge(DepNode * From,DepNode * To,bool FromDef)46 DepEdge *DepGraph::getDepEdge(DepNode *From, DepNode *To, bool FromDef) {
47 if (From == To) // No dependency on itself.
48 return nullptr;
49 auto P = std::make_pair(From, To);
50 auto I = EdgeMap.find(P);
51 if (I != EdgeMap.end())
52 return I->second;
53 // Add new edge.
54 Edges.push_back(DepEdge{From, To, FromDef});
55 DepEdge *E = &Edges.back();
56 EdgeMap[P] = E;
57 From->addToNode(To, FromDef);
58 To->addFromNode(From);
59 return E;
60 }
61
build()62 void DepGraph::build() {
63 // There's no need to build dependency graph for policy 0 & 2.
64 if (Policy == SWSB_POLICY_0 || Policy == SWSB_POLICY_2)
65 return;
66
67 std::map<unsigned, DepNode *> State;
68
69 auto requireDefSync = [](std::map<unsigned, DepNode *> &State) {
70 for (auto &KV : State)
71 if (KV.second->isDefByToken(KV.first))
72 return true;
73 return false;
74 };
75
76 auto requireUseSync = [](std::map<unsigned, DepNode *> &State) {
77 for (auto &KV : State)
78 if (KV.second->isUseByToken(KV.first))
79 return true;
80 return false;
81 };
82
83 unsigned Order = 0;
84 for (auto I = C.bin_begin(), E = C.bin_end(); I != E; ++I) {
85 Binary &B = *I;
86 if (B.getLinkType() == CM_FC_LINK_TYPE_CALLEE)
87 break;
88 B.setOrder(++Order);
89 if (B.getLinkType() == CM_FC_LINK_TYPE_CALLER) {
90 // For caller, add dependency edges to the sync point just before 'call'.
91 Relocation &Rel = *B.rel_begin(); // Assume there's just one 'call'.
92 Symbol *Sym = Rel.getSymbol();
93 Binary &Callee = *Sym->getBinary();
94 // Add a barrier node just before 'call' to resolve dependency.
95 DepNode *Node = getDepNode(&B, Rel.getOffset(), true);
96 for (auto RI = Callee.initreg_begin(),
97 RE = Callee.initreg_end(); RI != RE; ++RI) {
98 unsigned Reg = RI->getRegNo();
99 if (Reg == cm::patch::REG_NONE) {
100 /// Check if it's necessary to insert sync points.
101 bool reqDefSync = requireDefSync(State);
102 bool reqUseSync = requireUseSync(State);
103 if (reqDefSync || reqUseSync) {
104 Node->clearAccList();
105 if (reqDefSync) Node->setRdTokenMask(unsigned(-1));
106 if (reqUseSync) Node->setWrTokenMask(unsigned(-1));
107 B.insertSyncPoint(Node);
108 // Clear state after barrier.
109 State.clear();
110 break;
111 }
112 }
113 // Only build token-based dependency.
114 auto SI = State.find(Reg);
115 if (SI == State.end())
116 continue;
117 auto From = SI->second;
118 // Skip access not by token.
119 if (!From->isDefByToken(Reg) && !From->isUseByToken(Reg))
120 continue;
121 // Skip RAR.
122 if (From->isUseOnly(Reg) && RI->isUse())
123 continue;
124 Node->appendRegAcc(&*RI);
125 auto E = getDepEdge(From, Node, From->isDef(Reg));
126 }
127 // Only update token-based dependency.
128 Node = getDepNode(&B, Rel.getOffset());
129 for (auto RI = Callee.finireg_begin(),
130 RE = Callee.finireg_end(); RI != RE; ++RI) {
131 // Skip use-only access without token associated.
132 if (RI->isDefNotByToken())
133 continue;
134 unsigned Reg = RI->getRegNo();
135 Node->appendRegAcc(&*RI);
136 State[Reg] = Node;
137 }
138 continue;
139 }
140 // Build RAW, WAW, and WAR from the current state to the first access list.
141 for (auto RI = B.initreg_begin(), RE = B.initreg_end(); RI != RE; ++RI) {
142 unsigned Reg = RI->getRegNo();
143 // Check for sync barrier.
144 if (Reg == cm::patch::REG_NONE) {
145 /// Check if it's necessary to insert sync points.
146 bool reqDefSync = requireDefSync(State);
147 bool reqUseSync = requireUseSync(State);
148 if (reqDefSync || reqUseSync) {
149 DepNode *Node = getDepNode(&B, RI->getOffset(), true);
150 if (reqDefSync) Node->setRdTokenMask(unsigned(-1));
151 if (reqUseSync) Node->setWrTokenMask(unsigned(-1));
152 B.insertSyncPoint(Node);
153 // Clean state after barrier.
154 State.clear();
155 break;
156 }
157 }
158 auto SI = State.find(Reg);
159 // Skip if that register has no dependency.
160 if (SI == State.end())
161 continue;
162 auto From = SI->second;
163 // Skip if the previous node is a use without token.
164 if (From->isUseNotByToken(Reg))
165 continue;
166 // Skip RAR.
167 if (From->isUseOnly(Reg) && RI->isUse())
168 continue;
169 auto To = getDepNode(&B, RI->getOffset());
170 To->appendRegAcc(&*RI);
171 auto E = getDepEdge(From, To, From->isDef(Reg));
172 }
173 // Update the current from the last access list.
174 for (auto RI = B.finireg_begin(), RE = B.finireg_end(); RI != RE; ++RI) {
175 // Skip use-only access without token associated.
176 if (RI->isUseNotByToken())
177 continue;
178 unsigned Reg = RI->getRegNo();
179 auto Node = getDepNode(&B, RI->getOffset());
180 Node->appendRegAcc(&*RI);
181 State[Reg] = Node;
182 }
183 }
184 }
185
resolve()186 void DepGraph::resolve() {
187 // In policy 2, do nothing.
188 if (Policy == SWSB_POLICY_2)
189 return;
190
191 if (Policy == SWSB_POLICY_0) {
192 // In policy 0, insert sync barrier before each non-callee kernels.
193 for (auto I = C.bin_begin(), E = C.bin_end(); I != E; ++I) {
194 Binary &B = *I;
195 if (B.getLinkType() == CM_FC_LINK_TYPE_CALLEE)
196 continue;
197 DepNode *Node = getDepNode(&B, 0, true);
198 Node->setRdTokenMask(unsigned(-1));
199 Node->setWrTokenMask(unsigned(-1));
200 B.insertSyncPoint(Node);
201 }
202 return;
203 }
204
205 auto getEncodedDistance = [](Binary *Bin, unsigned Off) {
206 uint64_t swsb_mask = 0x000000000000FF00;
207 uint64_t qw = *((uint64_t *)(Bin->getData() + Off));
208 unsigned swsb = unsigned((qw & swsb_mask) >> 8);
209 if ((swsb & 0xf0) == 0x00)
210 return swsb & 0x7;
211 if ((swsb & 0x80) == 0x80)
212 return (swsb & 0x70) >> 4;
213 return 0U;
214 };
215
216 auto calcDistance = [](Collection &C,
217 Binary *From, unsigned FOff,
218 Binary *To, unsigned TOff) {
219 // Always assuem there's no compact instruction.
220 if (From == To)
221 return (TOff - FOff) / 16;
222 unsigned D = 0;
223 bool FoundFrom = false;
224 for (auto BI = C.bin_begin(), BE = C.bin_end(); BI != BE; ++BI) {
225 Binary *B = &*BI;
226 if (From == B) {
227 FoundFrom = true;
228 D = B->getSize() - FOff;
229 continue;
230 }
231 if (!FoundFrom)
232 continue;
233 if (To != B) {
234 D += B->getSize();
235 continue;
236 }
237 if (To == B) {
238 D += TOff;
239 break;
240 }
241 }
242 return D / 16;
243 };
244
245 auto countBits = [](unsigned V) {
246 unsigned B;
247 for (B = 0; V; V >>= 1)
248 B += V & 1;
249 return B;
250 };
251
252 // Fix the dependency. Assume edges are processed in the linking/program
253 // order.
254 for (auto EI = Edges.begin(), EE = Edges.end(); EI != EE; ++EI) {
255 auto H = EI->getHead();
256 auto T = EI->getTail();
257
258 if (H->to_empty(EI->isHeadDef()) || T->from_empty())
259 continue;
260
261 if (!T->isBarrier())
262 T->setDistance(getEncodedDistance(T->getBinary(), T->getOffset()));
263
264 for (auto RI = T->acc_begin(), RE = T->acc_end(); RI != RE; ++RI) {
265 RegAccess *To = *RI;
266 unsigned Reg = To->getRegNo();
267 for (auto DI = T->from_begin(), DE = T->from_end(); DI != DE; ++DI) {
268 auto Def = *DI;
269 for (auto AI = Def->acc_begin(), AE = Def->acc_end(); AI != AE; ++AI) {
270 RegAccess *From = *AI;
271 if (From->getRegNo() != Reg)
272 continue;
273 bool HasToken;
274 unsigned Tok;
275 std::tie(HasToken, Tok) = From->getToken();
276 if (From->isDef()) {
277 // RAW or WAW
278 if (HasToken)
279 T->mergeWrTokenMask(1 << Tok);
280 else
281 T->updateDistance(calcDistance(C,
282 Def->getBinary(), Def->getOffset(),
283 T->getBinary(), T->getOffset()));
284 } else if (To->isDef()) {
285 // WAR
286 if (HasToken)
287 T->mergeRdTokenMask(1 << Tok);
288 }
289 }
290 }
291 }
292
293 // Revise src token as syncing on def token implies syncing on src token.
294 if (T->getRdTokenMask() != unsigned(-1)) {
295 unsigned Mask = T->getRdTokenMask();
296 Mask &= ~T->getWrTokenMask();
297 T->setRdTokenMask(Mask);
298 }
299
300 // Update SWSB info and check if we need additional sync barrier.
301 if (T->getDistance() > 7) // Clear distance if it's greater than 7.
302 T->setDistance(0);
303 unsigned NumRdToks = countBits(T->getRdTokenMask());
304 unsigned NumWrToks = countBits(T->getWrTokenMask());
305 if ((NumRdToks + NumWrToks) > 0) {
306 // Already has SBID associated. Need to insert additional sync barrier.
307 DepNode *Node = T;
308 if (!T->isBarrier())
309 Node = getDepNode(T->getBinary(), T->getOffset(), true);
310 // Rd tokens.
311 if (NumRdToks == 1)
312 Node->setRdTokenMask(T->getRdTokenMask());
313 else if (NumRdToks != 0)
314 Node->setRdTokenMask(unsigned(-1));
315 // Wr tokens.
316 if (NumWrToks == 1)
317 Node->setWrTokenMask(T->getWrTokenMask());
318 else if (NumWrToks != 0)
319 Node->setWrTokenMask(unsigned(-1));
320 T->getBinary()->insertSyncPoint(Node);
321 }
322
323 for (auto DI = T->from_begin(), DE = T->from_end(); DI != DE; ++DI)
324 (*DI)->clearToNodes(EI->isHeadDef());
325 T->clearFromNodes();
326 }
327 }
328
329