1 /*
2 * Copyright (c) 2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 
23 #include <cassert>
24 #include <cstddef>
25 #include <cstdio>
26 #include <map>
27 #include <tuple>
28 
29 #include "cm_fc_ld.h"
30 
31 #include "DepGraph.h"
32 
33 using namespace cm::patch;
34 
getDepNode(Binary * B,unsigned Off,bool Barrier=false)35 DepNode *DepGraph::getDepNode(Binary *B, unsigned Off, bool Barrier = false) {
36   auto P = std::make_tuple(B, Off, Barrier);
37   auto I = NodeMap.find(P);
38   if (I != NodeMap.end())
39     return I->second;
40   Nodes.push_back(DepNode{B, Off, Barrier});
41   DepNode *N = &Nodes.back();
42   NodeMap[P] = N;
43   return N;
44 }
45 
getDepEdge(DepNode * From,DepNode * To,bool FromDef)46 DepEdge *DepGraph::getDepEdge(DepNode *From, DepNode *To, bool FromDef) {
47   if (From == To) // No dependency on itself.
48     return nullptr;
49   auto P = std::make_pair(From, To);
50   auto I = EdgeMap.find(P);
51   if (I != EdgeMap.end())
52     return I->second;
53   // Add new edge.
54   Edges.push_back(DepEdge{From, To, FromDef});
55   DepEdge *E = &Edges.back();
56   EdgeMap[P] = E;
57   From->addToNode(To, FromDef);
58   To->addFromNode(From);
59   return E;
60 }
61 
build()62 void DepGraph::build() {
63   // There's no need to build dependency graph for policy 0 & 2.
64   if (Policy == SWSB_POLICY_0 || Policy == SWSB_POLICY_2)
65     return;
66 
67   std::map<unsigned, DepNode *> State;
68 
69   auto requireDefSync = [](std::map<unsigned, DepNode *> &State) {
70     for (auto &KV : State)
71       if (KV.second->isDefByToken(KV.first))
72         return true;
73     return false;
74   };
75 
76   auto requireUseSync = [](std::map<unsigned, DepNode *> &State) {
77     for (auto &KV : State)
78       if (KV.second->isUseByToken(KV.first))
79         return true;
80     return false;
81   };
82 
83   unsigned Order = 0;
84   for (auto I = C.bin_begin(), E = C.bin_end(); I != E; ++I) {
85     Binary &B = *I;
86     if (B.getLinkType() == CM_FC_LINK_TYPE_CALLEE)
87       break;
88     B.setOrder(++Order);
89     if (B.getLinkType() == CM_FC_LINK_TYPE_CALLER) {
90       // For caller, add dependency edges to the sync point just before 'call'.
91       Relocation &Rel = *B.rel_begin(); // Assume there's just one 'call'.
92       Symbol *Sym = Rel.getSymbol();
93       Binary &Callee = *Sym->getBinary();
94       // Add a barrier node just before 'call' to resolve dependency.
95       DepNode *Node = getDepNode(&B, Rel.getOffset(), true);
96       for (auto RI = Callee.initreg_begin(),
97                 RE = Callee.initreg_end(); RI != RE; ++RI) {
98         unsigned Reg = RI->getRegNo();
99         if (Reg == cm::patch::REG_NONE) {
100           /// Check if it's necessary to insert sync points.
101           bool reqDefSync = requireDefSync(State);
102           bool reqUseSync = requireUseSync(State);
103           if (reqDefSync || reqUseSync) {
104             Node->clearAccList();
105             if (reqDefSync) Node->setRdTokenMask(unsigned(-1));
106             if (reqUseSync) Node->setWrTokenMask(unsigned(-1));
107             B.insertSyncPoint(Node);
108             // Clear state after barrier.
109             State.clear();
110             break;
111           }
112         }
113         // Only build token-based dependency.
114         auto SI = State.find(Reg);
115         if (SI == State.end())
116           continue;
117         auto From = SI->second;
118         // Skip access not by token.
119         if (!From->isDefByToken(Reg) && !From->isUseByToken(Reg))
120           continue;
121         // Skip RAR.
122         if (From->isUseOnly(Reg) && RI->isUse())
123           continue;
124         Node->appendRegAcc(&*RI);
125         auto E = getDepEdge(From, Node, From->isDef(Reg));
126       }
127       // Only update token-based dependency.
128       Node = getDepNode(&B, Rel.getOffset());
129       for (auto RI = Callee.finireg_begin(),
130                 RE = Callee.finireg_end(); RI != RE; ++RI) {
131         // Skip use-only access without token associated.
132         if (RI->isDefNotByToken())
133           continue;
134         unsigned Reg = RI->getRegNo();
135         Node->appendRegAcc(&*RI);
136         State[Reg] = Node;
137       }
138       continue;
139     }
140     // Build RAW, WAW, and WAR from the current state to the first access list.
141     for (auto RI = B.initreg_begin(), RE = B.initreg_end(); RI != RE; ++RI) {
142       unsigned Reg = RI->getRegNo();
143       // Check for sync barrier.
144       if (Reg == cm::patch::REG_NONE) {
145         /// Check if it's necessary to insert sync points.
146         bool reqDefSync = requireDefSync(State);
147         bool reqUseSync = requireUseSync(State);
148         if (reqDefSync || reqUseSync) {
149           DepNode *Node = getDepNode(&B, RI->getOffset(), true);
150           if (reqDefSync) Node->setRdTokenMask(unsigned(-1));
151           if (reqUseSync) Node->setWrTokenMask(unsigned(-1));
152           B.insertSyncPoint(Node);
153           // Clean state after barrier.
154           State.clear();
155           break;
156         }
157       }
158       auto SI = State.find(Reg);
159       // Skip if that register has no dependency.
160       if (SI == State.end())
161         continue;
162       auto From = SI->second;
163       // Skip if the previous node is a use without token.
164       if (From->isUseNotByToken(Reg))
165         continue;
166       // Skip RAR.
167       if (From->isUseOnly(Reg) && RI->isUse())
168         continue;
169       auto To = getDepNode(&B, RI->getOffset());
170       To->appendRegAcc(&*RI);
171       auto E = getDepEdge(From, To, From->isDef(Reg));
172     }
173     // Update the current from the last access list.
174     for (auto RI = B.finireg_begin(), RE = B.finireg_end(); RI != RE; ++RI) {
175       // Skip use-only access without token associated.
176       if (RI->isUseNotByToken())
177         continue;
178       unsigned Reg = RI->getRegNo();
179       auto Node = getDepNode(&B, RI->getOffset());
180       Node->appendRegAcc(&*RI);
181       State[Reg] = Node;
182     }
183   }
184 }
185 
resolve()186 void DepGraph::resolve() {
187   // In policy 2, do nothing.
188   if (Policy == SWSB_POLICY_2)
189     return;
190 
191   if (Policy == SWSB_POLICY_0) {
192     // In policy 0, insert sync barrier before each non-callee kernels.
193     for (auto I = C.bin_begin(), E = C.bin_end(); I != E; ++I) {
194       Binary &B = *I;
195       if (B.getLinkType() == CM_FC_LINK_TYPE_CALLEE)
196         continue;
197       DepNode *Node = getDepNode(&B, 0, true);
198       Node->setRdTokenMask(unsigned(-1));
199       Node->setWrTokenMask(unsigned(-1));
200       B.insertSyncPoint(Node);
201     }
202     return;
203   }
204 
205   auto getEncodedDistance = [](Binary *Bin, unsigned Off) {
206     uint64_t swsb_mask = 0x000000000000FF00;
207     uint64_t qw = *((uint64_t *)(Bin->getData() + Off));
208     unsigned swsb = unsigned((qw & swsb_mask) >> 8);
209     if ((swsb & 0xf0) == 0x00)
210       return swsb & 0x7;
211     if ((swsb & 0x80) == 0x80)
212       return (swsb & 0x70) >> 4;
213     return 0U;
214   };
215 
216   auto calcDistance = [](Collection &C,
217                          Binary *From, unsigned FOff,
218                          Binary *To, unsigned TOff) {
219     // Always assuem there's no compact instruction.
220     if (From == To)
221       return (TOff - FOff) / 16;
222     unsigned D = 0;
223     bool FoundFrom = false;
224     for (auto BI = C.bin_begin(), BE = C.bin_end(); BI != BE; ++BI) {
225       Binary *B = &*BI;
226       if (From == B) {
227         FoundFrom = true;
228         D = B->getSize() - FOff;
229         continue;
230       }
231       if (!FoundFrom)
232         continue;
233       if (To != B) {
234         D += B->getSize();
235         continue;
236       }
237       if (To == B) {
238         D += TOff;
239         break;
240       }
241     }
242     return D / 16;
243   };
244 
245   auto countBits = [](unsigned V) {
246     unsigned B;
247     for (B = 0; V; V >>= 1)
248       B += V & 1;
249     return B;
250   };
251 
252   // Fix the dependency. Assume edges are processed in the linking/program
253   // order.
254   for (auto EI = Edges.begin(), EE = Edges.end(); EI != EE; ++EI) {
255     auto H = EI->getHead();
256     auto T = EI->getTail();
257 
258     if (H->to_empty(EI->isHeadDef()) || T->from_empty())
259       continue;
260 
261     if (!T->isBarrier())
262       T->setDistance(getEncodedDistance(T->getBinary(), T->getOffset()));
263 
264     for (auto RI = T->acc_begin(), RE = T->acc_end(); RI != RE; ++RI) {
265       RegAccess *To = *RI;
266       unsigned Reg = To->getRegNo();
267       for (auto DI = T->from_begin(), DE = T->from_end(); DI != DE; ++DI) {
268         auto Def = *DI;
269         for (auto AI = Def->acc_begin(), AE = Def->acc_end(); AI != AE; ++AI) {
270           RegAccess *From = *AI;
271           if (From->getRegNo() != Reg)
272             continue;
273           bool HasToken;
274           unsigned Tok;
275           std::tie(HasToken, Tok) = From->getToken();
276           if (From->isDef()) {
277             // RAW or WAW
278             if (HasToken)
279               T->mergeWrTokenMask(1 << Tok);
280             else
281               T->updateDistance(calcDistance(C,
282                                              Def->getBinary(), Def->getOffset(),
283                                              T->getBinary(), T->getOffset()));
284           } else if (To->isDef()) {
285             // WAR
286             if (HasToken)
287               T->mergeRdTokenMask(1 << Tok);
288           }
289         }
290       }
291     }
292 
293     // Revise src token as syncing on def token implies syncing on src token.
294     if (T->getRdTokenMask() != unsigned(-1)) {
295       unsigned Mask = T->getRdTokenMask();
296       Mask &= ~T->getWrTokenMask();
297       T->setRdTokenMask(Mask);
298     }
299 
300     // Update SWSB info and check if we need additional sync barrier.
301     if (T->getDistance() > 7) // Clear distance if it's greater than 7.
302       T->setDistance(0);
303     unsigned NumRdToks = countBits(T->getRdTokenMask());
304     unsigned NumWrToks = countBits(T->getWrTokenMask());
305     if ((NumRdToks + NumWrToks) > 0) {
306       // Already has SBID associated. Need to insert additional sync barrier.
307       DepNode *Node = T;
308       if (!T->isBarrier())
309         Node = getDepNode(T->getBinary(), T->getOffset(), true);
310       // Rd tokens.
311       if (NumRdToks == 1)
312         Node->setRdTokenMask(T->getRdTokenMask());
313       else if (NumRdToks != 0)
314         Node->setRdTokenMask(unsigned(-1));
315       // Wr tokens.
316       if (NumWrToks == 1)
317         Node->setWrTokenMask(T->getWrTokenMask());
318       else if (NumWrToks != 0)
319         Node->setWrTokenMask(unsigned(-1));
320       T->getBinary()->insertSyncPoint(Node);
321     }
322 
323     for (auto DI = T->from_begin(), DE = T->from_end(); DI != DE; ++DI)
324       (*DI)->clearToNodes(EI->isHeadDef());
325     T->clearFromNodes();
326   }
327 }
328 
329