/* * Copyright (c) 2019, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include "cm_fc_ld.h" #include "DepGraph.h" using namespace cm::patch; DepNode *DepGraph::getDepNode(Binary *B, unsigned Off, bool Barrier = false) { auto P = std::make_tuple(B, Off, Barrier); auto I = NodeMap.find(P); if (I != NodeMap.end()) return I->second; Nodes.push_back(DepNode{B, Off, Barrier}); DepNode *N = &Nodes.back(); NodeMap[P] = N; return N; } DepEdge *DepGraph::getDepEdge(DepNode *From, DepNode *To, bool FromDef) { if (From == To) // No dependency on itself. return nullptr; auto P = std::make_pair(From, To); auto I = EdgeMap.find(P); if (I != EdgeMap.end()) return I->second; // Add new edge. Edges.push_back(DepEdge{From, To, FromDef}); DepEdge *E = &Edges.back(); EdgeMap[P] = E; From->addToNode(To, FromDef); To->addFromNode(From); return E; } void DepGraph::build() { // There's no need to build dependency graph for policy 0 & 2. if (Policy == SWSB_POLICY_0 || Policy == SWSB_POLICY_2) return; std::map State; auto requireDefSync = [](std::map &State) { for (auto &KV : State) if (KV.second->isDefByToken(KV.first)) return true; return false; }; auto requireUseSync = [](std::map &State) { for (auto &KV : State) if (KV.second->isUseByToken(KV.first)) return true; return false; }; unsigned Order = 0; for (auto I = C.bin_begin(), E = C.bin_end(); I != E; ++I) { Binary &B = *I; if (B.getLinkType() == CM_FC_LINK_TYPE_CALLEE) break; B.setOrder(++Order); if (B.getLinkType() == CM_FC_LINK_TYPE_CALLER) { // For caller, add dependency edges to the sync point just before 'call'. Relocation &Rel = *B.rel_begin(); // Assume there's just one 'call'. Symbol *Sym = Rel.getSymbol(); Binary &Callee = *Sym->getBinary(); // Add a barrier node just before 'call' to resolve dependency. DepNode *Node = getDepNode(&B, Rel.getOffset(), true); for (auto RI = Callee.initreg_begin(), RE = Callee.initreg_end(); RI != RE; ++RI) { unsigned Reg = RI->getRegNo(); if (Reg == cm::patch::REG_NONE) { /// Check if it's necessary to insert sync points. bool reqDefSync = requireDefSync(State); bool reqUseSync = requireUseSync(State); if (reqDefSync || reqUseSync) { Node->clearAccList(); if (reqDefSync) Node->setRdTokenMask(unsigned(-1)); if (reqUseSync) Node->setWrTokenMask(unsigned(-1)); B.insertSyncPoint(Node); // Clear state after barrier. State.clear(); break; } } // Only build token-based dependency. auto SI = State.find(Reg); if (SI == State.end()) continue; auto From = SI->second; // Skip access not by token. if (!From->isDefByToken(Reg) && !From->isUseByToken(Reg)) continue; // Skip RAR. if (From->isUseOnly(Reg) && RI->isUse()) continue; Node->appendRegAcc(&*RI); auto E = getDepEdge(From, Node, From->isDef(Reg)); } // Only update token-based dependency. Node = getDepNode(&B, Rel.getOffset()); for (auto RI = Callee.finireg_begin(), RE = Callee.finireg_end(); RI != RE; ++RI) { // Skip use-only access without token associated. if (RI->isDefNotByToken()) continue; unsigned Reg = RI->getRegNo(); Node->appendRegAcc(&*RI); State[Reg] = Node; } continue; } // Build RAW, WAW, and WAR from the current state to the first access list. for (auto RI = B.initreg_begin(), RE = B.initreg_end(); RI != RE; ++RI) { unsigned Reg = RI->getRegNo(); // Check for sync barrier. if (Reg == cm::patch::REG_NONE) { /// Check if it's necessary to insert sync points. bool reqDefSync = requireDefSync(State); bool reqUseSync = requireUseSync(State); if (reqDefSync || reqUseSync) { DepNode *Node = getDepNode(&B, RI->getOffset(), true); if (reqDefSync) Node->setRdTokenMask(unsigned(-1)); if (reqUseSync) Node->setWrTokenMask(unsigned(-1)); B.insertSyncPoint(Node); // Clean state after barrier. State.clear(); break; } } auto SI = State.find(Reg); // Skip if that register has no dependency. if (SI == State.end()) continue; auto From = SI->second; // Skip if the previous node is a use without token. if (From->isUseNotByToken(Reg)) continue; // Skip RAR. if (From->isUseOnly(Reg) && RI->isUse()) continue; auto To = getDepNode(&B, RI->getOffset()); To->appendRegAcc(&*RI); auto E = getDepEdge(From, To, From->isDef(Reg)); } // Update the current from the last access list. for (auto RI = B.finireg_begin(), RE = B.finireg_end(); RI != RE; ++RI) { // Skip use-only access without token associated. if (RI->isUseNotByToken()) continue; unsigned Reg = RI->getRegNo(); auto Node = getDepNode(&B, RI->getOffset()); Node->appendRegAcc(&*RI); State[Reg] = Node; } } } void DepGraph::resolve() { // In policy 2, do nothing. if (Policy == SWSB_POLICY_2) return; if (Policy == SWSB_POLICY_0) { // In policy 0, insert sync barrier before each non-callee kernels. for (auto I = C.bin_begin(), E = C.bin_end(); I != E; ++I) { Binary &B = *I; if (B.getLinkType() == CM_FC_LINK_TYPE_CALLEE) continue; DepNode *Node = getDepNode(&B, 0, true); Node->setRdTokenMask(unsigned(-1)); Node->setWrTokenMask(unsigned(-1)); B.insertSyncPoint(Node); } return; } auto getEncodedDistance = [](Binary *Bin, unsigned Off) { uint64_t swsb_mask = 0x000000000000FF00; uint64_t qw = *((uint64_t *)(Bin->getData() + Off)); unsigned swsb = unsigned((qw & swsb_mask) >> 8); if ((swsb & 0xf0) == 0x00) return swsb & 0x7; if ((swsb & 0x80) == 0x80) return (swsb & 0x70) >> 4; return 0U; }; auto calcDistance = [](Collection &C, Binary *From, unsigned FOff, Binary *To, unsigned TOff) { // Always assuem there's no compact instruction. if (From == To) return (TOff - FOff) / 16; unsigned D = 0; bool FoundFrom = false; for (auto BI = C.bin_begin(), BE = C.bin_end(); BI != BE; ++BI) { Binary *B = &*BI; if (From == B) { FoundFrom = true; D = B->getSize() - FOff; continue; } if (!FoundFrom) continue; if (To != B) { D += B->getSize(); continue; } if (To == B) { D += TOff; break; } } return D / 16; }; auto countBits = [](unsigned V) { unsigned B; for (B = 0; V; V >>= 1) B += V & 1; return B; }; // Fix the dependency. Assume edges are processed in the linking/program // order. for (auto EI = Edges.begin(), EE = Edges.end(); EI != EE; ++EI) { auto H = EI->getHead(); auto T = EI->getTail(); if (H->to_empty(EI->isHeadDef()) || T->from_empty()) continue; if (!T->isBarrier()) T->setDistance(getEncodedDistance(T->getBinary(), T->getOffset())); for (auto RI = T->acc_begin(), RE = T->acc_end(); RI != RE; ++RI) { RegAccess *To = *RI; unsigned Reg = To->getRegNo(); for (auto DI = T->from_begin(), DE = T->from_end(); DI != DE; ++DI) { auto Def = *DI; for (auto AI = Def->acc_begin(), AE = Def->acc_end(); AI != AE; ++AI) { RegAccess *From = *AI; if (From->getRegNo() != Reg) continue; bool HasToken; unsigned Tok; std::tie(HasToken, Tok) = From->getToken(); if (From->isDef()) { // RAW or WAW if (HasToken) T->mergeWrTokenMask(1 << Tok); else T->updateDistance(calcDistance(C, Def->getBinary(), Def->getOffset(), T->getBinary(), T->getOffset())); } else if (To->isDef()) { // WAR if (HasToken) T->mergeRdTokenMask(1 << Tok); } } } } // Revise src token as syncing on def token implies syncing on src token. if (T->getRdTokenMask() != unsigned(-1)) { unsigned Mask = T->getRdTokenMask(); Mask &= ~T->getWrTokenMask(); T->setRdTokenMask(Mask); } // Update SWSB info and check if we need additional sync barrier. if (T->getDistance() > 7) // Clear distance if it's greater than 7. T->setDistance(0); unsigned NumRdToks = countBits(T->getRdTokenMask()); unsigned NumWrToks = countBits(T->getWrTokenMask()); if ((NumRdToks + NumWrToks) > 0) { // Already has SBID associated. Need to insert additional sync barrier. DepNode *Node = T; if (!T->isBarrier()) Node = getDepNode(T->getBinary(), T->getOffset(), true); // Rd tokens. if (NumRdToks == 1) Node->setRdTokenMask(T->getRdTokenMask()); else if (NumRdToks != 0) Node->setRdTokenMask(unsigned(-1)); // Wr tokens. if (NumWrToks == 1) Node->setWrTokenMask(T->getWrTokenMask()); else if (NumWrToks != 0) Node->setWrTokenMask(unsigned(-1)); T->getBinary()->insertSyncPoint(Node); } for (auto DI = T->from_begin(), DE = T->from_end(); DI != DE; ++DI) (*DI)->clearToNodes(EI->isHeadDef()); T->clearFromNodes(); } }