1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #include "LoopAnalysis.h"
10 #include "G4_Kernel.hpp"
11 #include "G4_BB.hpp"
12 #include "BitSet.h"
13 
14 using namespace vISA;
15 
InterSect(G4_BB * bb,int i,int k)16 G4_BB* ImmDominator::InterSect(G4_BB* bb, int i, int k)
17 {
18     recomputeIfStale();
19 
20     G4_BB* finger1 = immDoms[bb->getId()][i];
21     G4_BB* finger2 = immDoms[bb->getId()][k];
22 
23     while ((finger1 != finger2) &&
24         (finger1 != nullptr) &&
25         (finger2 != nullptr))
26     {
27         if (finger1->getPreId() == finger2->getPreId())
28         {
29             assert(finger1 == kernel.fg.getEntryBB() || finger2 == kernel.fg.getEntryBB());
30             return kernel.fg.getEntryBB();
31         }
32 
33         while ((iDoms[finger1->getId()] != nullptr) &&
34             (finger1->getPreId() > finger2->getPreId()))
35         {
36             finger1 = iDoms[finger1->getId()];
37             immDoms[bb->getId()][i] = finger1;
38         }
39 
40         while ((iDoms[finger2->getId()] != nullptr) &&
41             (finger2->getPreId() > finger1->getPreId()))
42         {
43             finger2 = iDoms[finger2->getId()];
44             immDoms[bb->getId()][k] = finger2;
45         }
46 
47         if ((iDoms[finger2->getId()] == nullptr) ||
48             (iDoms[finger1->getId()] == nullptr))
49         {
50             break;
51         }
52     }
53 
54     if (finger1 == finger2)
55     {
56         return finger1;
57     }
58     else if (finger1->getPreId() > finger2->getPreId())
59     {
60         return finger2;
61     }
62     else
63     {
64         return finger1;
65     }
66 }
67 
68 /*
69 * An improvement on the algorithm from "A Simple, Fast Dominance Algorithm"
70 * 1. Single pred assginment.
71 * 2. To reduce the back trace in the intersect function, a temp buffer for predictor of each nodes is used to record the back trace result.
72 */
runIDOM()73 void ImmDominator::runIDOM()
74 {
75     iDoms.resize(kernel.fg.size());
76     immDoms.resize(kernel.fg.size());
77 
78     for (auto I = kernel.fg.cbegin(), E = kernel.fg.cend(); I != E; ++I)
79     {
80         auto bb = *I;
81         iDoms[bb->getId()] = nullptr;
82         immDoms[bb->getId()].resize(bb->Preds.size());
83 
84         size_t i = 0;
85         for (auto pred : bb->Preds)
86         {
87             immDoms[bb->getId()][i] = pred;
88             i++;
89         }
90     }
91 
92     entryBB = kernel.fg.getEntryBB();
93     iDoms[entryBB->getId()] = { entryBB };
94 
95     // Actual dom computation
96     bool change = true;
97     while (change)
98     {
99         change = false;
100         for (auto I = kernel.fg.cbegin(), E = kernel.fg.cend(); I != E; ++I)
101         {
102             auto bb = *I;
103             if (bb == entryBB)
104                 continue;
105 
106             if (bb->Preds.size() == 1)
107             {
108                 if (iDoms[bb->getId()] == nullptr)
109                 {
110                     iDoms[bb->getId()] = (*bb->Preds.begin());
111                     change = true;
112                 }
113                 else
114                 {
115                     assert(iDoms[bb->getId()] == (*bb->Preds.begin()));
116                 }
117             }
118             else
119             {
120                 G4_BB* tmpIdom = nullptr;
121                 int i = 0;
122                 for (auto pred : bb->Preds)
123                 {
124                     if (iDoms[pred->getId()] != nullptr)
125                     {
126                         tmpIdom = pred;
127                         break;
128                     }
129                     i++;
130                 }
131 
132                 if (tmpIdom != nullptr)
133                 {
134                     int k = 0;
135                     for (auto pred : bb->Preds)
136                     {
137                         if (k == i)
138                         {
139                             k++;
140                             continue;
141                         }
142 
143                         if (iDoms[pred->getId()] != nullptr)
144                         {
145                             tmpIdom = InterSect(bb, i, k);
146                         }
147                         k++;
148                     }
149 
150                     if (iDoms[bb->getId()] == nullptr ||
151                         iDoms[bb->getId()] != tmpIdom)
152                     {
153                         iDoms[bb->getId()] = tmpIdom;
154                         change = true;
155                     }
156                 }
157             }
158         }
159     }
160 }
161 
runDOM()162 void Dominator::runDOM()
163 {
164     // Use bit-set to represent BBs in dominator computation.
165 
166     // domBS holds a bitset for every BB in program. Bitset instance
167     // at given index represents list of BBs that dominate BB#index.
168     std::vector<BitSet> domsBS;
169 
170     // Construct bitsets corresponding to all BBs. Initialize all
171     // bits to 1.
172     for (unsigned int i = 0; i != kernel.fg.size(); ++i)
173         domsBS.emplace_back(kernel.fg.size(), true);
174 
175     entryBB = kernel.fg.getEntryBB();
176 
177     MUST_BE_TRUE(entryBB != nullptr, "Entry BB not found!");
178 
179     // In dominator computation, entryBB is it's only dominator. All
180     // other BBs have dominator set to every other BB in program.
181     domsBS[entryBB->getId()].clear();
182     domsBS[entryBB->getId()].set(entryBB->getId(), true);
183 
184     // Actual dom computation
185     bool change = true;
186     BitSet oldBS(kernel.fg.size(), false);
187     while (change)
188     {
189         change = false;
190         for (auto I = kernel.fg.cbegin(), E = kernel.fg.cend(); I != E; ++I)
191         {
192             auto bb = *I;
193             if (bb == entryBB)
194                 continue;
195 
196             oldBS.swap(domsBS[bb->getId()]);
197 
198             domsBS[bb->getId()].setAll();
199             // Dominators[BB] = Intersection(Dominators[All Pred BBs]) + BB
200             for (auto preds : bb->Preds)
201             {
202                 domsBS[bb->getId()] &= domsBS[preds->getId()];
203             }
204 
205             // Add bb to it's own dominator set
206             domsBS[bb->getId()].set(bb->getId(), true);
207 
208             if (oldBS != domsBS[bb->getId()])
209             {
210                 // Dominator for bb was modified
211                 change = true;
212                 break;
213             }
214         }
215     }
216 
217     std::vector<G4_BB*> indexedBBs;
218     indexedBBs.resize(kernel.fg.size());
219     for (auto I = kernel.fg.cbegin(), E = kernel.fg.cend(); I != E; ++I)
220     {
221         auto BB = *I;
222         indexedBBs[BB->getId()] = BB;
223     }
224 
225     Doms.resize(kernel.fg.size());
226     for (auto I = kernel.fg.cbegin(), E = kernel.fg.cend(); I != E; ++I)
227     {
228         auto BB = *I;
229 
230         auto& domBS = domsBS[BB->getId()];
231 
232         for (unsigned int i = 0; i != domsBS.size(); ++i)
233         {
234             if (domBS.isSet(i))
235             {
236                 auto domBB = indexedBBs[i];
237                 Doms[BB->getId()].insert(domBB);
238             }
239         }
240     }
241 }
242 
243 
getDom(G4_BB * bb)244 std::unordered_set<G4_BB*>& Dominator::getDom(G4_BB* bb)
245 {
246     recomputeIfStale();
247 
248     return Doms[bb->getId()];
249 }
250 
getImmDom(G4_BB * bb)251 std::vector<G4_BB*>& ImmDominator::getImmDom(G4_BB* bb)
252 {
253     recomputeIfStale();
254 
255     return immDoms[bb->getId()];
256 }
257 
updateImmDom()258 void ImmDominator::updateImmDom()
259 {
260     std::vector<BitSet> domBits(kernel.fg.size());
261 
262     for (size_t i = 0; i < kernel.fg.size(); i++)
263     {
264         domBits[i] = BitSet(unsigned(kernel.fg.size()), false);
265     }
266 
267     // Update immDom vector with correct ordering
268     for (auto bb : kernel.fg)
269     {
270         auto& DomBBs = kernel.fg.getDominator().getDom(bb);
271 
272         for (auto domBB : DomBBs)
273         {
274             domBits[bb->getId()].set(domBB->getId(), true);
275         }
276     }
277 
278     iDoms.resize(kernel.fg.size());
279     for (auto bb : kernel.fg)
280     {
281         auto& DomBBs = kernel.fg.getDominator().getDom(bb);
282         BitSet tmpBits = domBits[bb->getId()];
283         tmpBits.set(bb->getId(), false);
284         iDoms[bb->getId()] = bb;
285 
286         for (auto domBB : DomBBs)
287         {
288             if (domBB == bb)
289                 continue;
290 
291             if (tmpBits == domBits[domBB->getId()])
292             {
293                 iDoms[bb->getId()] = domBB;
294             }
295         }
296     }
297 }
298 
reset()299 void ImmDominator::reset()
300 {
301     iDoms.clear();
302     immDoms.clear();
303 
304     setStale();
305 }
306 
run()307 void ImmDominator::run()
308 {
309     // this function re-runs analysis. caller needs to check if
310     // analysis is stale.
311     entryBB = kernel.fg.getEntryBB();
312 
313     runIDOM();
314 
315     setValid();
316 }
317 
dump(std::ostream & os)318 void ImmDominator::dump(std::ostream& os)
319 {
320     if (isStale())
321         os << "Imm dominator data is stale.\n";
322 
323     os << "\n\nImm dom:\n";
324     dumpImmDom(os);
325 }
326 
reset()327 void Dominator::reset()
328 {
329     Doms.clear();
330 
331     setStale();
332 }
333 
run()334 void Dominator::run()
335 {
336     // this function re-runs analysis. caller needs to check if
337     // analysis is stale.
338     entryBB = kernel.fg.getEntryBB();
339 
340     runDOM();
341 
342     setValid();
343 }
344 
dump(std::ostream & os)345 void Dominator::dump(std::ostream& os)
346 {
347     if (isStale())
348         os << "Dominator data is stale.\n";
349 
350     os << "Dom:\n";
351     dumpDom(os);
352 }
353 
getIDoms()354 const std::vector<G4_BB*>& ImmDominator::getIDoms()
355 {
356     recomputeIfStale();
357 
358     return iDoms;
359 }
360 
getCommonImmDom(const std::unordered_set<G4_BB * > & bbs)361 G4_BB* ImmDominator::getCommonImmDom(const std::unordered_set<G4_BB*>& bbs)
362 {
363     recomputeIfStale();
364 
365     if (bbs.size() == 0)
366         return nullptr;
367 
368     unsigned int maxId = (*bbs.begin())->getId();
369 
370     auto commonImmDoms = getImmDom(*bbs.begin());
371     for (auto bb : bbs)
372     {
373         maxId = std::max(maxId, bb->getId());
374 
375         const auto& DomBB = kernel.fg.getDominator().getDom(bb);
376         for (G4_BB*& dom : commonImmDoms)
377         {
378             if (dom != nullptr && DomBB.find(dom) == DomBB.end())
379             {
380                 dom = nullptr;
381             }
382         }
383     }
384 
385     // Return first imm dom that is not a BB from bbs set
386     for (G4_BB* dom : commonImmDoms)
387     {
388         if (dom &&
389             // Common imm pdom must be lexically last BB
390             dom->getId() >= maxId &&
391             ((dom->size() > 1 && dom->front()->isLabel()) ||
392                 (dom->size() > 0 && !dom->front()->isLabel())))
393         {
394             return dom;
395         }
396     }
397 
398     return entryBB;
399 }
400 
dumpImmDom(std::ostream & os)401 void ImmDominator::dumpImmDom(std::ostream& os)
402 {
403     for (auto bb : kernel.fg)
404     {
405         os << "BB" << bb->getId() << " - ";
406         auto& domBBs = immDoms[bb->getId()];
407         for (auto domBB : domBBs)
408         {
409             os << "BB" << domBB->getId();
410             if (domBB->getLabel())
411             {
412                 os << " (" << domBB->getLabel()->getLabel() << ")";
413             }
414             os << ", ";
415         }
416         os << "\n";
417     }
418 }
419 
dumpDom(std::ostream & os)420 void vISA::Dominator::dumpDom(std::ostream& os)
421 {
422     for (auto bb : kernel.fg)
423     {
424         os << "BB" << bb->getId() << " - ";
425         auto& domBBs = Doms[bb->getId()];
426         for (auto domBB : domBBs)
427         {
428             os << "BB" << domBB->getId();
429             if (domBB->getLabel())
430             {
431                 os << " (" << domBB->getLabel()->getLabel() << ")";
432             }
433             os << ", ";
434         }
435         os << "\n";
436     }
437 }
438 
439 // return true if bb1 dominates bb2
dominates(G4_BB * bb1,G4_BB * bb2)440 bool Dominator::dominates(G4_BB* bb1, G4_BB* bb2)
441 {
442     recomputeIfStale();
443 
444     auto& dom = getDom(bb2);
445     if (dom.find(bb1) != dom.end())
446         return true;
447 
448     return false;
449 }
450 
recomputeIfStale()451 void Analysis::recomputeIfStale()
452 {
453     if (!isStale() || inProgress)
454         return;
455 
456     inProgress = true;
457     reset();
458     run();
459     inProgress = false;
460 }
461 
PostDom(G4_Kernel & k)462 PostDom::PostDom(G4_Kernel& k) : kernel(k)
463 {
464 }
465 
reset()466 void PostDom::reset()
467 {
468     postDoms.clear();
469     immPostDoms.clear();
470 
471     setStale();
472 }
473 
run()474 void PostDom::run()
475 {
476     exitBB = nullptr;
477     auto numBBs = kernel.fg.size();
478     postDoms.resize(numBBs);
479     immPostDoms.resize(numBBs);
480 
481     for (auto bb_rit = kernel.fg.rbegin(); bb_rit != kernel.fg.rend(); bb_rit++)
482     {
483         auto bb = *bb_rit;
484         if (bb->size() > 0)
485         {
486             auto lastInst = bb->back();
487             if (lastInst->isEOT())
488             {
489                 exitBB = bb;
490                 break;
491             }
492         }
493     }
494 
495     MUST_BE_TRUE(exitBB != nullptr, "Exit BB not found!");
496 
497     postDoms[exitBB->getId()] = { exitBB };
498     std::unordered_set<G4_BB*> allBBs(kernel.fg.cbegin(), kernel.fg.cend());
499 
500     for (auto bb : kernel.fg)
501     {
502         if (bb != exitBB)
503         {
504             postDoms[bb->getId()] = allBBs;
505         }
506     }
507 
508     // Actual post dom computation
509     bool change = true;
510     while (change)
511     {
512         change = false;
513         for (auto bb : kernel.fg)
514         {
515             if (bb == exitBB)
516                 continue;
517 
518             std::unordered_set<G4_BB*> tmp = { bb };
519             // Compute intersection of pdom of successors
520             std::unordered_map<G4_BB*, unsigned> numInstances;
521             for (auto succs : bb->Succs)
522             {
523                 auto& pdomSucc = postDoms[succs->getId()];
524                 for (auto pdomSuccBB : pdomSucc)
525                 {
526                     auto it = numInstances.find(pdomSuccBB);
527                     if (it == numInstances.end())
528                         numInstances.insert(std::make_pair(pdomSuccBB, 1));
529                     else
530                         it->second = it->second + 1;
531                 }
532             }
533 
534             // Common BBs appear in numInstances map with second value == bb->Succs count
535             for (auto commonBBs : numInstances)
536             {
537                 if (commonBBs.second == bb->Succs.size())
538                     tmp.insert(commonBBs.first);
539             }
540 
541             // Check if postDom set changed for bb in current iter
542             if (tmp.size() != postDoms[bb->getId()].size())
543             {
544                 postDoms[bb->getId()] = tmp;
545                 change = true;
546                 continue;
547             }
548             else
549             {
550                 auto& pdomBB = postDoms[bb->getId()];
551                 for (auto tmpBB : tmp)
552                 {
553                     if (pdomBB.find(tmpBB) == pdomBB.end())
554                     {
555                         postDoms[bb->getId()] = tmp;
556                         change = true;
557                         break;
558                     }
559                     if (change)
560                         break;
561                 }
562             }
563         }
564     }
565 
566     setValid();
567     updateImmPostDom();
568 }
569 
getPostDom(G4_BB * bb)570 std::unordered_set<G4_BB*>& PostDom::getPostDom(G4_BB* bb)
571 {
572     recomputeIfStale();
573 
574     return postDoms[bb->getId()];
575 }
576 
dumpImmDom(std::ostream & os)577 void PostDom::dumpImmDom(std::ostream& os)
578 {
579     if (isStale())
580         os << "PostDom data is stale.\n";
581 
582     for (auto bb : kernel.fg)
583     {
584         os << "BB" << bb->getId();
585         auto& pdomBBs = immPostDoms[bb->getId()];
586         for (auto pdomBB : pdomBBs)
587         {
588             os << "BB" << pdomBB->getId();
589             if (pdomBB->getLabel())
590             {
591                 os << " (" << pdomBB->getLabel()->getLabel() << ")";
592             }
593             os << ", ";
594         }
595         os << "\n";
596     }
597 }
598 
getImmPostDom(G4_BB * bb)599 std::vector<G4_BB*>& PostDom::getImmPostDom(G4_BB* bb)
600 {
601     recomputeIfStale();
602 
603     return immPostDoms[bb->getId()];
604 }
605 
updateImmPostDom()606 void PostDom::updateImmPostDom()
607 {
608     // Update immPostDom vector with correct ordering
609     for (auto bb : kernel.fg)
610     {
611         auto& postDomBBs = postDoms[bb->getId()];
612         auto& immPostDomBB = immPostDoms[bb->getId()];
613         immPostDomBB.resize(postDomBBs.size());
614         immPostDomBB[0] = bb;
615 
616         for (auto pdomBB : postDomBBs)
617         {
618             if (pdomBB == bb)
619                 continue;
620 
621             immPostDomBB[postDomBBs.size() - postDoms[pdomBB->getId()].size()] = pdomBB;
622         }
623     }
624 }
625 
getCommonImmDom(std::unordered_set<G4_BB * > & bbs)626 G4_BB* PostDom::getCommonImmDom(std::unordered_set<G4_BB*>& bbs)
627 {
628     recomputeIfStale();
629 
630     if (bbs.size() == 0)
631         return nullptr;
632 
633     unsigned maxId = (*bbs.begin())->getId();
634 
635     auto commonImmDoms = getImmPostDom(*bbs.begin());
636     for (auto bb : bbs)
637     {
638         if (bb->getId() > maxId)
639             maxId = bb->getId();
640 
641         auto& postDomBB = postDoms[bb->getId()];
642         for (unsigned i = 0, size = commonImmDoms.size(); i != size; i++)
643         {
644             if (commonImmDoms[i])
645             {
646                 if (postDomBB.find(commonImmDoms[i]) == postDomBB.end())
647                 {
648                     commonImmDoms[i] = nullptr;
649                 }
650             }
651         }
652     }
653 
654     // Return first imm dom that is not a BB from bbs set
655     for (G4_BB* commonImmDom : commonImmDoms)
656     {
657         if (commonImmDom &&
658             // Common imm pdom must be lexically last BB
659             commonImmDom->getId() >= maxId &&
660             ((commonImmDom->size() > 1 && commonImmDom->front()->isLabel()) ||
661                 (commonImmDom->size() > 0 && !commonImmDom->front()->isLabel())))
662         {
663             return commonImmDom;
664         }
665     }
666 
667     return exitBB;
668 }
669 
LoopDetection(G4_Kernel & k)670 LoopDetection::LoopDetection(G4_Kernel& k) : kernel(k), fg(k.fg)
671 {
672 }
673 
getTopLoops()674 std::vector<Loop*> LoopDetection::getTopLoops()
675 {
676     recomputeIfStale();
677 
678     return topLoops;
679 }
680 
getInnerMostLoop(const G4_BB * bb)681 Loop* Loop::getInnerMostLoop(const G4_BB* bb)
682 {
683     // if current loop contains bb, recurse loop tree and return
684     // most nested loop containing it.
685     // if current loop doesnt contain bb then return nullptr.
686     if (!contains(bb))
687         return nullptr;
688 
689     for (auto& nested : immNested)
690         if(auto innerMost = nested->getInnerMostLoop(bb))
691             return innerMost;
692 
693     return this;
694 }
695 
getLoopExits()696 std::vector<G4_BB*>& Loop::getLoopExits()
697 {
698     // already computed before, so return old list
699     if (loopExits.size() > 0)
700         return loopExits;
701 
702     // list all successors of loop BBs that are themselves not part of loop, ie loop exits.
703     // this loop may add duplicate entries to exits. those are cleaned up later.
704     std::list<G4_BB*> exits;
705     for (auto bb : BBs)
706     {
707         for (auto succ : bb->Succs)
708         {
709             if (contains(succ))
710                 continue;
711             exits.push_back(succ);
712         }
713     }
714 
715     // sort exits found by bbid
716     exits.sort([](G4_BB* bb1, G4_BB* bb2) { return bb1->getId() < bb2->getId(); });
717     // remove duplicates
718     exits.unique();
719     // transfer data to class member for future invocations
720     std::for_each(exits.begin(), exits.end(), [&](G4_BB* bb) { loopExits.push_back(bb); });
721 
722     return loopExits;
723 }
724 
getPreheader(Loop * loop)725 G4_BB* LoopDetection::getPreheader(Loop* loop)
726 {
727     if (loop->preHeader)
728         return loop->preHeader;
729 
730     // return pre-header if one exists, otherwise create a new one and return it
731     auto header = loop->getHeader();
732     auto headerPreds = header->Preds;
733 
734     // for a BB to be valid pre-header, it needs to fulfill following criteria:
735     // 1. BB should be a predecessor of loop header,
736     // 2. BB should dominate header
737     // 3. BB's only successor should be loop header
738     // 4. Loop header should've no other predecessor outside the loop
739 
740     G4_BB* enteringNode = nullptr;
741     bool found = false;
742     for (auto pred : headerPreds)
743     {
744         if (loop->contains(pred))
745             continue;
746 
747         if (!enteringNode)
748         {
749             enteringNode = pred;
750             found = true;
751         }
752         else
753         {
754             found = false;
755             break;
756         }
757 
758         if (!enteringNode->dominates(header))
759         {
760             found = false;
761             break;
762         }
763 
764         if (enteringNode->Succs.size() > 1)
765         {
766             found = false;
767             break;
768         }
769     }
770 
771     if (found && enteringNode)
772     {
773         // entering node is legal preheader for loop
774         loop->preHeader = enteringNode;
775         return enteringNode;
776     }
777 
778     // a valid pre-header wasnt found, so create one and return it
779     // unless any pred uses SIMD CF goto in to loop header
780     if (header->getLabel())
781     {
782         auto headerLblStr = std::string(header->getLabel()->getLabel());
783         for (auto pred : headerPreds)
784         {
785             if (pred->isEndWithGoto())
786             {
787                 auto gotoInst = pred->back()->asCFInst();
788                 auto jipStr = std::string(gotoInst->getJipLabelStr());
789                 auto uipStr = std::string(gotoInst->getUipLabelStr());
790                 if (jipStr == headerLblStr ||
791                     uipStr == headerLblStr)
792                 {
793                     // dont create preheader for this loop as a predecessor
794                     // of header has SIMD CF in to loop header.
795                     return nullptr;
796                 }
797             }
798             else if (pred->size() > 0 &&
799                 pred->back()->opcode() == G4_jmpi)
800             {
801                 // TODO: may be safe to alter CF by changing jmpi destination
802                 // to preheader instead of header.
803                 return nullptr;
804             }
805         }
806     }
807 
808     G4_BB* preHeader = kernel.fg.createNewBBWithLabel("preHeader");
809     for (auto pred : headerPreds)
810     {
811         if (loop->contains(pred))
812             continue;
813 
814         kernel.fg.removePredSuccEdges(pred, header);
815         kernel.fg.addPredSuccEdges(pred, preHeader);
816     }
817     kernel.fg.addPredSuccEdges(preHeader, header);
818 
819     for (auto bbIt = kernel.fg.begin(); bbIt != kernel.fg.end(); ++bbIt)
820     {
821         if (*bbIt == header)
822         {
823             kernel.fg.insert(bbIt, preHeader);
824             break;
825         }
826     }
827 
828     loop->preHeader = preHeader;
829     if (loop->parent)
830         loop->parent->addBBToLoopHierarchy(preHeader);
831 
832     // adding/deleted CFG edges causes loop information to become
833     // stale. we fix this by inserting preheader BB to all
834     // parent loops. and then we set valid flag so no recomputation
835     // is needed.
836     setValid();
837 
838     return preHeader;
839 }
840 
getInnerMostLoop(const G4_BB * bb)841 Loop* LoopDetection::getInnerMostLoop(const G4_BB* bb)
842 {
843     recomputeIfStale();
844 
845     for (auto& loop : topLoops)
846         if (auto innerMost = loop->getInnerMostLoop(bb))
847             return innerMost;
848 
849     return nullptr;
850 }
851 
computePreheaders()852 void LoopDetection::computePreheaders()
853 {
854     recomputeIfStale();
855 
856     for (auto& loop : allLoops)
857     {
858         (void)getPreheader(&loop);
859     }
860 }
861 
reset()862 void LoopDetection::reset()
863 {
864     allLoops.clear();
865     topLoops.clear();
866 
867     PreIdRPostId.clear();
868 
869     setStale();
870 }
871 
872 // Adapted from FlowGraph::DFSTraverse.
873 // No changes are made to any G4_BB member or to FlowGraph.
DFSTraverse(const G4_BB * startBB,unsigned & preId,unsigned & postId,BackEdges & bes)874 void LoopDetection::DFSTraverse(const G4_BB* startBB, unsigned& preId, unsigned& postId, BackEdges& bes)
875 {
876     std::stack<const G4_BB*> traversalStack;
877     traversalStack.push(startBB);
878 
879     auto getPreId = [&](const G4_BB* bb)
880     {
881         return PreIdRPostId[bb].first;
882     };
883 
884     auto getRPostId = [&](const G4_BB* bb)
885     {
886         return PreIdRPostId[bb].second;
887     };
888 
889     auto setPreId = [&](const G4_BB* bb, unsigned int id)
890     {
891         PreIdRPostId[bb].first = id;
892     };
893 
894     auto setRPostId = [&](const G4_BB* bb, unsigned int id)
895     {
896         PreIdRPostId[bb].second = id;
897     };
898 
899     while (!traversalStack.empty())
900     {
901         auto bb = traversalStack.top();
902         if (getPreId(bb) != UINT_MAX)
903         {
904             // Pre-processed already and continue to the next one.
905             // Before doing so, set postId if not set before.
906             traversalStack.pop();
907             if (getRPostId(bb) == UINT_MAX)
908             {
909             // All bb's succ has been visited (PreId is set) at this time.
910             // if any of its succ has not been finished (RPostId not set),
911             // bb->succ forms a backedge.
912             //
913             // Note: originally, CALL and EXIT will not check back-edges, here
914             //       we skip checking for them as well. (INIT & RETURN should
915             //       be checked as well ?)
916             if (!(bb->getBBType() & (G4_BB_CALL_TYPE | G4_BB_EXIT_TYPE)))
917             {
918                 for (auto succBB : bb->Succs)
919                 {
920                     if (getRPostId(succBB) == UINT_MAX)
921                     {
922                         BackEdge be = std::make_pair(const_cast<G4_BB*>(bb), succBB);
923                         bes.push_back(be);
924                     }
925                 }
926             }
927 
928             // Need to keep this after backedge checking so that self-backedge
929             // (single-bb loop) will not be missed.
930             setRPostId(bb, postId++);
931             }
932             continue;
933         }
934 
935         setPreId(bb, preId++);
936 
937         if (bb->getBBType() & G4_BB_CALL_TYPE)
938         {
939             const G4_BB* returnBB = bb->BBAfterCall();
940 
941             if (getPreId(returnBB) == UINT_MAX)
942             {
943                 traversalStack.push(returnBB);
944             }
945             else
946             {
947                 MUST_BE_TRUE(false, ERROR_FLOWGRAPH);
948             }
949         }
950         else if (bb->getBBType() & G4_BB_EXIT_TYPE)
951         {
952             // Skip
953         }
954         else
955         {
956             // To be consistent with previous behavior, use reverse_iter.
957             auto RIE = bb->Succs.rend();
958             for (auto rit = bb->Succs.rbegin(); rit != RIE; ++rit)
959             {
960                 const G4_BB* succBB = *rit;
961                 if (getPreId(succBB) == UINT_MAX)
962                 {
963                     traversalStack.push(succBB);
964                 }
965             }
966         }
967     }
968 }
969 
findDominatingBackEdges(BackEdges & bes)970 void LoopDetection::findDominatingBackEdges(BackEdges& bes)
971 {
972     const auto& BBs = fg.getBBList();
973 
974     for (auto& bb : BBs)
975     {
976         PreIdRPostId[bb] = std::make_pair(UINT_MAX, UINT_MAX);
977     }
978 
979     unsigned preId = 0;
980     unsigned postID = 0;
981 
982     DFSTraverse(fg.getEntryBB(), preId, postID, bes);
983 
984     for (auto fn : fg.funcInfoTable)
985     {
986         DFSTraverse(fn->getInitBB(), preId, postID, bes);
987     }
988 }
989 
populateLoop(BackEdge & backEdge)990 void LoopDetection::populateLoop(BackEdge& backEdge)
991 {
992     // check whether dst dominates src
993     auto src = const_cast<G4_BB*>(backEdge.first);
994     auto dst = const_cast<G4_BB*>(backEdge.second);
995 
996     auto& domSecond = fg.getDominator().getDom(src);
997     if (domSecond.find(dst) != domSecond.end())
998     {
999         // this is a natural loop back edge. populate all bbs in loop.
1000         Loop newLoop(backEdge);
1001         newLoop.id = allLoops.size() + 1;
1002 
1003         newLoop.addBBToLoop(src);
1004         newLoop.addBBToLoop(dst);
1005 
1006         std::stack<G4_BB*> traversal;
1007         traversal.push(src);
1008         while (!traversal.empty())
1009         {
1010             auto bb = traversal.top();
1011             traversal.pop();
1012 
1013             // check whether bb's preds are dominated by loop header.
1014             // if yes, add them to traversal.
1015             for (auto predIt = bb->Preds.begin(); predIt != bb->Preds.end(); ++predIt)
1016             {
1017                 auto pred = (*predIt);
1018                 // pred is loop header, which is already added to list of loop BBs
1019                 if (pred == dst)
1020                     continue;
1021 
1022                 if (dst->dominates(pred) &&
1023                     !newLoop.contains(pred))
1024                 {
1025                     // pred is part of loop
1026                     newLoop.addBBToLoop(pred);
1027                     traversal.push(pred);
1028                 }
1029             }
1030         }
1031 
1032         (void)newLoop.getLoopExits();
1033         allLoops.emplace_back(newLoop);
1034     }
1035 }
1036 
computeLoopTree()1037 void LoopDetection::computeLoopTree()
1038 {
1039     // create loop tree by iterating over allLoops in descending order
1040     // of BB count.
1041     std::vector<Loop*> sortedLoops;
1042     std::for_each(allLoops.begin(), allLoops.end(), [&](Loop& l) { sortedLoops.push_back(&l); });
1043 
1044     // sorting loops by size of contained BBs makes it easy to create
1045     // tree structure relationship of loops.
1046     // 1. If loop A has more BBs than loop B then A is either some parent of B or no relationship exists.
1047     // 2. For loop A to be a parent of loop B, all BBs of loop B have to be contained in loop A as well.
1048     //
1049     // processing loops in sorted order of BB size guarantees that we'll create tree in top-down order.
1050     // we'll never encounter a situation where new loop to be added to tree is parent of an existing
1051     // loop already present in the tree.
1052     std::sort(sortedLoops.begin(), sortedLoops.end(), [](Loop* l1, Loop* l2) { return l2->getBBSize() < l1->getBBSize(); });
1053 
1054     for (auto loop : sortedLoops)
1055     {
1056         addLoop(loop, nullptr);
1057     }
1058 }
1059 
addLoop(Loop * newLoop,Loop * aParent)1060 void LoopDetection::addLoop(Loop* newLoop, Loop* aParent)
1061 {
1062     if (topLoops.size() == 0)
1063     {
1064         topLoops.push_back(newLoop);
1065         return;
1066     }
1067 
1068     // find a place in existing loop tree to insert new loop passed in.
1069     // following scenarios exist:
1070     // a. loop is nested loop of an existing loop,
1071     // b. loop is not nested but is sibling of existing loop,
1072     // c. loop is top level parent loop of a certain tree
1073 
1074     // check if newLoop fits in to any existing current top level loop
1075     auto siblings = aParent ? aParent->getAllSiblings(topLoops) : topLoops;
1076     for (auto& sibling : siblings)
1077     {
1078         if (newLoop->fullSubset(sibling))
1079         {
1080             if (sibling->immNested.size() > 0)
1081             {
1082                 addLoop(newLoop, sibling->immNested[0]);
1083             }
1084             else
1085             {
1086                 sibling->immNested.push_back(newLoop);
1087                 newLoop->parent = sibling;
1088             }
1089             return;
1090         }
1091         else if (newLoop->fullSuperset(sibling))
1092         {
1093             MUST_BE_TRUE(false, "Not expecting to see parent loop here");
1094             return;
1095         }
1096     }
1097 
1098     // add new sibling to current level
1099     newLoop->parent = siblings[0]->parent;
1100     if (newLoop->parent)
1101     {
1102         siblings[0]->parent->immNested.push_back(newLoop);
1103     }
1104     else
1105     {
1106         topLoops.push_back(newLoop);
1107     }
1108 }
1109 
run()1110 void LoopDetection::run()
1111 {
1112     BackEdges backEdges;
1113     findDominatingBackEdges(backEdges);
1114     for (auto& be : backEdges)
1115     {
1116         populateLoop(be);
1117     }
1118 
1119     computeLoopTree();
1120 
1121     setValid();
1122 }
1123 
dump(std::ostream & os)1124 void LoopDetection::dump(std::ostream& os)
1125 {
1126     if(isStale())
1127         os << "Loop info is stale.\n";
1128 
1129     os << "\n\n\nLoop tree:\n";
1130 
1131     for (auto loop : topLoops)
1132     {
1133         loop->dump(os);
1134     }
1135 }
1136 
1137 // add bb to current loop and to all valid parents
addBBToLoopHierarchy(G4_BB * bb)1138 void Loop::addBBToLoopHierarchy(G4_BB* bb)
1139 {
1140     addBBToLoop(bb);
1141 
1142     if (parent)
1143         parent->addBBToLoopHierarchy(bb);
1144 }
1145 
addBBToLoop(G4_BB * bb)1146 void vISA::Loop::addBBToLoop(G4_BB* bb)
1147 {
1148     if (!contains(bb))
1149     {
1150         BBs.push_back(bb);
1151         BBsLookup.insert(bb);
1152     }
1153 }
1154 
fullSubset(Loop * other)1155 bool Loop::fullSubset(Loop* other)
1156 {
1157     if (BBs.size() > other->BBs.size())
1158         return false;
1159 
1160     // to avoid O(N^2) lookup, use unordered set of other loop's BBs for lookup
1161     auto& otherBBs = other->BBsLookup;
1162 
1163     // check whether current loop's all BBs are fully contained in "other" loop
1164     for (auto bb : BBs)
1165     {
1166         if (otherBBs.find(bb) == otherBBs.end())
1167             return false;
1168     }
1169 
1170     return true;
1171 }
1172 
fullSuperset(Loop * other)1173 bool Loop::fullSuperset(Loop* other)
1174 {
1175     return other->fullSubset(this);
1176 }
1177 
getAllSiblings(std::vector<Loop * > & topLoops)1178 std::vector<Loop*> Loop::getAllSiblings(std::vector<Loop*>& topLoops)
1179 {
1180     if (parent)
1181         return parent->immNested;
1182 
1183     return topLoops;
1184 }
1185 
getNestingLevel()1186 unsigned int Loop::getNestingLevel()
1187 {
1188     if (!parent)
1189         return 1;
1190 
1191     return parent->getNestingLevel()+1;
1192 }
1193 
dump(std::ostream & os)1194 void Loop::dump(std::ostream& os)
1195 {
1196     auto nestingLevel = getNestingLevel();
1197     nestingLevel = nestingLevel > 0 ? nestingLevel : 1;
1198     for (unsigned int i = 0; i != nestingLevel - 1; ++i)
1199     {
1200         os << "\t";
1201     }
1202     os << "L" << id << ": - { ";
1203     for (auto bb : BBs)
1204     {
1205         os << bb->getId();
1206         if (bb != BBs.back())
1207             os << ", ";
1208     }
1209     os << " } ";
1210 
1211     auto labelStr = std::string("BB") + (preHeader ? std::to_string(preHeader->getId()) :
1212         std::string("--"));
1213 
1214     if (preHeader && preHeader->getLabel())
1215         labelStr += "(" + std::string(preHeader->getLabel()->getLabel()) + ")";
1216 
1217     std::string exitBBs = "{ ";
1218     for (auto bb : loopExits)
1219     {
1220         exitBBs += "BB" + std::to_string(bb->getId());
1221         if (bb != loopExits.back())
1222             exitBBs += ", ";
1223     }
1224     exitBBs += " }";
1225 
1226     os << " BE: {BB" << be.first->getId() << " -> BB" << be.second->getId() << "}, " <<
1227         "PreHeader: " << labelStr << ", " << "Loop exits: " << exitBBs << std::endl;
1228 
1229     for (auto& nested : immNested)
1230     {
1231         nested->dump(os);
1232     }
1233 }
1234 
contains(const G4_BB * bb)1235 bool Loop::contains(const G4_BB* bb)
1236 {
1237     return BBsLookup.find(bb) != BBsLookup.end();
1238 }
1239 
isUniqueDef(G4_DstRegRegion * dst)1240 bool VarReferences::isUniqueDef(G4_DstRegRegion* dst)
1241 {
1242     recomputeIfStale();
1243 
1244     auto dcl = dst->getTopDcl();
1245 
1246     // return true if spilled variable has a single static def
1247     // and it is not live-in to current bb (eg, loop, sub).
1248     if (getDefCount(dcl) != 1)
1249     {
1250         // check whether multiple defs exist in program for current
1251         // lb, rb
1252         auto lb = dst->getLeftBound();
1253         auto rb = dst->getRightBound();
1254 
1255         unsigned int count = 0;
1256         const auto& defs = getDefs(dcl);
1257         for (auto& def : *defs)
1258         {
1259             if (std::get<2>(def) <= rb &&
1260                 std::get<3>(def) >= lb)
1261                 ++count;
1262         }
1263 
1264         if (count > 1)
1265             return false;
1266     }
1267 
1268     return true;
1269 }
1270 
getDefCount(G4_Declare * dcl)1271 unsigned int VarReferences::getDefCount(G4_Declare* dcl)
1272 {
1273     auto defs = getDefs(dcl);
1274     if (defs)
1275         return defs->size();
1276     return 0;
1277 }
1278 
getUseCount(G4_Declare * dcl)1279 unsigned int VarReferences::getUseCount(G4_Declare* dcl)
1280 {
1281     auto uses = getUses(dcl);
1282     if (uses)
1283         return uses->size();
1284     return 0;
1285 }
1286 
getDefs(G4_Declare * dcl)1287 const VarReferences::Defs* VarReferences::getDefs(G4_Declare* dcl)
1288 {
1289     recomputeIfStale();
1290 
1291     auto it = VarRefs.find(dcl);
1292     if (it != VarRefs.end())
1293         return &(it->second.first);
1294 
1295     return nullptr;
1296 }
1297 
getUses(G4_Declare * dcl)1298 const VarReferences::Uses* VarReferences::getUses(G4_Declare* dcl)
1299 {
1300     recomputeIfStale();
1301 
1302     auto it = VarRefs.find(dcl);
1303     if (it != VarRefs.end())
1304         return &(it->second.second);
1305 
1306     return nullptr;
1307 }
1308 
reset()1309 void VarReferences::reset()
1310 {
1311     VarRefs.clear();
1312 
1313     setStale();
1314 }
1315 
run()1316 void VarReferences::run()
1317 {
1318     for (auto bb : kernel.fg)
1319     {
1320         for (auto inst : *bb)
1321         {
1322             if (inst->isPseudoKill())
1323                 continue;
1324 
1325             auto dst = inst->getDst();
1326 
1327             if (dst && !dst->isNullReg())
1328             {
1329                 auto topdcl = dst->getTopDcl();
1330 
1331                 if (topdcl)
1332                 {
1333                     auto lb = dst->getLeftBound();
1334                     auto rb = dst->getRightBound();
1335                     auto& Defs = VarRefs[topdcl].first;
1336                     Defs.push_back(std::make_tuple(inst, bb, lb, rb));
1337                 }
1338             }
1339 
1340             for (unsigned int i = 0; i != inst->getNumSrc(); ++i)
1341             {
1342                 auto src = inst->getSrc(i);
1343                 if (src && src->isSrcRegRegion())
1344                 {
1345                     auto topdcl = src->asSrcRegRegion()->getTopDcl();
1346                     if (topdcl)
1347                     {
1348                         auto& Uses = VarRefs[topdcl].second;
1349                         Uses.push_back(std::make_tuple(inst, bb));
1350                     }
1351                 }
1352             }
1353         }
1354     }
1355 
1356     setValid();
1357 }
1358 
dump(std::ostream & os)1359 void VarReferences::dump(std::ostream& os)
1360 {
1361     if (isStale())
1362         os << "Data is stale.\n";
1363 
1364     os << "#Dcls with defs/uses: " << VarRefs.size();
1365 }
1366