1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "LoopAnalysis.h"
10 #include "G4_Kernel.hpp"
11 #include "G4_BB.hpp"
12 #include "BitSet.h"
13
14 using namespace vISA;
15
InterSect(G4_BB * bb,int i,int k)16 G4_BB* ImmDominator::InterSect(G4_BB* bb, int i, int k)
17 {
18 recomputeIfStale();
19
20 G4_BB* finger1 = immDoms[bb->getId()][i];
21 G4_BB* finger2 = immDoms[bb->getId()][k];
22
23 while ((finger1 != finger2) &&
24 (finger1 != nullptr) &&
25 (finger2 != nullptr))
26 {
27 if (finger1->getPreId() == finger2->getPreId())
28 {
29 assert(finger1 == kernel.fg.getEntryBB() || finger2 == kernel.fg.getEntryBB());
30 return kernel.fg.getEntryBB();
31 }
32
33 while ((iDoms[finger1->getId()] != nullptr) &&
34 (finger1->getPreId() > finger2->getPreId()))
35 {
36 finger1 = iDoms[finger1->getId()];
37 immDoms[bb->getId()][i] = finger1;
38 }
39
40 while ((iDoms[finger2->getId()] != nullptr) &&
41 (finger2->getPreId() > finger1->getPreId()))
42 {
43 finger2 = iDoms[finger2->getId()];
44 immDoms[bb->getId()][k] = finger2;
45 }
46
47 if ((iDoms[finger2->getId()] == nullptr) ||
48 (iDoms[finger1->getId()] == nullptr))
49 {
50 break;
51 }
52 }
53
54 if (finger1 == finger2)
55 {
56 return finger1;
57 }
58 else if (finger1->getPreId() > finger2->getPreId())
59 {
60 return finger2;
61 }
62 else
63 {
64 return finger1;
65 }
66 }
67
68 /*
69 * An improvement on the algorithm from "A Simple, Fast Dominance Algorithm"
70 * 1. Single pred assginment.
71 * 2. To reduce the back trace in the intersect function, a temp buffer for predictor of each nodes is used to record the back trace result.
72 */
runIDOM()73 void ImmDominator::runIDOM()
74 {
75 iDoms.resize(kernel.fg.size());
76 immDoms.resize(kernel.fg.size());
77
78 for (auto I = kernel.fg.cbegin(), E = kernel.fg.cend(); I != E; ++I)
79 {
80 auto bb = *I;
81 iDoms[bb->getId()] = nullptr;
82 immDoms[bb->getId()].resize(bb->Preds.size());
83
84 size_t i = 0;
85 for (auto pred : bb->Preds)
86 {
87 immDoms[bb->getId()][i] = pred;
88 i++;
89 }
90 }
91
92 entryBB = kernel.fg.getEntryBB();
93 iDoms[entryBB->getId()] = { entryBB };
94
95 // Actual dom computation
96 bool change = true;
97 while (change)
98 {
99 change = false;
100 for (auto I = kernel.fg.cbegin(), E = kernel.fg.cend(); I != E; ++I)
101 {
102 auto bb = *I;
103 if (bb == entryBB)
104 continue;
105
106 if (bb->Preds.size() == 1)
107 {
108 if (iDoms[bb->getId()] == nullptr)
109 {
110 iDoms[bb->getId()] = (*bb->Preds.begin());
111 change = true;
112 }
113 else
114 {
115 assert(iDoms[bb->getId()] == (*bb->Preds.begin()));
116 }
117 }
118 else
119 {
120 G4_BB* tmpIdom = nullptr;
121 int i = 0;
122 for (auto pred : bb->Preds)
123 {
124 if (iDoms[pred->getId()] != nullptr)
125 {
126 tmpIdom = pred;
127 break;
128 }
129 i++;
130 }
131
132 if (tmpIdom != nullptr)
133 {
134 int k = 0;
135 for (auto pred : bb->Preds)
136 {
137 if (k == i)
138 {
139 k++;
140 continue;
141 }
142
143 if (iDoms[pred->getId()] != nullptr)
144 {
145 tmpIdom = InterSect(bb, i, k);
146 }
147 k++;
148 }
149
150 if (iDoms[bb->getId()] == nullptr ||
151 iDoms[bb->getId()] != tmpIdom)
152 {
153 iDoms[bb->getId()] = tmpIdom;
154 change = true;
155 }
156 }
157 }
158 }
159 }
160 }
161
runDOM()162 void Dominator::runDOM()
163 {
164 // Use bit-set to represent BBs in dominator computation.
165
166 // domBS holds a bitset for every BB in program. Bitset instance
167 // at given index represents list of BBs that dominate BB#index.
168 std::vector<BitSet> domsBS;
169
170 // Construct bitsets corresponding to all BBs. Initialize all
171 // bits to 1.
172 for (unsigned int i = 0; i != kernel.fg.size(); ++i)
173 domsBS.emplace_back(kernel.fg.size(), true);
174
175 entryBB = kernel.fg.getEntryBB();
176
177 MUST_BE_TRUE(entryBB != nullptr, "Entry BB not found!");
178
179 // In dominator computation, entryBB is it's only dominator. All
180 // other BBs have dominator set to every other BB in program.
181 domsBS[entryBB->getId()].clear();
182 domsBS[entryBB->getId()].set(entryBB->getId(), true);
183
184 // Actual dom computation
185 bool change = true;
186 BitSet oldBS(kernel.fg.size(), false);
187 while (change)
188 {
189 change = false;
190 for (auto I = kernel.fg.cbegin(), E = kernel.fg.cend(); I != E; ++I)
191 {
192 auto bb = *I;
193 if (bb == entryBB)
194 continue;
195
196 oldBS.swap(domsBS[bb->getId()]);
197
198 domsBS[bb->getId()].setAll();
199 // Dominators[BB] = Intersection(Dominators[All Pred BBs]) + BB
200 for (auto preds : bb->Preds)
201 {
202 domsBS[bb->getId()] &= domsBS[preds->getId()];
203 }
204
205 // Add bb to it's own dominator set
206 domsBS[bb->getId()].set(bb->getId(), true);
207
208 if (oldBS != domsBS[bb->getId()])
209 {
210 // Dominator for bb was modified
211 change = true;
212 break;
213 }
214 }
215 }
216
217 std::vector<G4_BB*> indexedBBs;
218 indexedBBs.resize(kernel.fg.size());
219 for (auto I = kernel.fg.cbegin(), E = kernel.fg.cend(); I != E; ++I)
220 {
221 auto BB = *I;
222 indexedBBs[BB->getId()] = BB;
223 }
224
225 Doms.resize(kernel.fg.size());
226 for (auto I = kernel.fg.cbegin(), E = kernel.fg.cend(); I != E; ++I)
227 {
228 auto BB = *I;
229
230 auto& domBS = domsBS[BB->getId()];
231
232 for (unsigned int i = 0; i != domsBS.size(); ++i)
233 {
234 if (domBS.isSet(i))
235 {
236 auto domBB = indexedBBs[i];
237 Doms[BB->getId()].insert(domBB);
238 }
239 }
240 }
241 }
242
243
getDom(G4_BB * bb)244 std::unordered_set<G4_BB*>& Dominator::getDom(G4_BB* bb)
245 {
246 recomputeIfStale();
247
248 return Doms[bb->getId()];
249 }
250
getImmDom(G4_BB * bb)251 std::vector<G4_BB*>& ImmDominator::getImmDom(G4_BB* bb)
252 {
253 recomputeIfStale();
254
255 return immDoms[bb->getId()];
256 }
257
updateImmDom()258 void ImmDominator::updateImmDom()
259 {
260 std::vector<BitSet> domBits(kernel.fg.size());
261
262 for (size_t i = 0; i < kernel.fg.size(); i++)
263 {
264 domBits[i] = BitSet(unsigned(kernel.fg.size()), false);
265 }
266
267 // Update immDom vector with correct ordering
268 for (auto bb : kernel.fg)
269 {
270 auto& DomBBs = kernel.fg.getDominator().getDom(bb);
271
272 for (auto domBB : DomBBs)
273 {
274 domBits[bb->getId()].set(domBB->getId(), true);
275 }
276 }
277
278 iDoms.resize(kernel.fg.size());
279 for (auto bb : kernel.fg)
280 {
281 auto& DomBBs = kernel.fg.getDominator().getDom(bb);
282 BitSet tmpBits = domBits[bb->getId()];
283 tmpBits.set(bb->getId(), false);
284 iDoms[bb->getId()] = bb;
285
286 for (auto domBB : DomBBs)
287 {
288 if (domBB == bb)
289 continue;
290
291 if (tmpBits == domBits[domBB->getId()])
292 {
293 iDoms[bb->getId()] = domBB;
294 }
295 }
296 }
297 }
298
reset()299 void ImmDominator::reset()
300 {
301 iDoms.clear();
302 immDoms.clear();
303
304 setStale();
305 }
306
run()307 void ImmDominator::run()
308 {
309 // this function re-runs analysis. caller needs to check if
310 // analysis is stale.
311 entryBB = kernel.fg.getEntryBB();
312
313 runIDOM();
314
315 setValid();
316 }
317
dump(std::ostream & os)318 void ImmDominator::dump(std::ostream& os)
319 {
320 if (isStale())
321 os << "Imm dominator data is stale.\n";
322
323 os << "\n\nImm dom:\n";
324 dumpImmDom(os);
325 }
326
reset()327 void Dominator::reset()
328 {
329 Doms.clear();
330
331 setStale();
332 }
333
run()334 void Dominator::run()
335 {
336 // this function re-runs analysis. caller needs to check if
337 // analysis is stale.
338 entryBB = kernel.fg.getEntryBB();
339
340 runDOM();
341
342 setValid();
343 }
344
dump(std::ostream & os)345 void Dominator::dump(std::ostream& os)
346 {
347 if (isStale())
348 os << "Dominator data is stale.\n";
349
350 os << "Dom:\n";
351 dumpDom(os);
352 }
353
getIDoms()354 const std::vector<G4_BB*>& ImmDominator::getIDoms()
355 {
356 recomputeIfStale();
357
358 return iDoms;
359 }
360
getCommonImmDom(const std::unordered_set<G4_BB * > & bbs)361 G4_BB* ImmDominator::getCommonImmDom(const std::unordered_set<G4_BB*>& bbs)
362 {
363 recomputeIfStale();
364
365 if (bbs.size() == 0)
366 return nullptr;
367
368 unsigned int maxId = (*bbs.begin())->getId();
369
370 auto commonImmDoms = getImmDom(*bbs.begin());
371 for (auto bb : bbs)
372 {
373 maxId = std::max(maxId, bb->getId());
374
375 const auto& DomBB = kernel.fg.getDominator().getDom(bb);
376 for (G4_BB*& dom : commonImmDoms)
377 {
378 if (dom != nullptr && DomBB.find(dom) == DomBB.end())
379 {
380 dom = nullptr;
381 }
382 }
383 }
384
385 // Return first imm dom that is not a BB from bbs set
386 for (G4_BB* dom : commonImmDoms)
387 {
388 if (dom &&
389 // Common imm pdom must be lexically last BB
390 dom->getId() >= maxId &&
391 ((dom->size() > 1 && dom->front()->isLabel()) ||
392 (dom->size() > 0 && !dom->front()->isLabel())))
393 {
394 return dom;
395 }
396 }
397
398 return entryBB;
399 }
400
dumpImmDom(std::ostream & os)401 void ImmDominator::dumpImmDom(std::ostream& os)
402 {
403 for (auto bb : kernel.fg)
404 {
405 os << "BB" << bb->getId() << " - ";
406 auto& domBBs = immDoms[bb->getId()];
407 for (auto domBB : domBBs)
408 {
409 os << "BB" << domBB->getId();
410 if (domBB->getLabel())
411 {
412 os << " (" << domBB->getLabel()->getLabel() << ")";
413 }
414 os << ", ";
415 }
416 os << "\n";
417 }
418 }
419
dumpDom(std::ostream & os)420 void vISA::Dominator::dumpDom(std::ostream& os)
421 {
422 for (auto bb : kernel.fg)
423 {
424 os << "BB" << bb->getId() << " - ";
425 auto& domBBs = Doms[bb->getId()];
426 for (auto domBB : domBBs)
427 {
428 os << "BB" << domBB->getId();
429 if (domBB->getLabel())
430 {
431 os << " (" << domBB->getLabel()->getLabel() << ")";
432 }
433 os << ", ";
434 }
435 os << "\n";
436 }
437 }
438
439 // return true if bb1 dominates bb2
dominates(G4_BB * bb1,G4_BB * bb2)440 bool Dominator::dominates(G4_BB* bb1, G4_BB* bb2)
441 {
442 recomputeIfStale();
443
444 auto& dom = getDom(bb2);
445 if (dom.find(bb1) != dom.end())
446 return true;
447
448 return false;
449 }
450
recomputeIfStale()451 void Analysis::recomputeIfStale()
452 {
453 if (!isStale() || inProgress)
454 return;
455
456 inProgress = true;
457 reset();
458 run();
459 inProgress = false;
460 }
461
PostDom(G4_Kernel & k)462 PostDom::PostDom(G4_Kernel& k) : kernel(k)
463 {
464 }
465
reset()466 void PostDom::reset()
467 {
468 postDoms.clear();
469 immPostDoms.clear();
470
471 setStale();
472 }
473
run()474 void PostDom::run()
475 {
476 exitBB = nullptr;
477 auto numBBs = kernel.fg.size();
478 postDoms.resize(numBBs);
479 immPostDoms.resize(numBBs);
480
481 for (auto bb_rit = kernel.fg.rbegin(); bb_rit != kernel.fg.rend(); bb_rit++)
482 {
483 auto bb = *bb_rit;
484 if (bb->size() > 0)
485 {
486 auto lastInst = bb->back();
487 if (lastInst->isEOT())
488 {
489 exitBB = bb;
490 break;
491 }
492 }
493 }
494
495 MUST_BE_TRUE(exitBB != nullptr, "Exit BB not found!");
496
497 postDoms[exitBB->getId()] = { exitBB };
498 std::unordered_set<G4_BB*> allBBs(kernel.fg.cbegin(), kernel.fg.cend());
499
500 for (auto bb : kernel.fg)
501 {
502 if (bb != exitBB)
503 {
504 postDoms[bb->getId()] = allBBs;
505 }
506 }
507
508 // Actual post dom computation
509 bool change = true;
510 while (change)
511 {
512 change = false;
513 for (auto bb : kernel.fg)
514 {
515 if (bb == exitBB)
516 continue;
517
518 std::unordered_set<G4_BB*> tmp = { bb };
519 // Compute intersection of pdom of successors
520 std::unordered_map<G4_BB*, unsigned> numInstances;
521 for (auto succs : bb->Succs)
522 {
523 auto& pdomSucc = postDoms[succs->getId()];
524 for (auto pdomSuccBB : pdomSucc)
525 {
526 auto it = numInstances.find(pdomSuccBB);
527 if (it == numInstances.end())
528 numInstances.insert(std::make_pair(pdomSuccBB, 1));
529 else
530 it->second = it->second + 1;
531 }
532 }
533
534 // Common BBs appear in numInstances map with second value == bb->Succs count
535 for (auto commonBBs : numInstances)
536 {
537 if (commonBBs.second == bb->Succs.size())
538 tmp.insert(commonBBs.first);
539 }
540
541 // Check if postDom set changed for bb in current iter
542 if (tmp.size() != postDoms[bb->getId()].size())
543 {
544 postDoms[bb->getId()] = tmp;
545 change = true;
546 continue;
547 }
548 else
549 {
550 auto& pdomBB = postDoms[bb->getId()];
551 for (auto tmpBB : tmp)
552 {
553 if (pdomBB.find(tmpBB) == pdomBB.end())
554 {
555 postDoms[bb->getId()] = tmp;
556 change = true;
557 break;
558 }
559 if (change)
560 break;
561 }
562 }
563 }
564 }
565
566 setValid();
567 updateImmPostDom();
568 }
569
getPostDom(G4_BB * bb)570 std::unordered_set<G4_BB*>& PostDom::getPostDom(G4_BB* bb)
571 {
572 recomputeIfStale();
573
574 return postDoms[bb->getId()];
575 }
576
dumpImmDom(std::ostream & os)577 void PostDom::dumpImmDom(std::ostream& os)
578 {
579 if (isStale())
580 os << "PostDom data is stale.\n";
581
582 for (auto bb : kernel.fg)
583 {
584 os << "BB" << bb->getId();
585 auto& pdomBBs = immPostDoms[bb->getId()];
586 for (auto pdomBB : pdomBBs)
587 {
588 os << "BB" << pdomBB->getId();
589 if (pdomBB->getLabel())
590 {
591 os << " (" << pdomBB->getLabel()->getLabel() << ")";
592 }
593 os << ", ";
594 }
595 os << "\n";
596 }
597 }
598
getImmPostDom(G4_BB * bb)599 std::vector<G4_BB*>& PostDom::getImmPostDom(G4_BB* bb)
600 {
601 recomputeIfStale();
602
603 return immPostDoms[bb->getId()];
604 }
605
updateImmPostDom()606 void PostDom::updateImmPostDom()
607 {
608 // Update immPostDom vector with correct ordering
609 for (auto bb : kernel.fg)
610 {
611 auto& postDomBBs = postDoms[bb->getId()];
612 auto& immPostDomBB = immPostDoms[bb->getId()];
613 immPostDomBB.resize(postDomBBs.size());
614 immPostDomBB[0] = bb;
615
616 for (auto pdomBB : postDomBBs)
617 {
618 if (pdomBB == bb)
619 continue;
620
621 immPostDomBB[postDomBBs.size() - postDoms[pdomBB->getId()].size()] = pdomBB;
622 }
623 }
624 }
625
getCommonImmDom(std::unordered_set<G4_BB * > & bbs)626 G4_BB* PostDom::getCommonImmDom(std::unordered_set<G4_BB*>& bbs)
627 {
628 recomputeIfStale();
629
630 if (bbs.size() == 0)
631 return nullptr;
632
633 unsigned maxId = (*bbs.begin())->getId();
634
635 auto commonImmDoms = getImmPostDom(*bbs.begin());
636 for (auto bb : bbs)
637 {
638 if (bb->getId() > maxId)
639 maxId = bb->getId();
640
641 auto& postDomBB = postDoms[bb->getId()];
642 for (unsigned i = 0, size = commonImmDoms.size(); i != size; i++)
643 {
644 if (commonImmDoms[i])
645 {
646 if (postDomBB.find(commonImmDoms[i]) == postDomBB.end())
647 {
648 commonImmDoms[i] = nullptr;
649 }
650 }
651 }
652 }
653
654 // Return first imm dom that is not a BB from bbs set
655 for (G4_BB* commonImmDom : commonImmDoms)
656 {
657 if (commonImmDom &&
658 // Common imm pdom must be lexically last BB
659 commonImmDom->getId() >= maxId &&
660 ((commonImmDom->size() > 1 && commonImmDom->front()->isLabel()) ||
661 (commonImmDom->size() > 0 && !commonImmDom->front()->isLabel())))
662 {
663 return commonImmDom;
664 }
665 }
666
667 return exitBB;
668 }
669
LoopDetection(G4_Kernel & k)670 LoopDetection::LoopDetection(G4_Kernel& k) : kernel(k), fg(k.fg)
671 {
672 }
673
getTopLoops()674 std::vector<Loop*> LoopDetection::getTopLoops()
675 {
676 recomputeIfStale();
677
678 return topLoops;
679 }
680
getInnerMostLoop(const G4_BB * bb)681 Loop* Loop::getInnerMostLoop(const G4_BB* bb)
682 {
683 // if current loop contains bb, recurse loop tree and return
684 // most nested loop containing it.
685 // if current loop doesnt contain bb then return nullptr.
686 if (!contains(bb))
687 return nullptr;
688
689 for (auto& nested : immNested)
690 if(auto innerMost = nested->getInnerMostLoop(bb))
691 return innerMost;
692
693 return this;
694 }
695
getLoopExits()696 std::vector<G4_BB*>& Loop::getLoopExits()
697 {
698 // already computed before, so return old list
699 if (loopExits.size() > 0)
700 return loopExits;
701
702 // list all successors of loop BBs that are themselves not part of loop, ie loop exits.
703 // this loop may add duplicate entries to exits. those are cleaned up later.
704 std::list<G4_BB*> exits;
705 for (auto bb : BBs)
706 {
707 for (auto succ : bb->Succs)
708 {
709 if (contains(succ))
710 continue;
711 exits.push_back(succ);
712 }
713 }
714
715 // sort exits found by bbid
716 exits.sort([](G4_BB* bb1, G4_BB* bb2) { return bb1->getId() < bb2->getId(); });
717 // remove duplicates
718 exits.unique();
719 // transfer data to class member for future invocations
720 std::for_each(exits.begin(), exits.end(), [&](G4_BB* bb) { loopExits.push_back(bb); });
721
722 return loopExits;
723 }
724
getPreheader(Loop * loop)725 G4_BB* LoopDetection::getPreheader(Loop* loop)
726 {
727 if (loop->preHeader)
728 return loop->preHeader;
729
730 // return pre-header if one exists, otherwise create a new one and return it
731 auto header = loop->getHeader();
732 auto headerPreds = header->Preds;
733
734 // for a BB to be valid pre-header, it needs to fulfill following criteria:
735 // 1. BB should be a predecessor of loop header,
736 // 2. BB should dominate header
737 // 3. BB's only successor should be loop header
738 // 4. Loop header should've no other predecessor outside the loop
739
740 G4_BB* enteringNode = nullptr;
741 bool found = false;
742 for (auto pred : headerPreds)
743 {
744 if (loop->contains(pred))
745 continue;
746
747 if (!enteringNode)
748 {
749 enteringNode = pred;
750 found = true;
751 }
752 else
753 {
754 found = false;
755 break;
756 }
757
758 if (!enteringNode->dominates(header))
759 {
760 found = false;
761 break;
762 }
763
764 if (enteringNode->Succs.size() > 1)
765 {
766 found = false;
767 break;
768 }
769 }
770
771 if (found && enteringNode)
772 {
773 // entering node is legal preheader for loop
774 loop->preHeader = enteringNode;
775 return enteringNode;
776 }
777
778 // a valid pre-header wasnt found, so create one and return it
779 // unless any pred uses SIMD CF goto in to loop header
780 if (header->getLabel())
781 {
782 auto headerLblStr = std::string(header->getLabel()->getLabel());
783 for (auto pred : headerPreds)
784 {
785 if (pred->isEndWithGoto())
786 {
787 auto gotoInst = pred->back()->asCFInst();
788 auto jipStr = std::string(gotoInst->getJipLabelStr());
789 auto uipStr = std::string(gotoInst->getUipLabelStr());
790 if (jipStr == headerLblStr ||
791 uipStr == headerLblStr)
792 {
793 // dont create preheader for this loop as a predecessor
794 // of header has SIMD CF in to loop header.
795 return nullptr;
796 }
797 }
798 else if (pred->size() > 0 &&
799 pred->back()->opcode() == G4_jmpi)
800 {
801 // TODO: may be safe to alter CF by changing jmpi destination
802 // to preheader instead of header.
803 return nullptr;
804 }
805 }
806 }
807
808 G4_BB* preHeader = kernel.fg.createNewBBWithLabel("preHeader");
809 for (auto pred : headerPreds)
810 {
811 if (loop->contains(pred))
812 continue;
813
814 kernel.fg.removePredSuccEdges(pred, header);
815 kernel.fg.addPredSuccEdges(pred, preHeader);
816 }
817 kernel.fg.addPredSuccEdges(preHeader, header);
818
819 for (auto bbIt = kernel.fg.begin(); bbIt != kernel.fg.end(); ++bbIt)
820 {
821 if (*bbIt == header)
822 {
823 kernel.fg.insert(bbIt, preHeader);
824 break;
825 }
826 }
827
828 loop->preHeader = preHeader;
829 if (loop->parent)
830 loop->parent->addBBToLoopHierarchy(preHeader);
831
832 // adding/deleted CFG edges causes loop information to become
833 // stale. we fix this by inserting preheader BB to all
834 // parent loops. and then we set valid flag so no recomputation
835 // is needed.
836 setValid();
837
838 return preHeader;
839 }
840
getInnerMostLoop(const G4_BB * bb)841 Loop* LoopDetection::getInnerMostLoop(const G4_BB* bb)
842 {
843 recomputeIfStale();
844
845 for (auto& loop : topLoops)
846 if (auto innerMost = loop->getInnerMostLoop(bb))
847 return innerMost;
848
849 return nullptr;
850 }
851
computePreheaders()852 void LoopDetection::computePreheaders()
853 {
854 recomputeIfStale();
855
856 for (auto& loop : allLoops)
857 {
858 (void)getPreheader(&loop);
859 }
860 }
861
reset()862 void LoopDetection::reset()
863 {
864 allLoops.clear();
865 topLoops.clear();
866
867 PreIdRPostId.clear();
868
869 setStale();
870 }
871
872 // Adapted from FlowGraph::DFSTraverse.
873 // No changes are made to any G4_BB member or to FlowGraph.
DFSTraverse(const G4_BB * startBB,unsigned & preId,unsigned & postId,BackEdges & bes)874 void LoopDetection::DFSTraverse(const G4_BB* startBB, unsigned& preId, unsigned& postId, BackEdges& bes)
875 {
876 std::stack<const G4_BB*> traversalStack;
877 traversalStack.push(startBB);
878
879 auto getPreId = [&](const G4_BB* bb)
880 {
881 return PreIdRPostId[bb].first;
882 };
883
884 auto getRPostId = [&](const G4_BB* bb)
885 {
886 return PreIdRPostId[bb].second;
887 };
888
889 auto setPreId = [&](const G4_BB* bb, unsigned int id)
890 {
891 PreIdRPostId[bb].first = id;
892 };
893
894 auto setRPostId = [&](const G4_BB* bb, unsigned int id)
895 {
896 PreIdRPostId[bb].second = id;
897 };
898
899 while (!traversalStack.empty())
900 {
901 auto bb = traversalStack.top();
902 if (getPreId(bb) != UINT_MAX)
903 {
904 // Pre-processed already and continue to the next one.
905 // Before doing so, set postId if not set before.
906 traversalStack.pop();
907 if (getRPostId(bb) == UINT_MAX)
908 {
909 // All bb's succ has been visited (PreId is set) at this time.
910 // if any of its succ has not been finished (RPostId not set),
911 // bb->succ forms a backedge.
912 //
913 // Note: originally, CALL and EXIT will not check back-edges, here
914 // we skip checking for them as well. (INIT & RETURN should
915 // be checked as well ?)
916 if (!(bb->getBBType() & (G4_BB_CALL_TYPE | G4_BB_EXIT_TYPE)))
917 {
918 for (auto succBB : bb->Succs)
919 {
920 if (getRPostId(succBB) == UINT_MAX)
921 {
922 BackEdge be = std::make_pair(const_cast<G4_BB*>(bb), succBB);
923 bes.push_back(be);
924 }
925 }
926 }
927
928 // Need to keep this after backedge checking so that self-backedge
929 // (single-bb loop) will not be missed.
930 setRPostId(bb, postId++);
931 }
932 continue;
933 }
934
935 setPreId(bb, preId++);
936
937 if (bb->getBBType() & G4_BB_CALL_TYPE)
938 {
939 const G4_BB* returnBB = bb->BBAfterCall();
940
941 if (getPreId(returnBB) == UINT_MAX)
942 {
943 traversalStack.push(returnBB);
944 }
945 else
946 {
947 MUST_BE_TRUE(false, ERROR_FLOWGRAPH);
948 }
949 }
950 else if (bb->getBBType() & G4_BB_EXIT_TYPE)
951 {
952 // Skip
953 }
954 else
955 {
956 // To be consistent with previous behavior, use reverse_iter.
957 auto RIE = bb->Succs.rend();
958 for (auto rit = bb->Succs.rbegin(); rit != RIE; ++rit)
959 {
960 const G4_BB* succBB = *rit;
961 if (getPreId(succBB) == UINT_MAX)
962 {
963 traversalStack.push(succBB);
964 }
965 }
966 }
967 }
968 }
969
findDominatingBackEdges(BackEdges & bes)970 void LoopDetection::findDominatingBackEdges(BackEdges& bes)
971 {
972 const auto& BBs = fg.getBBList();
973
974 for (auto& bb : BBs)
975 {
976 PreIdRPostId[bb] = std::make_pair(UINT_MAX, UINT_MAX);
977 }
978
979 unsigned preId = 0;
980 unsigned postID = 0;
981
982 DFSTraverse(fg.getEntryBB(), preId, postID, bes);
983
984 for (auto fn : fg.funcInfoTable)
985 {
986 DFSTraverse(fn->getInitBB(), preId, postID, bes);
987 }
988 }
989
populateLoop(BackEdge & backEdge)990 void LoopDetection::populateLoop(BackEdge& backEdge)
991 {
992 // check whether dst dominates src
993 auto src = const_cast<G4_BB*>(backEdge.first);
994 auto dst = const_cast<G4_BB*>(backEdge.second);
995
996 auto& domSecond = fg.getDominator().getDom(src);
997 if (domSecond.find(dst) != domSecond.end())
998 {
999 // this is a natural loop back edge. populate all bbs in loop.
1000 Loop newLoop(backEdge);
1001 newLoop.id = allLoops.size() + 1;
1002
1003 newLoop.addBBToLoop(src);
1004 newLoop.addBBToLoop(dst);
1005
1006 std::stack<G4_BB*> traversal;
1007 traversal.push(src);
1008 while (!traversal.empty())
1009 {
1010 auto bb = traversal.top();
1011 traversal.pop();
1012
1013 // check whether bb's preds are dominated by loop header.
1014 // if yes, add them to traversal.
1015 for (auto predIt = bb->Preds.begin(); predIt != bb->Preds.end(); ++predIt)
1016 {
1017 auto pred = (*predIt);
1018 // pred is loop header, which is already added to list of loop BBs
1019 if (pred == dst)
1020 continue;
1021
1022 if (dst->dominates(pred) &&
1023 !newLoop.contains(pred))
1024 {
1025 // pred is part of loop
1026 newLoop.addBBToLoop(pred);
1027 traversal.push(pred);
1028 }
1029 }
1030 }
1031
1032 (void)newLoop.getLoopExits();
1033 allLoops.emplace_back(newLoop);
1034 }
1035 }
1036
computeLoopTree()1037 void LoopDetection::computeLoopTree()
1038 {
1039 // create loop tree by iterating over allLoops in descending order
1040 // of BB count.
1041 std::vector<Loop*> sortedLoops;
1042 std::for_each(allLoops.begin(), allLoops.end(), [&](Loop& l) { sortedLoops.push_back(&l); });
1043
1044 // sorting loops by size of contained BBs makes it easy to create
1045 // tree structure relationship of loops.
1046 // 1. If loop A has more BBs than loop B then A is either some parent of B or no relationship exists.
1047 // 2. For loop A to be a parent of loop B, all BBs of loop B have to be contained in loop A as well.
1048 //
1049 // processing loops in sorted order of BB size guarantees that we'll create tree in top-down order.
1050 // we'll never encounter a situation where new loop to be added to tree is parent of an existing
1051 // loop already present in the tree.
1052 std::sort(sortedLoops.begin(), sortedLoops.end(), [](Loop* l1, Loop* l2) { return l2->getBBSize() < l1->getBBSize(); });
1053
1054 for (auto loop : sortedLoops)
1055 {
1056 addLoop(loop, nullptr);
1057 }
1058 }
1059
addLoop(Loop * newLoop,Loop * aParent)1060 void LoopDetection::addLoop(Loop* newLoop, Loop* aParent)
1061 {
1062 if (topLoops.size() == 0)
1063 {
1064 topLoops.push_back(newLoop);
1065 return;
1066 }
1067
1068 // find a place in existing loop tree to insert new loop passed in.
1069 // following scenarios exist:
1070 // a. loop is nested loop of an existing loop,
1071 // b. loop is not nested but is sibling of existing loop,
1072 // c. loop is top level parent loop of a certain tree
1073
1074 // check if newLoop fits in to any existing current top level loop
1075 auto siblings = aParent ? aParent->getAllSiblings(topLoops) : topLoops;
1076 for (auto& sibling : siblings)
1077 {
1078 if (newLoop->fullSubset(sibling))
1079 {
1080 if (sibling->immNested.size() > 0)
1081 {
1082 addLoop(newLoop, sibling->immNested[0]);
1083 }
1084 else
1085 {
1086 sibling->immNested.push_back(newLoop);
1087 newLoop->parent = sibling;
1088 }
1089 return;
1090 }
1091 else if (newLoop->fullSuperset(sibling))
1092 {
1093 MUST_BE_TRUE(false, "Not expecting to see parent loop here");
1094 return;
1095 }
1096 }
1097
1098 // add new sibling to current level
1099 newLoop->parent = siblings[0]->parent;
1100 if (newLoop->parent)
1101 {
1102 siblings[0]->parent->immNested.push_back(newLoop);
1103 }
1104 else
1105 {
1106 topLoops.push_back(newLoop);
1107 }
1108 }
1109
run()1110 void LoopDetection::run()
1111 {
1112 BackEdges backEdges;
1113 findDominatingBackEdges(backEdges);
1114 for (auto& be : backEdges)
1115 {
1116 populateLoop(be);
1117 }
1118
1119 computeLoopTree();
1120
1121 setValid();
1122 }
1123
dump(std::ostream & os)1124 void LoopDetection::dump(std::ostream& os)
1125 {
1126 if(isStale())
1127 os << "Loop info is stale.\n";
1128
1129 os << "\n\n\nLoop tree:\n";
1130
1131 for (auto loop : topLoops)
1132 {
1133 loop->dump(os);
1134 }
1135 }
1136
1137 // add bb to current loop and to all valid parents
addBBToLoopHierarchy(G4_BB * bb)1138 void Loop::addBBToLoopHierarchy(G4_BB* bb)
1139 {
1140 addBBToLoop(bb);
1141
1142 if (parent)
1143 parent->addBBToLoopHierarchy(bb);
1144 }
1145
addBBToLoop(G4_BB * bb)1146 void vISA::Loop::addBBToLoop(G4_BB* bb)
1147 {
1148 if (!contains(bb))
1149 {
1150 BBs.push_back(bb);
1151 BBsLookup.insert(bb);
1152 }
1153 }
1154
fullSubset(Loop * other)1155 bool Loop::fullSubset(Loop* other)
1156 {
1157 if (BBs.size() > other->BBs.size())
1158 return false;
1159
1160 // to avoid O(N^2) lookup, use unordered set of other loop's BBs for lookup
1161 auto& otherBBs = other->BBsLookup;
1162
1163 // check whether current loop's all BBs are fully contained in "other" loop
1164 for (auto bb : BBs)
1165 {
1166 if (otherBBs.find(bb) == otherBBs.end())
1167 return false;
1168 }
1169
1170 return true;
1171 }
1172
fullSuperset(Loop * other)1173 bool Loop::fullSuperset(Loop* other)
1174 {
1175 return other->fullSubset(this);
1176 }
1177
getAllSiblings(std::vector<Loop * > & topLoops)1178 std::vector<Loop*> Loop::getAllSiblings(std::vector<Loop*>& topLoops)
1179 {
1180 if (parent)
1181 return parent->immNested;
1182
1183 return topLoops;
1184 }
1185
getNestingLevel()1186 unsigned int Loop::getNestingLevel()
1187 {
1188 if (!parent)
1189 return 1;
1190
1191 return parent->getNestingLevel()+1;
1192 }
1193
dump(std::ostream & os)1194 void Loop::dump(std::ostream& os)
1195 {
1196 auto nestingLevel = getNestingLevel();
1197 nestingLevel = nestingLevel > 0 ? nestingLevel : 1;
1198 for (unsigned int i = 0; i != nestingLevel - 1; ++i)
1199 {
1200 os << "\t";
1201 }
1202 os << "L" << id << ": - { ";
1203 for (auto bb : BBs)
1204 {
1205 os << bb->getId();
1206 if (bb != BBs.back())
1207 os << ", ";
1208 }
1209 os << " } ";
1210
1211 auto labelStr = std::string("BB") + (preHeader ? std::to_string(preHeader->getId()) :
1212 std::string("--"));
1213
1214 if (preHeader && preHeader->getLabel())
1215 labelStr += "(" + std::string(preHeader->getLabel()->getLabel()) + ")";
1216
1217 std::string exitBBs = "{ ";
1218 for (auto bb : loopExits)
1219 {
1220 exitBBs += "BB" + std::to_string(bb->getId());
1221 if (bb != loopExits.back())
1222 exitBBs += ", ";
1223 }
1224 exitBBs += " }";
1225
1226 os << " BE: {BB" << be.first->getId() << " -> BB" << be.second->getId() << "}, " <<
1227 "PreHeader: " << labelStr << ", " << "Loop exits: " << exitBBs << std::endl;
1228
1229 for (auto& nested : immNested)
1230 {
1231 nested->dump(os);
1232 }
1233 }
1234
contains(const G4_BB * bb)1235 bool Loop::contains(const G4_BB* bb)
1236 {
1237 return BBsLookup.find(bb) != BBsLookup.end();
1238 }
1239
isUniqueDef(G4_DstRegRegion * dst)1240 bool VarReferences::isUniqueDef(G4_DstRegRegion* dst)
1241 {
1242 recomputeIfStale();
1243
1244 auto dcl = dst->getTopDcl();
1245
1246 // return true if spilled variable has a single static def
1247 // and it is not live-in to current bb (eg, loop, sub).
1248 if (getDefCount(dcl) != 1)
1249 {
1250 // check whether multiple defs exist in program for current
1251 // lb, rb
1252 auto lb = dst->getLeftBound();
1253 auto rb = dst->getRightBound();
1254
1255 unsigned int count = 0;
1256 const auto& defs = getDefs(dcl);
1257 for (auto& def : *defs)
1258 {
1259 if (std::get<2>(def) <= rb &&
1260 std::get<3>(def) >= lb)
1261 ++count;
1262 }
1263
1264 if (count > 1)
1265 return false;
1266 }
1267
1268 return true;
1269 }
1270
getDefCount(G4_Declare * dcl)1271 unsigned int VarReferences::getDefCount(G4_Declare* dcl)
1272 {
1273 auto defs = getDefs(dcl);
1274 if (defs)
1275 return defs->size();
1276 return 0;
1277 }
1278
getUseCount(G4_Declare * dcl)1279 unsigned int VarReferences::getUseCount(G4_Declare* dcl)
1280 {
1281 auto uses = getUses(dcl);
1282 if (uses)
1283 return uses->size();
1284 return 0;
1285 }
1286
getDefs(G4_Declare * dcl)1287 const VarReferences::Defs* VarReferences::getDefs(G4_Declare* dcl)
1288 {
1289 recomputeIfStale();
1290
1291 auto it = VarRefs.find(dcl);
1292 if (it != VarRefs.end())
1293 return &(it->second.first);
1294
1295 return nullptr;
1296 }
1297
getUses(G4_Declare * dcl)1298 const VarReferences::Uses* VarReferences::getUses(G4_Declare* dcl)
1299 {
1300 recomputeIfStale();
1301
1302 auto it = VarRefs.find(dcl);
1303 if (it != VarRefs.end())
1304 return &(it->second.second);
1305
1306 return nullptr;
1307 }
1308
reset()1309 void VarReferences::reset()
1310 {
1311 VarRefs.clear();
1312
1313 setStale();
1314 }
1315
run()1316 void VarReferences::run()
1317 {
1318 for (auto bb : kernel.fg)
1319 {
1320 for (auto inst : *bb)
1321 {
1322 if (inst->isPseudoKill())
1323 continue;
1324
1325 auto dst = inst->getDst();
1326
1327 if (dst && !dst->isNullReg())
1328 {
1329 auto topdcl = dst->getTopDcl();
1330
1331 if (topdcl)
1332 {
1333 auto lb = dst->getLeftBound();
1334 auto rb = dst->getRightBound();
1335 auto& Defs = VarRefs[topdcl].first;
1336 Defs.push_back(std::make_tuple(inst, bb, lb, rb));
1337 }
1338 }
1339
1340 for (unsigned int i = 0; i != inst->getNumSrc(); ++i)
1341 {
1342 auto src = inst->getSrc(i);
1343 if (src && src->isSrcRegRegion())
1344 {
1345 auto topdcl = src->asSrcRegRegion()->getTopDcl();
1346 if (topdcl)
1347 {
1348 auto& Uses = VarRefs[topdcl].second;
1349 Uses.push_back(std::make_tuple(inst, bb));
1350 }
1351 }
1352 }
1353 }
1354 }
1355
1356 setValid();
1357 }
1358
dump(std::ostream & os)1359 void VarReferences::dump(std::ostream& os)
1360 {
1361 if (isStale())
1362 os << "Data is stale.\n";
1363
1364 os << "#Dcls with defs/uses: " << VarRefs.size();
1365 }
1366