1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 /*========================== begin_copyright_notice ============================
10 
11 This file is distributed under the University of Illinois Open Source License.
12 See LICENSE.TXT for details.
13 
14 ============================= end_copyright_notice ===========================*/
15 
16 // Estimates the register pressure at a program point.
17 
18 #include "RegisterPressureEstimate.hpp"
19 #include "Compiler/IGCPassSupport.h"
20 #include <set>
21 #include "common/debug/Debug.hpp"
22 #include "common/debug/Dump.hpp"
23 #include "common/igc_regkeys.hpp"
24 #include "common/LLVMWarningsPush.hpp"
25 #include <llvm/IR/Intrinsics.h>
26 #include <llvm/IR/Function.h>
27 #include <llvm/IR/InstIterator.h>
28 #include <llvm/Transforms/Utils/Local.h>
29 #include "common/LLVMWarningsPop.hpp"
30 #include "Probe/Assertion.h"
31 
32 using namespace llvm;
33 using namespace IGC::Debug;
34 using namespace IGC;
35 
36 char RegisterPressureEstimate::ID = 0;
37 #define PASS_FLAG "igc-RegisterPressureEstimate"
38 #define PASS_DESCRIPTION "GenX Register Pressure Analysis"
39 #define PASS_CFG_ONLY false
40 #define PASS_ANALYSIS false
41 IGC_INITIALIZE_PASS_BEGIN(RegisterPressureEstimate, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
42 IGC_INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
43 IGC_INITIALIZE_PASS_END(RegisterPressureEstimate, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
44 
45 namespace IGC
46 {
getAnalysisUsage(AnalysisUsage & AU) const47     void RegisterPressureEstimate::getAnalysisUsage(AnalysisUsage& AU) const
48     {
49         AU.setPreservesAll();
50         AU.addRequired<LoopInfoWrapperPass>();
51         AU.addRequired<WIAnalysis>();
52     }
53 
runOnFunction(Function & F)54     bool RegisterPressureEstimate::runOnFunction(Function& F)
55     {
56         m_DL = &F.getParent()->getDataLayout();
57         m_pFunc = &F;
58         LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
59         WI = &getAnalysis<WIAnalysis>();
60         m_available = buildLiveIntervals(true);
61         return false;
62     }
63 
64     /// \brief Assign a number to each instruction in a function.
65     ///
66     /// - Arguments get number 0;
67     /// - Basic blocks get a number;
68     /// - Phi nodes in each basic block get the same number;
69     /// - All other instructions get a unique number, if assigned.
70     ///
assignNumbers()71     void RegisterPressureEstimate::assignNumbers()
72     {
73         unsigned Num = 0;
74 
75         // Arguments assigned with number 0.
76         for (auto AI = m_pFunc->arg_begin(), AE = m_pFunc->arg_end(); AI != AE; ++AI)
77         {
78             Argument* Arg = &(*AI);
79             m_pNumbers[Arg] = Num;
80             if (!Arg->use_empty())
81             {
82                 getOrCreateLiveRange(Arg);
83             }
84         }
85 
86         // Assign a number to basic blocks and instructions.
87         auto& BBs = m_pFunc->getBasicBlockList();
88         for (auto BI = BBs.begin(), BE = BBs.end(); BI != BE; ++BI)
89         {
90             BasicBlock* BB = &*BI;
91             unsigned BlockNum = m_pNumbers[BB] = Num++;
92             for (auto II = BB->begin(), IE = BB->end(); II != IE; ++II)
93             {
94                 Instruction* Inst = &(*II);
95                 if (isa<DbgInfoIntrinsic>(Inst))
96                 {
97                     continue;
98                 }
99                 if (isa<PHINode>(Inst))
100                 {
101                     m_pNumbers[Inst] = BlockNum;
102                 }
103                 else
104                 {
105                     m_pNumbers[Inst] = Num++;
106                 }
107                 if (!Inst->use_empty())
108                 {
109                     getOrCreateLiveRange(Inst);
110                 }
111             }
112         }
113 
114         MaxAssignedNumber = Num;
115     }
116 
117     /// Construct a string from a unsigned integer with the intended width.
alignedString(unsigned Val)118     static std::string alignedString(unsigned Val)
119     {
120         const unsigned Width = 3;
121         std::string Str = Twine(Val).str();
122         if (Str.size() < Width)
123         {
124             Str.insert(Str.begin(), Width - Str.size(), ' ');
125         }
126         return Str;
127     }
128 
printNumbering(raw_ostream & OS)129     void RegisterPressureEstimate::printNumbering(raw_ostream& OS)
130     {
131         auto& BBs = m_pFunc->getBasicBlockList();
132         unsigned UnamedBBNum = 1;
133         for (auto BI = BBs.begin(), BE = BBs.end(); BI != BE; ++BI)
134         {
135             if (m_pNumbers.count(&(*BI)))
136             {
137                 OS << "[" << alignedString(m_pNumbers[&(*BI)]) << "] ";
138             }
139             if (BI->hasName())
140             {
141                 OS << BI->getName() << ":\n";
142             }
143             else
144             {
145                 OS << ";<label>:" + Twine(UnamedBBNum++) << "\n";
146             }
147 
148             for (auto II = BI->begin(), IE = BI->end(); II != IE; ++II)
149             {
150                 Instruction* Inst = &(*II);
151                 if (m_pNumbers.count(Inst))
152                 {
153                     OS << "[" << alignedString(m_pNumbers[Inst]) << "] ";
154                 }
155                 Inst->print(OS);
156                 OS << "\n";
157             }
158         }
159     }
160 
dumpNumbering()161     void RegisterPressureEstimate::dumpNumbering()
162     {
163         printNumbering(ods());
164     }
165 
getAssignedNumberForInst(Instruction * pInst)166     unsigned RegisterPressureEstimate::getAssignedNumberForInst(Instruction* pInst)
167     {
168         return m_pNumbers[pInst];
169     }
170 
getMaxAssignedNumberForBB(BasicBlock * pBB)171     unsigned RegisterPressureEstimate::getMaxAssignedNumberForBB(BasicBlock* pBB)
172     {
173         return m_pNumbers[&pBB->back()];
174     }
175 
getMinAssignedNumberForBB(BasicBlock * pBB)176     unsigned RegisterPressureEstimate::getMinAssignedNumberForBB(BasicBlock* pBB)
177     {
178         return m_pNumbers[&pBB->front()];
179     }
180 
setBegin(unsigned Begin)181     void RegisterPressureEstimate::LiveRange::setBegin(unsigned Begin)
182     {
183         for (auto& Seg : Segments)
184         {
185             if (Seg.Begin < Begin)
186             {
187                 Seg.Begin = Begin;
188             }
189         }
190     }
191 
sortAndMerge()192     void RegisterPressureEstimate::LiveRange::sortAndMerge()
193     {
194         std::sort(Segments.begin(), Segments.end());
195         unsigned NewSize = 0;
196         for (unsigned i = 0; i != Segments.size(); ++i)
197         {
198             if (NewSize && Segments[i].Begin <= Segments[NewSize - 1].End)
199             {
200                 Segments[NewSize - 1].End =
201                     std::max(Segments[i].End, Segments[NewSize - 1].End);
202             }
203             else
204             {
205                 Segments[NewSize++] = Segments[i];
206             }
207         }
208         Segments.resize(NewSize);
209     }
210 
print(raw_ostream & OS) const211     void RegisterPressureEstimate::LiveRange::print(raw_ostream& OS) const
212     {
213         for (auto& Seg : Segments)
214         {
215             OS << "  [" << Seg.Begin << ", " << Seg.End << ")";
216         }
217     }
218 
dump() const219     void RegisterPressureEstimate::LiveRange::dump() const
220     {
221         print(ods());
222     }
223 
printLiveRanges(raw_ostream & OS)224     void RegisterPressureEstimate::printLiveRanges(raw_ostream& OS)
225     {
226         OS << "\nLive ranges:";
227         for (auto& Item : m_pLiveRanges)
228         {
229             OS << "\n";
230             Item.first->printAsOperand(OS);
231             OS << ":\n";
232             Item.second->print(OS);
233             OS << "\n";
234         }
235     }
236 
dumpLiveRanges()237     void RegisterPressureEstimate::dumpLiveRanges()
238     {
239         printLiveRanges(ods());
240     }
241 
242     /// The algorithm is from "Linear Scan Register Allocation On SSA Form" by
243     /// Christian Wimmer and Michael Franz, CGO 2010.
244     ///
245     /// For each block b in reverse order do
246     ///    live = union of successor.livein for each successor of b
247     ///
248     ///    for each phi of successors of b do
249     ///       live.add(phi.inputOf(b))
250     ///
251     ///    for each opnd in live do
252     ///       intervals[opnd].addRange(b.from, b.to)
253     ///
254     ///    for each operation op of b in reverse order do
255     ///       for each output operand opnd of op do
256     ///           intervals[opnd].setFrom(op.id)
257     ///           live.remove(opnd)
258     ///       for each input operand opnd of op do
259     ///           intervals[opnd].addRange(b.from, op.id)
260     ///           live.add(opnd)
261     ///
262     ///    for each phi of b do
263     ///       live.remove(phi.output)
264     ///
265     ///    if b is loop header then
266     ///       loopEnd = last block of the loop starting at b
267     ///       for each opnd in live do
268     ///           intervals[opnd].addRange(b.from, loopEnd.to)
269     ///
270     ///    b.livein = live
271     ///
272     /// Return true if live interval is calculated successfully; false otherwise.
buildLiveIntervals(bool RemoveLR)273     bool RegisterPressureEstimate::buildLiveIntervals(bool RemoveLR)
274     {
275         // Clear existing data if any.
276         m_pNumbers.clear();
277 
278         clear(RemoveLR);
279 
280         // Assign a number to arguments, basic blocks and instructions.
281         // build the live-range pool.
282         assignNumbers();
283 
284         unsigned OverallEstimate = 0;
285         // quick estimate
286         for (auto VI = m_pLiveRanges.begin(), VE = m_pLiveRanges.end();
287             VI != VE; ++VI)
288         {
289             auto V = VI->first;
290             unsigned RangeStart = m_pNumbers[V];
291             unsigned MaxRange = 0;
292             // need to find the last use
293             for (auto UI = V->user_begin(), UE = V->user_end(); UI != UE; ++UI)
294             {
295                 Instruction* UseI = dyn_cast<Instruction>(*UI);
296                 if (!UseI || isInstructionTriviallyDead(UseI))
297                     continue;
298                 unsigned RangeEnd = RangeStart;
299                 if (PHINode* PN = dyn_cast<PHINode>(UseI))
300                 {
301                     // PHI nodes use the operand in the predecessor block,
302                     // not the block with the PHI.
303                     Use& U = UI.getUse();
304                     unsigned num = PHINode::getIncomingValueNumForOperand(U.getOperandNo());
305                     auto UseBB = PN->getIncomingBlock(num);
306                     RangeEnd = m_pNumbers[&UseBB->back()] + 1;
307                 }
308                 else
309                 {
310                     RangeEnd = m_pNumbers[UseI];
311                 }
312                 if (RangeEnd > RangeStart && RangeEnd - RangeStart > MaxRange)
313                     MaxRange = RangeEnd - RangeStart;
314                 else if (RangeStart > RangeEnd && RangeStart - RangeEnd > MaxRange)
315                     MaxRange = RangeStart - RangeEnd;
316             }
317             OverallEstimate += MaxRange * getValueBytes(V);
318         }
319         OverallEstimate = iSTD::Round(OverallEstimate, SIMD_PRESSURE_MULTIPLIER) /
320             SIMD_PRESSURE_MULTIPLIER;
321         OverallEstimate = OverallEstimate / getMaxAssignedNumberForFunction();
322         if (OverallEstimate > OVERALL_PRESSURE_UPBOUND)
323             return false;
324 
325         DenseMap<BasicBlock*, std::set<Value*>> BlockLiveMap;
326         auto& BBs = m_pFunc->getBasicBlockList();
327         // Top level loop to visit each block once in reverse order.
328         for (auto BI = BBs.rbegin(), BE = BBs.rend(); BI != BE; ++BI)
329         {
330             BasicBlock* BB = &*BI;
331             auto Result = BlockLiveMap.insert(std::make_pair(BB, std::set<Value*>()));
332             IGC_ASSERT_MESSAGE(Result.second, "must not be processed yet");
333             std::set<Value*>& Live = Result.first->second;
334 
335             // live = union of successor.livein for each successor of b
336             //
337             // for each phi of successors of b do
338             //     live.add(phi.inputOf(b))
339             //
340             for (auto PI = succ_begin(BB), PE = succ_end(BB); PI != PE; ++PI)
341             {
342                 BasicBlock* Succ = *PI;
343                 auto Iter = BlockLiveMap.find(Succ);
344                 if (Iter != BlockLiveMap.end())
345                 {
346                     std::set<Value*>& SuccLive = Iter->second;
347                     Live.insert(SuccLive.begin(), SuccLive.end());
348                 }
349 
350                 // For each phi node from successors, update liveness.
351                 for (auto II = Succ->begin(), IE = Succ->end(); II != IE; ++II)
352                 {
353                     Instruction* Inst = &*II;
354                     if (auto PN = dyn_cast<PHINode>(Inst))
355                     {
356                         Live.insert(PN->getIncomingValueForBlock(BB));
357                     }
358                     else
359                     {
360                         // all phi's are in the first few instructions.
361                         break;
362                     }
363                 }
364             }
365 
366             // The basic block number.
367             unsigned BlockNum = m_pNumbers[BB];
368 
369             // for each opnd in live do
370             //    intervals[opnd].addRange(b.from, b.to)
371             for (auto I = Live.begin(), E = Live.end(); I != E; ++I)
372             {
373                 unsigned End = m_pNumbers[&BB->back()] + 1;
374                 Value* V = *I;
375                 if (auto LR = getLiveRangeOrNull(V))
376                 {
377                     LR->addSegment(BlockNum, End);
378                 }
379             }
380             // for each operation op of b in reverse order do
381             //     for each output operand opnd of op do
382             //         intervals[opnd].setFrom(op.id)
383             //         live.remove(opnd)
384             //     for each input operand opnd of op do
385             //         intervals[opnd].addRange(b.from, op.id)
386             //         live.add(opnd)
387             for (auto II = BB->rbegin(), IE = BB->rend(); II != IE; ++II)
388             {
389                 Instruction* Inst = &*II;
390 
391                 // Skip debugging intrinsic calls.
392                 if (isa<DbgInfoIntrinsic>(Inst))
393                 {
394                     continue;
395                 }
396 
397                 // Skip phi nodes and its predecessors decide the live variables.
398                 if (isa<PHINode>(Inst))
399                 {
400                     continue;
401                 }
402 
403                 // The instruction number.
404                 unsigned InstNum = m_pNumbers[Inst];
405 
406                 if (!Inst->use_empty())
407                 {
408                     if (auto LR = getLiveRangeOrNull(Inst))
409                     {
410                         LR->setBegin(InstNum);
411                     }
412                     Live.erase(Inst);
413                 }
414 
415                 // Handle its input operands.
416                 for (auto OI = Inst->op_begin(), OE = Inst->op_end(); OI != OE; ++OI)
417                 {
418                     Value* Opnd = *OI;
419                     if (isa<Argument>(Opnd) || isa<Instruction>(Opnd))
420                     {
421                         if (LiveRange * LR = getLiveRangeOrNull(Opnd))
422                         {
423                             LR->addSegment(BlockNum, InstNum);
424                         }
425                         Live.insert(Opnd);
426                     }
427                 }
428             }
429 
430             // for each phi of b do
431             //     live.remove(phi.output)
432             //
433             for (auto II = BB->begin(), IE = BB->end(); II != IE; ++II)
434             {
435                 Instruction* Inst = &*II;
436                 if (isa<PHINode>(Inst))
437                 {
438                     Live.erase(Inst);
439                 }
440                 else
441                 {
442                     // all phi's are in the first few instructions.
443                     break;
444                 }
445             }
446 
447             // if b is loop header then
448             //     loopEnd = last block of the loop starting at b
449             //     for each opnd in live do
450             //         intervals[opnd].addRange(b.from, loopEnd.to)
451             //
452             if (LI->isLoopHeader(BB))
453             {
454                 Loop* L = LI->getLoopFor(BB);
455                 if (L != nullptr)
456                 {
457                     if (BasicBlock * Latch = L->getLoopLatch())
458                     {
459                         for (auto I = Live.begin(), E = Live.end(); I != E; ++I)
460                         {
461                             unsigned End = m_pNumbers[&Latch->back()] + 1;
462                             if (auto LR = getLiveRangeOrNull(*I))
463                             {
464                                 LR->addSegment(BlockNum, End);
465                             }
466                         }
467                     }
468                 }
469                 else
470                 {
471                     // Just set unavailable of live range info for now.
472                     clear(RemoveLR);
473                     return false;
474                     // IGC_ASSERT_EXIT_MESSAGE(0, "Support for unnatural loops, not implemented yet");
475                 }
476             }
477         }
478 
479         // Finally, combine multiple live ranges into a single one and sort them.
480         mergeLiveRanges();
481         return true;
482     }
483 
getRegisterPressureForInstructionFromRPMap(unsigned number) const484     unsigned RegisterPressureEstimate::getRegisterPressureForInstructionFromRPMap(unsigned number) const
485     {
486         auto Iter = m_pRegisterPressureByInstruction.find(number);
487         if (Iter != m_pRegisterPressureByInstruction.end())
488         {
489             return iSTD::Round(Iter->second, SIMD_PRESSURE_MULTIPLIER) / SIMD_PRESSURE_MULTIPLIER;
490         }
491         else
492         {
493             return 0;
494         }
495     }
496 
buildRPMapPerInstruction()497     void RegisterPressureEstimate::buildRPMapPerInstruction()
498     {
499         unsigned maxNumberOfInstructions = getMaxAssignedNumberForFunction();
500         for (unsigned number = 0; number < maxNumberOfInstructions; number++)
501         {
502             m_pRegisterPressureByInstruction[number] = 0;
503         }
504 
505         // Segments are sorted.
506         for (auto I = m_pLiveRanges.begin(), E = m_pLiveRanges.end(); I != E; ++I)
507         {
508             Value* V = I->first;
509             unsigned int pressure = getValueBytes(V);
510             for (auto& Seg : I->second->Segments)
511             {
512                 for (unsigned number = Seg.Begin; number < Seg.End; number++)
513                 {
514                     m_pRegisterPressureByInstruction[number] += pressure;
515                 }
516             }
517         }
518         return;
519     }
520 
getRegisterPressure(Instruction * Inst) const521     unsigned RegisterPressureEstimate::getRegisterPressure(Instruction* Inst) const
522     {
523         auto Iter = m_pNumbers.find(Inst);
524         if (Iter != m_pNumbers.end())
525         {
526             // Find the instruction location.
527             unsigned N = Iter->second;
528 
529             // Now sum all intervals that contain this location.
530             unsigned Pressure = 0;
531 
532             // Segments are sorted.
533             for (auto I = m_pLiveRanges.begin(), E = m_pLiveRanges.end(); I != E; ++I)
534             {
535                 if (I->second->contains(N))
536                 {
537                     Value* V = I->first;
538                     Pressure += getValueBytes(V);
539                 }
540             }
541 
542             return iSTD::Round(Pressure, SIMD_PRESSURE_MULTIPLIER) / SIMD_PRESSURE_MULTIPLIER;
543         }
544 
545         // ignore this instruction.
546         return 0;
547     }
548 
getMaxRegisterPressure() const549     unsigned RegisterPressureEstimate::getMaxRegisterPressure() const
550     {
551         unsigned MaxPressure = 0;
552         for (auto BI = m_pFunc->begin(), BE = m_pFunc->end(); BI != BE; ++BI)
553         {
554             for (auto II = BI->begin(), IE = BI->end(); II != IE; ++II)
555             {
556                 Instruction* Inst = &(*II);
557                 MaxPressure = std::max(MaxPressure, getRegisterPressure(Inst));
558             }
559         }
560 
561         return MaxPressure;
562     }
563 
getMaxRegisterPressure(BasicBlock * BB) const564     unsigned RegisterPressureEstimate::getMaxRegisterPressure(BasicBlock* BB) const
565     {
566         unsigned RP = 0;
567         for (auto II = BB->begin(), IE = BB->end(); II != IE; ++II)
568         {
569             Instruction* Inst = &(*II);
570             RP = std::max(RP, getRegisterPressure(Inst));
571         }
572         return RP;
573     }
574 
printRegisterPressureInfo(bool Detailed,const char * msg) const575     void RegisterPressureEstimate::printRegisterPressureInfo
576     (bool Detailed,
577         const char* msg) const
578     {
579         unsigned MaxRP = getMaxRegisterPressure();
580         if (Detailed)
581         {
582             for (inst_iterator I = inst_begin(m_pFunc), E = inst_end(m_pFunc); I != E; ++I)
583             {
584                 Instruction* Inst = &*I;
585                 ods() << "[RP = " << getRegisterPressure(Inst) << "]";
586                 Inst->print(ods());
587                 ods() << "\n";
588             }
589         }
590 
591         ods() << "; " << msg << "\n";
592         ods() << "; Kernel " << m_pFunc->getName() << "\n";
593         ods() << "; Max RP = " << MaxRP << " bytes, (" << ((MaxRP + 31) / 32)
594             << " GRFs)\n\n";
595     }
596 }
597