1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 /*========================== begin_copyright_notice ============================ 10 11 This file is distributed under the University of Illinois Open Source License. 12 See LICENSE.TXT for details. 13 14 ============================= end_copyright_notice ===========================*/ 15 16 // Estimates the register pressure at a program point. 17 18 #include "RegisterPressureEstimate.hpp" 19 #include "Compiler/IGCPassSupport.h" 20 #include <set> 21 #include "common/debug/Debug.hpp" 22 #include "common/debug/Dump.hpp" 23 #include "common/igc_regkeys.hpp" 24 #include "common/LLVMWarningsPush.hpp" 25 #include <llvm/IR/Intrinsics.h> 26 #include <llvm/IR/Function.h> 27 #include <llvm/IR/InstIterator.h> 28 #include <llvm/Transforms/Utils/Local.h> 29 #include "common/LLVMWarningsPop.hpp" 30 #include "Probe/Assertion.h" 31 32 using namespace llvm; 33 using namespace IGC::Debug; 34 using namespace IGC; 35 36 char RegisterPressureEstimate::ID = 0; 37 #define PASS_FLAG "igc-RegisterPressureEstimate" 38 #define PASS_DESCRIPTION "GenX Register Pressure Analysis" 39 #define PASS_CFG_ONLY false 40 #define PASS_ANALYSIS false 41 IGC_INITIALIZE_PASS_BEGIN(RegisterPressureEstimate, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS) 42 IGC_INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 43 IGC_INITIALIZE_PASS_END(RegisterPressureEstimate, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS) 44 45 namespace IGC 46 { getAnalysisUsage(AnalysisUsage & AU) const47 void RegisterPressureEstimate::getAnalysisUsage(AnalysisUsage& AU) const 48 { 49 AU.setPreservesAll(); 50 AU.addRequired<LoopInfoWrapperPass>(); 51 AU.addRequired<WIAnalysis>(); 52 } 53 runOnFunction(Function & F)54 bool RegisterPressureEstimate::runOnFunction(Function& F) 55 { 56 m_DL = &F.getParent()->getDataLayout(); 57 m_pFunc = &F; 58 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 59 WI = &getAnalysis<WIAnalysis>(); 60 m_available = buildLiveIntervals(true); 61 return false; 62 } 63 64 /// \brief Assign a number to each instruction in a function. 65 /// 66 /// - Arguments get number 0; 67 /// - Basic blocks get a number; 68 /// - Phi nodes in each basic block get the same number; 69 /// - All other instructions get a unique number, if assigned. 70 /// assignNumbers()71 void RegisterPressureEstimate::assignNumbers() 72 { 73 unsigned Num = 0; 74 75 // Arguments assigned with number 0. 76 for (auto AI = m_pFunc->arg_begin(), AE = m_pFunc->arg_end(); AI != AE; ++AI) 77 { 78 Argument* Arg = &(*AI); 79 m_pNumbers[Arg] = Num; 80 if (!Arg->use_empty()) 81 { 82 getOrCreateLiveRange(Arg); 83 } 84 } 85 86 // Assign a number to basic blocks and instructions. 87 auto& BBs = m_pFunc->getBasicBlockList(); 88 for (auto BI = BBs.begin(), BE = BBs.end(); BI != BE; ++BI) 89 { 90 BasicBlock* BB = &*BI; 91 unsigned BlockNum = m_pNumbers[BB] = Num++; 92 for (auto II = BB->begin(), IE = BB->end(); II != IE; ++II) 93 { 94 Instruction* Inst = &(*II); 95 if (isa<DbgInfoIntrinsic>(Inst)) 96 { 97 continue; 98 } 99 if (isa<PHINode>(Inst)) 100 { 101 m_pNumbers[Inst] = BlockNum; 102 } 103 else 104 { 105 m_pNumbers[Inst] = Num++; 106 } 107 if (!Inst->use_empty()) 108 { 109 getOrCreateLiveRange(Inst); 110 } 111 } 112 } 113 114 MaxAssignedNumber = Num; 115 } 116 117 /// Construct a string from a unsigned integer with the intended width. alignedString(unsigned Val)118 static std::string alignedString(unsigned Val) 119 { 120 const unsigned Width = 3; 121 std::string Str = Twine(Val).str(); 122 if (Str.size() < Width) 123 { 124 Str.insert(Str.begin(), Width - Str.size(), ' '); 125 } 126 return Str; 127 } 128 printNumbering(raw_ostream & OS)129 void RegisterPressureEstimate::printNumbering(raw_ostream& OS) 130 { 131 auto& BBs = m_pFunc->getBasicBlockList(); 132 unsigned UnamedBBNum = 1; 133 for (auto BI = BBs.begin(), BE = BBs.end(); BI != BE; ++BI) 134 { 135 if (m_pNumbers.count(&(*BI))) 136 { 137 OS << "[" << alignedString(m_pNumbers[&(*BI)]) << "] "; 138 } 139 if (BI->hasName()) 140 { 141 OS << BI->getName() << ":\n"; 142 } 143 else 144 { 145 OS << ";<label>:" + Twine(UnamedBBNum++) << "\n"; 146 } 147 148 for (auto II = BI->begin(), IE = BI->end(); II != IE; ++II) 149 { 150 Instruction* Inst = &(*II); 151 if (m_pNumbers.count(Inst)) 152 { 153 OS << "[" << alignedString(m_pNumbers[Inst]) << "] "; 154 } 155 Inst->print(OS); 156 OS << "\n"; 157 } 158 } 159 } 160 dumpNumbering()161 void RegisterPressureEstimate::dumpNumbering() 162 { 163 printNumbering(ods()); 164 } 165 getAssignedNumberForInst(Instruction * pInst)166 unsigned RegisterPressureEstimate::getAssignedNumberForInst(Instruction* pInst) 167 { 168 return m_pNumbers[pInst]; 169 } 170 getMaxAssignedNumberForBB(BasicBlock * pBB)171 unsigned RegisterPressureEstimate::getMaxAssignedNumberForBB(BasicBlock* pBB) 172 { 173 return m_pNumbers[&pBB->back()]; 174 } 175 getMinAssignedNumberForBB(BasicBlock * pBB)176 unsigned RegisterPressureEstimate::getMinAssignedNumberForBB(BasicBlock* pBB) 177 { 178 return m_pNumbers[&pBB->front()]; 179 } 180 setBegin(unsigned Begin)181 void RegisterPressureEstimate::LiveRange::setBegin(unsigned Begin) 182 { 183 for (auto& Seg : Segments) 184 { 185 if (Seg.Begin < Begin) 186 { 187 Seg.Begin = Begin; 188 } 189 } 190 } 191 sortAndMerge()192 void RegisterPressureEstimate::LiveRange::sortAndMerge() 193 { 194 std::sort(Segments.begin(), Segments.end()); 195 unsigned NewSize = 0; 196 for (unsigned i = 0; i != Segments.size(); ++i) 197 { 198 if (NewSize && Segments[i].Begin <= Segments[NewSize - 1].End) 199 { 200 Segments[NewSize - 1].End = 201 std::max(Segments[i].End, Segments[NewSize - 1].End); 202 } 203 else 204 { 205 Segments[NewSize++] = Segments[i]; 206 } 207 } 208 Segments.resize(NewSize); 209 } 210 print(raw_ostream & OS) const211 void RegisterPressureEstimate::LiveRange::print(raw_ostream& OS) const 212 { 213 for (auto& Seg : Segments) 214 { 215 OS << " [" << Seg.Begin << ", " << Seg.End << ")"; 216 } 217 } 218 dump() const219 void RegisterPressureEstimate::LiveRange::dump() const 220 { 221 print(ods()); 222 } 223 printLiveRanges(raw_ostream & OS)224 void RegisterPressureEstimate::printLiveRanges(raw_ostream& OS) 225 { 226 OS << "\nLive ranges:"; 227 for (auto& Item : m_pLiveRanges) 228 { 229 OS << "\n"; 230 Item.first->printAsOperand(OS); 231 OS << ":\n"; 232 Item.second->print(OS); 233 OS << "\n"; 234 } 235 } 236 dumpLiveRanges()237 void RegisterPressureEstimate::dumpLiveRanges() 238 { 239 printLiveRanges(ods()); 240 } 241 242 /// The algorithm is from "Linear Scan Register Allocation On SSA Form" by 243 /// Christian Wimmer and Michael Franz, CGO 2010. 244 /// 245 /// For each block b in reverse order do 246 /// live = union of successor.livein for each successor of b 247 /// 248 /// for each phi of successors of b do 249 /// live.add(phi.inputOf(b)) 250 /// 251 /// for each opnd in live do 252 /// intervals[opnd].addRange(b.from, b.to) 253 /// 254 /// for each operation op of b in reverse order do 255 /// for each output operand opnd of op do 256 /// intervals[opnd].setFrom(op.id) 257 /// live.remove(opnd) 258 /// for each input operand opnd of op do 259 /// intervals[opnd].addRange(b.from, op.id) 260 /// live.add(opnd) 261 /// 262 /// for each phi of b do 263 /// live.remove(phi.output) 264 /// 265 /// if b is loop header then 266 /// loopEnd = last block of the loop starting at b 267 /// for each opnd in live do 268 /// intervals[opnd].addRange(b.from, loopEnd.to) 269 /// 270 /// b.livein = live 271 /// 272 /// Return true if live interval is calculated successfully; false otherwise. buildLiveIntervals(bool RemoveLR)273 bool RegisterPressureEstimate::buildLiveIntervals(bool RemoveLR) 274 { 275 // Clear existing data if any. 276 m_pNumbers.clear(); 277 278 clear(RemoveLR); 279 280 // Assign a number to arguments, basic blocks and instructions. 281 // build the live-range pool. 282 assignNumbers(); 283 284 unsigned OverallEstimate = 0; 285 // quick estimate 286 for (auto VI = m_pLiveRanges.begin(), VE = m_pLiveRanges.end(); 287 VI != VE; ++VI) 288 { 289 auto V = VI->first; 290 unsigned RangeStart = m_pNumbers[V]; 291 unsigned MaxRange = 0; 292 // need to find the last use 293 for (auto UI = V->user_begin(), UE = V->user_end(); UI != UE; ++UI) 294 { 295 Instruction* UseI = dyn_cast<Instruction>(*UI); 296 if (!UseI || isInstructionTriviallyDead(UseI)) 297 continue; 298 unsigned RangeEnd = RangeStart; 299 if (PHINode* PN = dyn_cast<PHINode>(UseI)) 300 { 301 // PHI nodes use the operand in the predecessor block, 302 // not the block with the PHI. 303 Use& U = UI.getUse(); 304 unsigned num = PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 305 auto UseBB = PN->getIncomingBlock(num); 306 RangeEnd = m_pNumbers[&UseBB->back()] + 1; 307 } 308 else 309 { 310 RangeEnd = m_pNumbers[UseI]; 311 } 312 if (RangeEnd > RangeStart && RangeEnd - RangeStart > MaxRange) 313 MaxRange = RangeEnd - RangeStart; 314 else if (RangeStart > RangeEnd && RangeStart - RangeEnd > MaxRange) 315 MaxRange = RangeStart - RangeEnd; 316 } 317 OverallEstimate += MaxRange * getValueBytes(V); 318 } 319 OverallEstimate = iSTD::Round(OverallEstimate, SIMD_PRESSURE_MULTIPLIER) / 320 SIMD_PRESSURE_MULTIPLIER; 321 OverallEstimate = OverallEstimate / getMaxAssignedNumberForFunction(); 322 if (OverallEstimate > OVERALL_PRESSURE_UPBOUND) 323 return false; 324 325 DenseMap<BasicBlock*, std::set<Value*>> BlockLiveMap; 326 auto& BBs = m_pFunc->getBasicBlockList(); 327 // Top level loop to visit each block once in reverse order. 328 for (auto BI = BBs.rbegin(), BE = BBs.rend(); BI != BE; ++BI) 329 { 330 BasicBlock* BB = &*BI; 331 auto Result = BlockLiveMap.insert(std::make_pair(BB, std::set<Value*>())); 332 IGC_ASSERT_MESSAGE(Result.second, "must not be processed yet"); 333 std::set<Value*>& Live = Result.first->second; 334 335 // live = union of successor.livein for each successor of b 336 // 337 // for each phi of successors of b do 338 // live.add(phi.inputOf(b)) 339 // 340 for (auto PI = succ_begin(BB), PE = succ_end(BB); PI != PE; ++PI) 341 { 342 BasicBlock* Succ = *PI; 343 auto Iter = BlockLiveMap.find(Succ); 344 if (Iter != BlockLiveMap.end()) 345 { 346 std::set<Value*>& SuccLive = Iter->second; 347 Live.insert(SuccLive.begin(), SuccLive.end()); 348 } 349 350 // For each phi node from successors, update liveness. 351 for (auto II = Succ->begin(), IE = Succ->end(); II != IE; ++II) 352 { 353 Instruction* Inst = &*II; 354 if (auto PN = dyn_cast<PHINode>(Inst)) 355 { 356 Live.insert(PN->getIncomingValueForBlock(BB)); 357 } 358 else 359 { 360 // all phi's are in the first few instructions. 361 break; 362 } 363 } 364 } 365 366 // The basic block number. 367 unsigned BlockNum = m_pNumbers[BB]; 368 369 // for each opnd in live do 370 // intervals[opnd].addRange(b.from, b.to) 371 for (auto I = Live.begin(), E = Live.end(); I != E; ++I) 372 { 373 unsigned End = m_pNumbers[&BB->back()] + 1; 374 Value* V = *I; 375 if (auto LR = getLiveRangeOrNull(V)) 376 { 377 LR->addSegment(BlockNum, End); 378 } 379 } 380 // for each operation op of b in reverse order do 381 // for each output operand opnd of op do 382 // intervals[opnd].setFrom(op.id) 383 // live.remove(opnd) 384 // for each input operand opnd of op do 385 // intervals[opnd].addRange(b.from, op.id) 386 // live.add(opnd) 387 for (auto II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) 388 { 389 Instruction* Inst = &*II; 390 391 // Skip debugging intrinsic calls. 392 if (isa<DbgInfoIntrinsic>(Inst)) 393 { 394 continue; 395 } 396 397 // Skip phi nodes and its predecessors decide the live variables. 398 if (isa<PHINode>(Inst)) 399 { 400 continue; 401 } 402 403 // The instruction number. 404 unsigned InstNum = m_pNumbers[Inst]; 405 406 if (!Inst->use_empty()) 407 { 408 if (auto LR = getLiveRangeOrNull(Inst)) 409 { 410 LR->setBegin(InstNum); 411 } 412 Live.erase(Inst); 413 } 414 415 // Handle its input operands. 416 for (auto OI = Inst->op_begin(), OE = Inst->op_end(); OI != OE; ++OI) 417 { 418 Value* Opnd = *OI; 419 if (isa<Argument>(Opnd) || isa<Instruction>(Opnd)) 420 { 421 if (LiveRange * LR = getLiveRangeOrNull(Opnd)) 422 { 423 LR->addSegment(BlockNum, InstNum); 424 } 425 Live.insert(Opnd); 426 } 427 } 428 } 429 430 // for each phi of b do 431 // live.remove(phi.output) 432 // 433 for (auto II = BB->begin(), IE = BB->end(); II != IE; ++II) 434 { 435 Instruction* Inst = &*II; 436 if (isa<PHINode>(Inst)) 437 { 438 Live.erase(Inst); 439 } 440 else 441 { 442 // all phi's are in the first few instructions. 443 break; 444 } 445 } 446 447 // if b is loop header then 448 // loopEnd = last block of the loop starting at b 449 // for each opnd in live do 450 // intervals[opnd].addRange(b.from, loopEnd.to) 451 // 452 if (LI->isLoopHeader(BB)) 453 { 454 Loop* L = LI->getLoopFor(BB); 455 if (L != nullptr) 456 { 457 if (BasicBlock * Latch = L->getLoopLatch()) 458 { 459 for (auto I = Live.begin(), E = Live.end(); I != E; ++I) 460 { 461 unsigned End = m_pNumbers[&Latch->back()] + 1; 462 if (auto LR = getLiveRangeOrNull(*I)) 463 { 464 LR->addSegment(BlockNum, End); 465 } 466 } 467 } 468 } 469 else 470 { 471 // Just set unavailable of live range info for now. 472 clear(RemoveLR); 473 return false; 474 // IGC_ASSERT_EXIT_MESSAGE(0, "Support for unnatural loops, not implemented yet"); 475 } 476 } 477 } 478 479 // Finally, combine multiple live ranges into a single one and sort them. 480 mergeLiveRanges(); 481 return true; 482 } 483 getRegisterPressureForInstructionFromRPMap(unsigned number) const484 unsigned RegisterPressureEstimate::getRegisterPressureForInstructionFromRPMap(unsigned number) const 485 { 486 auto Iter = m_pRegisterPressureByInstruction.find(number); 487 if (Iter != m_pRegisterPressureByInstruction.end()) 488 { 489 return iSTD::Round(Iter->second, SIMD_PRESSURE_MULTIPLIER) / SIMD_PRESSURE_MULTIPLIER; 490 } 491 else 492 { 493 return 0; 494 } 495 } 496 buildRPMapPerInstruction()497 void RegisterPressureEstimate::buildRPMapPerInstruction() 498 { 499 unsigned maxNumberOfInstructions = getMaxAssignedNumberForFunction(); 500 for (unsigned number = 0; number < maxNumberOfInstructions; number++) 501 { 502 m_pRegisterPressureByInstruction[number] = 0; 503 } 504 505 // Segments are sorted. 506 for (auto I = m_pLiveRanges.begin(), E = m_pLiveRanges.end(); I != E; ++I) 507 { 508 Value* V = I->first; 509 unsigned int pressure = getValueBytes(V); 510 for (auto& Seg : I->second->Segments) 511 { 512 for (unsigned number = Seg.Begin; number < Seg.End; number++) 513 { 514 m_pRegisterPressureByInstruction[number] += pressure; 515 } 516 } 517 } 518 return; 519 } 520 getRegisterPressure(Instruction * Inst) const521 unsigned RegisterPressureEstimate::getRegisterPressure(Instruction* Inst) const 522 { 523 auto Iter = m_pNumbers.find(Inst); 524 if (Iter != m_pNumbers.end()) 525 { 526 // Find the instruction location. 527 unsigned N = Iter->second; 528 529 // Now sum all intervals that contain this location. 530 unsigned Pressure = 0; 531 532 // Segments are sorted. 533 for (auto I = m_pLiveRanges.begin(), E = m_pLiveRanges.end(); I != E; ++I) 534 { 535 if (I->second->contains(N)) 536 { 537 Value* V = I->first; 538 Pressure += getValueBytes(V); 539 } 540 } 541 542 return iSTD::Round(Pressure, SIMD_PRESSURE_MULTIPLIER) / SIMD_PRESSURE_MULTIPLIER; 543 } 544 545 // ignore this instruction. 546 return 0; 547 } 548 getMaxRegisterPressure() const549 unsigned RegisterPressureEstimate::getMaxRegisterPressure() const 550 { 551 unsigned MaxPressure = 0; 552 for (auto BI = m_pFunc->begin(), BE = m_pFunc->end(); BI != BE; ++BI) 553 { 554 for (auto II = BI->begin(), IE = BI->end(); II != IE; ++II) 555 { 556 Instruction* Inst = &(*II); 557 MaxPressure = std::max(MaxPressure, getRegisterPressure(Inst)); 558 } 559 } 560 561 return MaxPressure; 562 } 563 getMaxRegisterPressure(BasicBlock * BB) const564 unsigned RegisterPressureEstimate::getMaxRegisterPressure(BasicBlock* BB) const 565 { 566 unsigned RP = 0; 567 for (auto II = BB->begin(), IE = BB->end(); II != IE; ++II) 568 { 569 Instruction* Inst = &(*II); 570 RP = std::max(RP, getRegisterPressure(Inst)); 571 } 572 return RP; 573 } 574 printRegisterPressureInfo(bool Detailed,const char * msg) const575 void RegisterPressureEstimate::printRegisterPressureInfo 576 (bool Detailed, 577 const char* msg) const 578 { 579 unsigned MaxRP = getMaxRegisterPressure(); 580 if (Detailed) 581 { 582 for (inst_iterator I = inst_begin(m_pFunc), E = inst_end(m_pFunc); I != E; ++I) 583 { 584 Instruction* Inst = &*I; 585 ods() << "[RP = " << getRegisterPressure(Inst) << "]"; 586 Inst->print(ods()); 587 ods() << "\n"; 588 } 589 } 590 591 ods() << "; " << msg << "\n"; 592 ods() << "; Kernel " << m_pFunc->getName() << "\n"; 593 ods() << "; Max RP = " << MaxRP << " bytes, (" << ((MaxRP + 31) / 32) 594 << " GRFs)\n\n"; 595 } 596 } 597