1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #pragma once 10 11 #include "Compiler/CISACodeGen/WIAnalysis.hpp" 12 #include "Compiler/CISACodeGen/PatternMatchPass.hpp" 13 #include "Compiler/MetaDataUtilsWrapper.h" 14 #include "Compiler/CISACodeGen/PayloadMapping.hpp" 15 #include "common/LLVMWarningsPush.hpp" 16 #include <llvm/Pass.h> 17 #include <llvm/ADT/DenseSet.h> 18 #include <llvm/IR/InstVisitor.h> 19 #include <llvm/IR/Dominators.h> 20 #include <llvm/Support/Debug.h> 21 #include <llvm/Support/Allocator.h> 22 #include "common/LLVMWarningsPop.hpp" 23 #include "GenISAIntrinsics/GenIntrinsicInst.h" 24 #include "Probe/Assertion.h" 25 26 namespace IGC { 27 28 class CEncoder; 29 class CVariable; 30 class CodeGenContextWrapper; 31 class DeSSA; 32 33 34 class CoalescingEngine : public llvm::FunctionPass, public llvm::InstVisitor<CoalescingEngine> 35 { 36 //TODO: this is fixed for now, but once we have pressure heuristic, could be relaxed 37 static const int MaxTupleSize = 12; 38 39 public: 40 static char ID; // Pass identification, replacement for typeid 41 CoalescingEngine(); 42 43 virtual void getAnalysisUsage(llvm::AnalysisUsage& AU) const override; 44 releaseMemory()45 virtual void releaseMemory() override { 46 Allocator.Reset(); 47 48 for (auto itr = m_CCTupleList.begin(), 49 iend = m_CCTupleList.end(); 50 itr != iend; ++itr) 51 { 52 CCTuple* ccTuple = *itr; 53 delete ccTuple; 54 } 55 m_CCTupleList.clear(); 56 //Nodes need not be deallocated since they are 57 //owned by bump allocator (Allocator), and are destroyed 58 //once bump allocator goes out the the scope. 59 NodeCCTupleMap.clear(); 60 ValueNodeMap.clear(); 61 BBProcessingDefs.clear(); 62 NodeOffsetMap.clear(); 63 } 64 65 bool runOnFunction(llvm::Function&) override; 66 getPassName() const67 virtual llvm::StringRef getPassName() const override { 68 return "CoalescingEngine"; 69 } 70 71 /// print - print partitions in human readable form 72 void print(llvm::raw_ostream& OS, const llvm::Module* = 0) const override; 73 74 /// dump - Dump the partitions to dbgs(). 75 void dump() const; 76 77 void ProcessBlock(llvm::BasicBlock* bb); 78 79 // 80 bool MatchSingleInstruction(llvm::GenIntrinsicInst* I); 81 82 GetSingleElementWidth(SIMDMode simdMode,const llvm::DataLayout * pDL,llvm::Value * val)83 int GetSingleElementWidth( 84 SIMDMode simdMode, 85 const llvm::DataLayout* pDL, 86 llvm::Value* val) 87 { 88 int result = 0; 89 int mult = 1; 90 if (val->getType()->isHalfTy() && simdMode == SIMDMode::SIMD8) 91 { 92 mult = 2; 93 } 94 95 result = int_cast<int>(mult * 96 numLanes(simdMode) * 97 pDL->getTypeAllocSize(val->getType())); 98 99 return result; 100 } 101 102 // 103 CVariable* PrepareExplicitPayload( 104 CShader* outProgram, 105 CEncoder* encoder, 106 SIMDMode simdMode, 107 const llvm::DataLayout* pDL, 108 llvm::Instruction* inst, 109 int& payloadOffset); 110 111 112 void visitCastInst(llvm::CastInst& I); 113 void visitBinaryOperator(llvm::BinaryOperator& I); 114 void visitCmpInst(llvm::CmpInst& I); 115 void visitPHINode(llvm::PHINode& I); 116 void visitUnaryInstruction(llvm::UnaryInstruction& I); 117 void visitCallInst(llvm::CallInst& I); 118 void visitSelectInst(llvm::SelectInst& I); 119 void visitBitCastInst(llvm::BitCastInst& I); 120 void visitInstruction(llvm::Instruction& I); 121 void visitLoadInst(llvm::LoadInst& I); 122 void visitStoreInst(llvm::StoreInst& I); 123 124 125 ////////////////////////////////////////////////////////////////////////// 126 struct ElementNode 127 { 128 enum Flags { 129 kRegisterIsolatedFlag = 1, 130 kPHIIsolatedFlag = 2 131 }; ElementNodeIGC::CoalescingEngine::ElementNode132 ElementNode(llvm::Value* _value) : 133 value(_value), rank(0) 134 { 135 parent.setPointer(this); 136 } 137 ~ElementNodeIGC::CoalescingEngine::ElementNode138 ~ElementNode() 139 { 140 } 141 142 ElementNode* getLeader(); 143 144 // Fields: 145 llvm::PointerIntPair<ElementNode*, 2> parent; 146 llvm::Value* value; 147 unsigned rank; 148 }; 149 150 ////////////////////////////////////////////////////////////////////////// 151 struct CCTuple 152 { CCTupleIGC::CoalescingEngine::CCTuple153 CCTuple() : 154 leftBound(0), 155 rightBound(0), 156 hasNonHomogeneousElements(false), 157 rootInst(nullptr) 158 { 159 } 160 161 int leftBound; 162 int rightBound; 163 164 GetLeftBoundIGC::CoalescingEngine::CCTuple165 int GetLeftBound() const 166 { 167 return leftBound; 168 } 169 GetRightBoundIGC::CoalescingEngine::CCTuple170 int GetRightBound() const 171 { 172 return rightBound; 173 } 174 175 176 //cannot be safely extended left/right (used for non-homogeneous payload modeling) 177 bool hasNonHomogeneousElements; 178 179 llvm::Instruction* rootInst; 180 HasNonHomogeneousElementsIGC::CoalescingEngine::CCTuple181 inline bool HasNonHomogeneousElements() const 182 { 183 return hasNonHomogeneousElements; 184 } 185 SetHasNonHomogeneousElementsIGC::CoalescingEngine::CCTuple186 inline void SetHasNonHomogeneousElements(llvm::Instruction* _rootInst) 187 { 188 IGC_ASSERT(rootInst == nullptr); 189 rootInst = _rootInst; 190 hasNonHomogeneousElements = true; 191 } 192 GetRootIGC::CoalescingEngine::CCTuple193 inline llvm::Instruction* GetRoot() const 194 { 195 return rootInst; 196 } 197 198 GetNumElementsIGC::CoalescingEngine::CCTuple199 inline uint GetNumElements() const 200 { 201 IGC_ASSERT(rightBound >= leftBound); 202 return rightBound - leftBound + 1; 203 } 204 InitializeIndexWithCCRootIGC::CoalescingEngine::CCTuple205 inline void InitializeIndexWithCCRoot(int index, ElementNode* elNode) 206 { 207 OffsetToCCMap[index] = elNode; 208 209 ResizeBounds(index); 210 } 211 ResizeBoundsIGC::CoalescingEngine::CCTuple212 inline void ResizeBounds(int index) 213 { 214 if (index < leftBound) 215 { 216 leftBound = index; 217 } 218 219 if (index > rightBound) 220 { 221 rightBound = index; 222 } 223 } 224 CheckIndexForNonInitializedSlotIGC::CoalescingEngine::CCTuple225 inline void CheckIndexForNonInitializedSlot(int index) 226 { 227 //if (OffsetToCCMap.count(index) == 0) 228 { 229 ResizeBounds(index); 230 } 231 } 232 233 GetCCNodeWithIndexIGC::CoalescingEngine::CCTuple234 ElementNode* GetCCNodeWithIndex(int index) 235 { 236 return OffsetToCCMap[index]; 237 } 238 AttachNodeAtIndexIGC::CoalescingEngine::CCTuple239 void AttachNodeAtIndex(int index, ElementNode* node, CoalescingEngine& CE) 240 { 241 if (OffsetToCCMap.count(index) == 0 || OffsetToCCMap[index] == NULL) 242 { 243 IGC_ASSERT(node->value == CE.getRegRoot(node->value)); 244 245 CE.NodeCCTupleMap[node] = this; 246 CE.NodeOffsetMap[node] = index; 247 248 InitializeIndexWithCCRoot(index, node); 249 CE.CurrentDominatingParent[node->value] = node->value; 250 CE.ImmediateDominatingParent[node->value] = NULL; 251 } 252 else 253 { 254 if (OffsetToCCMap[index] == node) 255 { 256 //Do nothing, it is already there. 257 } 258 else 259 { 260 //Here comes a logic that will attach a new node to CC. 261 IGC_ASSERT(OffsetToCCMap.count(index)); 262 IGC_ASSERT(OffsetToCCMap[index]); 263 264 llvm::Value* ccRootValue = OffsetToCCMap[index]->value; 265 266 CE.unionRegs(ccRootValue, node->value); 267 268 llvm::Value* NewParent = CE.GetActualDominatingParent( 269 ccRootValue, 270 llvm::dyn_cast<llvm::Instruction>(node->value)); 271 272 CE.ImmediateDominatingParent[node->value] = NewParent; 273 CE.CurrentDominatingParent[ccRootValue] = node->value; 274 } 275 } 276 } 277 278 //print in human readable form 279 void print(llvm::raw_ostream& OS, const llvm::Module* = 0) const; 280 281 void dump() const; 282 283 //FIXME: not sure whether this is the best choice 284 protected: 285 llvm::DenseMap<int, ElementNode*> OffsetToCCMap; 286 287 friend class CoalescingEngine; 288 }; 289 290 friend struct CCTuple; 291 292 ///returns non-null CCtuple pointer if there is at least 293 // one value that is 'coalesced' into the tuple. 294 // It returns one of those values (e.g. for type insepction) 295 // if the condition is true. 296 CCTuple* IsAnyValueCoalescedInCCTuple( 297 llvm::Instruction* inst, 298 const uint numOperands, 299 int& zeroBasedPayloadElementOffset, 300 llvm::Value*& val); 301 302 /// 303 bool IsPayloadCovered( 304 llvm::Instruction* inst, 305 CCTuple* ccTuple, 306 const uint numOperands, 307 const int payloadToCCTupleRelativeOffset); 308 309 310 //FIXME: do a filter over relevant (TGI) instructions DetermineWeight(llvm::Value * val)311 uint DetermineWeight(llvm::Value* val) 312 { 313 if (ValueWeightMap.count(val)) { 314 return ValueWeightMap[val]; 315 } 316 else { 317 uint numUsers = 0; 318 { 319 llvm::SmallPtrSet<llvm::User*, 8> touchedUsers; 320 for (llvm::Value::user_iterator i = val->user_begin(), e = val->user_end(); i != e; ++i) { 321 llvm::User* user = *i; 322 if (llvm::isa<llvm::Instruction>(user)) { 323 if (!touchedUsers.count(user)) 324 { 325 numUsers++; 326 touchedUsers.insert(user); 327 } 328 } 329 } 330 } 331 ValueWeightMap[val] = numUsers; 332 return numUsers; 333 } 334 } 335 336 //FIXME: this might not be the most effective way if called multiple times IsolationFilter(llvm::Value * val) const337 bool IsolationFilter(llvm::Value* val) const 338 { 339 if (isCoalescedByDeSSA(val)) { 340 return true; 341 } 342 if (llvm::dyn_cast<llvm::PHINode>(val)) { 343 return true; 344 } 345 else if (llvm::dyn_cast<llvm::ExtractElementInst>(val)) { 346 return true; 347 } 348 else { 349 for (llvm::Value::user_iterator i = val->user_begin(), e = val->user_end(); i != e; ++i) 350 { 351 if (llvm::dyn_cast<llvm::PHINode>(*i)) { 352 return true; 353 } 354 } 355 } 356 357 return false; 358 } 359 IsValIsolated(llvm::Value * val)360 bool IsValIsolated(llvm::Value* val) 361 { 362 return IsolationFilter(val) || 363 isUniform(val) || 364 llvm::isa<llvm::Argument>(val); 365 } 366 IsValConstOrIsolated(llvm::Value * val) const367 inline bool IsValConstOrIsolated(llvm::Value* val) const 368 { 369 return llvm::isa<llvm::Constant>(val) || 370 IsolationFilter(val) || 371 isUniform(val) || 372 llvm::isa<llvm::Argument>(val); 373 } 374 375 376 void IncrementalCoalesce(llvm::BasicBlock*); 377 void PrepareTuple( 378 const uint numOperands, 379 llvm::Instruction* tupleGeneratingInstruction, 380 llvm::SmallPtrSet<CCTuple*, 8> & touchedTuplesSet, 381 llvm::SmallVector<CCTuple*, 8> & touchedTuples, 382 bool& isAnyNodeAnchored, 383 bool& isAnyNodeCoalescable); 384 385 void DecideSplit(llvm::Instruction* tupleGeneratingInstruction); 386 387 void CreateTuple( 388 const uint numOperands, 389 llvm::Instruction* tupleGeneratingInstruction); 390 391 void DetermineAnchor( 392 const uint numOperands, 393 const llvm::Instruction* tupleGeneratingInstruction, 394 CCTuple*& ccTuple, 395 int& rootTupleStartOffset, 396 int& thisTupleStartOffset); 397 398 //Return true if it is ok to continue, false if interference is detected EvictOrStop(CCTuple * ccTuple,const int index,llvm::Instruction * tupleGeneratingInstruction,bool const forceEviction,bool & interferes)399 bool EvictOrStop( 400 CCTuple* ccTuple, 401 const int index, 402 llvm::Instruction* tupleGeneratingInstruction, 403 bool const forceEviction, 404 //out: 405 bool& interferes) 406 { 407 if (!IsInsertionSlotAvailable(ccTuple, index, tupleGeneratingInstruction)) { 408 if (forceEviction) { 409 PrepareInsertionSlot(ccTuple, index, tupleGeneratingInstruction); 410 return true; //we can continue 411 } 412 else { 413 interferes = true; 414 return false; //abort whole 'transaction' 415 } 416 } 417 return true; 418 } 419 420 class ProcessInterferencesElementFunctor; 421 422 bool InterferenceCheck( 423 const uint numOperands, 424 llvm::Instruction* tupleGeneratingInstruction, 425 const int offsetDiff, 426 CCTuple* ccTuple, 427 //out: 428 ProcessInterferencesElementFunctor* interferencesFunctor); 429 430 /// \brief return true if element slot is safe for copying-in 431 /// canExtend - if true, it is assumed that ccTuple can be 'still' extended 432 /// 433 /// 434 bool IsInsertionSlotAvailable( 435 CCTuple* ccTuple, 436 const int index, 437 llvm::Instruction* tupleGeneratingInstruction, 438 const bool canExtend = true); 439 440 void PrepareInsertionSlot( 441 CCTuple* ccTuple, 442 const int index, 443 llvm::Instruction* tupleGeneratingInstruction, 444 const bool evictFullCongruenceClass = false); 445 446 bool CheckIntersectionForNonHomogeneous( 447 const uint numOperands, 448 llvm::Instruction* tupleGeneratingInstruction, 449 const int offsetDiff, 450 CCTuple* ccTuple); 451 452 void ProcessTuple(llvm::Instruction* tupleGeneratingInstruction); 453 void GreedyHeuristic(llvm::Instruction* tupleGeneratingInstruction); 454 GetCCTupleList()455 std::vector<CCTuple*>& GetCCTupleList() 456 { 457 return m_CCTupleList; 458 } 459 460 /// Get the union-root of a register. The root is 0 if the register has been 461 /// isolated. 462 llvm::Value* getRegRoot(llvm::Value*) const; 463 464 ///Given the currentInstruction and root identifier of CC, find the element 465 ///that belongs to CC and is a proper dominator of the currentInstruction. 466 ///In straight-line codes and in separate payload and de-ssa phases, this 467 ///actually should always give the results in one iteration. 468 // Pop registers from the stack represented by ImmediateDominatingParent 469 // until we find a parent that dominates the current instruction. GetActualDominatingParent(llvm::Value * RootV,llvm::Instruction * currentInstruction)470 inline llvm::Value* GetActualDominatingParent(llvm::Value* RootV, llvm::Instruction* currentInstruction) 471 { 472 llvm::Value* NewParent = CurrentDominatingParent[RootV]; 473 474 while (NewParent) { 475 if (getRegRoot(NewParent)) { 476 //Fixme: here it does not apply. 477 // we have added the another condition because the domination-test 478 // does not work between two phi-node. See the following comments 479 // from the DT::dominates: 480 // " It is not possible to determine dominance between two PHI nodes 481 // based on their ordering 482 // if (isa<PHINode>(A) && isa<PHINode>(B)) 483 // return false;" 484 if (llvm::isa<llvm::Argument>(NewParent)) { 485 break; 486 } 487 else if (DT->dominates(llvm::cast<llvm::Instruction>(NewParent), currentInstruction)) { 488 break; 489 } /* else if (cast<llvm::Instruction>(NewParent)->getParent() == MBB && 490 isa<PHINode>(DefMI) && isa<PHINode>(NewParent)) { 491 break; 492 } */ 493 } 494 NewParent = ImmediateDominatingParent[NewParent]; 495 } 496 497 return NewParent; 498 } 499 500 //Here we test the interference between the 501 //Algorithm: 502 //Starting from the currentDominatingParent, walk the congruence class 503 //dominance tree upward, until an element that dominates the currentInstruction 504 //is found. 505 //Since this method could be called on the non-dominance traversal (e.g. 506 //currentInstruction dominates some elements in the tree) 507 // Say we have a dominance tree that is already constructed (lifetime goes 508 // downwards): 509 // CC dominance tree 510 // v67 511 // ^ 512 // | 513 // v121 (dominating) 514 // ^ 515 // | ----- v181 516 // | 517 // v190 <- (dominated) 518 // 519 // Want to check the interference with v181, which dominates v190, but is dominated 520 // by v121. if we would just look for a dominating element for v181, we would find 521 // out that v121 is dominator of v181 and (say) it is not interfering with v181, thus 522 // leading to conclusion that there is no interference between v181 and CC dominance tree. 523 SymmetricInterferenceTest(llvm::Value * RootV,llvm::Instruction * currentInstruction,llvm::Value * & dominating,llvm::Value * & dominated)524 inline void SymmetricInterferenceTest( 525 llvm::Value* RootV, 526 llvm::Instruction* currentInstruction, 527 llvm::Value*& dominating, //in-out 528 llvm::Value*& dominated) 529 { 530 llvm::Value* NewParent = CurrentDominatingParent[RootV]; 531 dominating = nullptr; 532 dominated = nullptr; 533 534 while (NewParent) 535 { 536 if (getRegRoot(NewParent)) //not isolated 537 { 538 if (llvm::isa<llvm::Argument>(NewParent)) 539 { 540 dominating = NewParent; 541 break; 542 } 543 else if (DT->dominates(llvm::cast<llvm::Instruction>(NewParent), currentInstruction)) 544 { 545 dominating = NewParent; 546 break; 547 } 548 dominated = NewParent; 549 } 550 NewParent = ImmediateDominatingParent[NewParent]; 551 } 552 } 553 GetValueCCTupleMapping(llvm::Value * val)554 inline CCTuple* GetValueCCTupleMapping(llvm::Value* val) 555 { 556 llvm::Value* RootV = getRegRoot(val); 557 if (!RootV) { 558 return NULL; 559 } 560 561 IGC_ASSERT(ValueNodeMap.count(RootV)); 562 auto RI = ValueNodeMap.find(RootV); 563 ElementNode* Node = RI->second; 564 565 auto CCI = NodeCCTupleMap.find(Node); 566 if (CCI != NodeCCTupleMap.end()) { 567 return CCI->second; 568 } 569 else { 570 return NULL; 571 } 572 } 573 574 /// Caller is responsible for assuring that value is not isolated 575 // e.g. by calling GetCalueCCTupleMapping previously. GetValueOffsetInCCTuple(llvm::Value * val)576 inline int GetValueOffsetInCCTuple(llvm::Value* val) 577 { 578 llvm::Value* RootV = getRegRoot(val); 579 IGC_ASSERT(nullptr != RootV); 580 IGC_ASSERT(ValueNodeMap.count(RootV)); 581 582 auto RI = ValueNodeMap.find(RootV); 583 ElementNode* Node = RI->second; 584 585 auto CCI = NodeOffsetMap.find(Node); 586 IGC_ASSERT(CCI != NodeOffsetMap.end()); 587 return CCI->second; 588 } 589 590 ////////////////////////////////////////////////////////////////////////// 591 class ElementFunctor { 592 public: 593 virtual void SetIndex(int index) = 0; 594 virtual bool visitCopy() = 0; 595 virtual bool visitConstant() = 0; 596 virtual bool visitArgument() = 0; 597 virtual bool visitIsolated() = 0; 598 virtual bool visitAnchored() = 0; 599 virtual bool visitInterfering(llvm::Value* val, const bool evictFullCongruenceClass) = 0; 600 virtual bool visitPackedNonInterfering(llvm::Value* val) = 0; ~ElementFunctor()601 virtual ~ElementFunctor() {} 602 }; 603 604 ////////////////////////////////////////////////////////////////////////// 605 class GatherWeightElementFunctor : public ElementFunctor 606 { 607 public: GatherWeightElementFunctor()608 GatherWeightElementFunctor() : 609 nAlignedAnchors(0), 610 nInsertionSlotRequired(0), 611 nNeedsDisplacement(0) 612 { 613 } 614 SetIndex(int index)615 virtual void SetIndex(int index) 616 { 617 } 618 visitCopy()619 virtual bool visitCopy() 620 { 621 nInsertionSlotRequired++; 622 return true; 623 } 624 visitConstant()625 virtual bool visitConstant() 626 { 627 nInsertionSlotRequired++; 628 return true; 629 } 630 631 ///Not used yet, but is here in an interface 632 ///for completeness. visitArgument()633 virtual bool visitArgument() 634 { 635 return true; 636 } 637 visitIsolated()638 virtual bool visitIsolated() 639 { 640 nInsertionSlotRequired++; 641 return true; 642 } 643 644 ///Visits constrained (anchored) element. visitAnchored()645 virtual bool visitAnchored() 646 { 647 nAlignedAnchors++; 648 return true; 649 } 650 651 ///Visits a value in a tuple that interferes with some (non-isolated) 652 ///element in a CC class that occupies the same slot. visitInterfering(llvm::Value * val,const bool evictFullCongruenceClass)653 virtual bool visitInterfering( 654 llvm::Value* val, 655 const bool evictFullCongruenceClass) 656 { 657 nNeedsDisplacement++; 658 return true; 659 } 660 visitPackedNonInterfering(llvm::Value * val)661 virtual bool visitPackedNonInterfering(llvm::Value* val) 662 { 663 return true; 664 } 665 666 ///Gets the number of 'anchored' elements in a tuple. GetNumAlignedAnchors() const667 inline int GetNumAlignedAnchors() const 668 { 669 return nAlignedAnchors; 670 } 671 GetNumInsertionSlotsRequired() const672 inline int GetNumInsertionSlotsRequired() const 673 { 674 return nInsertionSlotRequired; 675 } 676 677 ///Gets the number of values in a tuple GetNumNeedsDisplacement() const678 inline int GetNumNeedsDisplacement() const 679 { 680 return nNeedsDisplacement; 681 } 682 683 private: 684 int nAlignedAnchors; 685 int nInsertionSlotRequired; 686 int nNeedsDisplacement; 687 }; 688 689 ////////////////////////////////////////////////////////////////////////// 690 class ProcessInterferencesElementFunctor : public ElementFunctor 691 { 692 private: 693 bool m_forceEviction; 694 bool m_interferes; 695 llvm::Instruction* m_tupleInst; 696 const int m_offsetDiff; 697 CCTuple* m_ccTuple; 698 CoalescingEngine* m_CE; 699 int m_index; 700 llvm::SmallPtrSet<llvm::Value*, 8> m_valuesForIsolation; 701 702 public: ProcessInterferencesElementFunctor(bool forceEviction,llvm::Instruction * inst,const int offsetDiff,CCTuple * ccTuple,CoalescingEngine * CE)703 ProcessInterferencesElementFunctor( 704 bool forceEviction, 705 llvm::Instruction* inst, 706 const int offsetDiff, 707 CCTuple* ccTuple, 708 CoalescingEngine* CE) : 709 m_forceEviction(forceEviction), 710 m_interferes(false), 711 m_tupleInst(inst), 712 m_offsetDiff(offsetDiff), 713 m_ccTuple(ccTuple), 714 m_CE(CE), 715 m_index(0) 716 { 717 718 } 719 GetComputedValuesForIsolation()720 llvm::SmallPtrSet<llvm::Value*, 8> & GetComputedValuesForIsolation() 721 { 722 return m_valuesForIsolation; 723 } 724 SetForceEviction(bool force)725 inline void SetForceEviction(bool force) 726 { 727 m_forceEviction = force; 728 } 729 IsInterfering() const730 inline bool IsInterfering() const 731 { 732 return m_interferes; 733 } 734 SetIndex(int index)735 virtual void SetIndex(int index) 736 { 737 m_index = index; 738 } 739 visitCopy()740 virtual bool visitCopy() 741 { 742 return m_CE->EvictOrStop( 743 m_ccTuple, 744 m_index + m_offsetDiff, 745 m_tupleInst, 746 m_forceEviction, 747 m_interferes); 748 } visitConstant()749 virtual bool visitConstant() 750 { 751 return m_CE->EvictOrStop( 752 m_ccTuple, 753 m_index + m_offsetDiff, 754 m_tupleInst, 755 m_forceEviction, 756 m_interferes); 757 } visitIsolated()758 virtual bool visitIsolated() 759 { 760 return m_CE->EvictOrStop( 761 m_ccTuple, 762 m_index + m_offsetDiff, 763 m_tupleInst, 764 m_forceEviction, 765 m_interferes); 766 } 767 visitArgument()768 virtual bool visitArgument() 769 { 770 return m_CE->EvictOrStop( 771 m_ccTuple, 772 m_index + m_offsetDiff, 773 m_tupleInst, 774 m_forceEviction, 775 m_interferes); 776 } 777 visitAnchored()778 virtual bool visitAnchored() 779 { 780 return true; 781 } 782 visitInterfering(llvm::Value * val,const bool evictFullCongruenceClass)783 virtual bool visitInterfering(llvm::Value* val, const bool evictFullCongruenceClass) 784 { 785 if (m_forceEviction) { 786 787 // HEURISTIC: 788 if (m_CE->DetermineWeight(val) < 2) { 789 790 m_CE->PrepareInsertionSlot( 791 m_ccTuple, 792 m_index + m_offsetDiff, 793 m_tupleInst, 794 false //evictFullCongruenceClass 795 ); 796 797 m_valuesForIsolation.insert(val); 798 } 799 else 800 { 801 m_CE->PrepareInsertionSlot( 802 m_ccTuple, 803 m_index + m_offsetDiff, 804 m_tupleInst, 805 true //evictFullCongruenceClass 806 ); 807 808 } 809 return true; 810 } 811 else { 812 m_interferes = true; 813 return false; 814 } 815 816 return true; 817 } 818 visitPackedNonInterfering(llvm::Value * val)819 virtual bool visitPackedNonInterfering(llvm::Value* val) 820 { 821 if (m_CE->DetermineWeight(val) < 2) { 822 m_valuesForIsolation.insert(val); 823 } 824 825 return true; 826 } 827 }; 828 829 void ProcessElements( 830 const uint numOperands, 831 llvm::Instruction* tupleInst, 832 const int offsetDiff, 833 CCTuple* ccTuple, 834 ElementFunctor* functor); 835 836 public: 837 GetNumPayloadElements(llvm::Instruction * inst)838 uint GetNumPayloadElements(llvm::Instruction* inst) 839 { 840 if (currentPart_ == 0) 841 { 842 auto iter = splitPoint_.find(inst); 843 if (iter == splitPoint_.end()) 844 { 845 //means we have no split point, can 846 return m_PayloadMapping.GetNumPayloadElements(inst); 847 } 848 else 849 { 850 IGC_ASSERT((*iter).second > 0); 851 return (*iter).second; 852 } 853 //return splitPoint_ 854 } 855 else 856 { 857 IGC_ASSERT(currentPart_ == 1); 858 auto iter = splitPoint_.find(inst); 859 IGC_ASSERT(iter != splitPoint_.end()); 860 return m_PayloadMapping.GetNumPayloadElements(inst) - (*iter).second; 861 } 862 } 863 //uint GetNonAdjustedNumPayloadElements_Sample(const SampleIntrinsic *inst) 864 //{ 865 // return m_PayloadMapping.GetNonAdjustedNumPayloadElements_Sample(inst); 866 //} 867 GetNumPayloadElements_Sample(const llvm::SampleIntrinsic * inst)868 int GetNumPayloadElements_Sample(const llvm::SampleIntrinsic* inst) 869 { 870 return m_PayloadMapping.GetNumPayloadElements_Sample(inst); 871 } 872 873 /// Get the mapping from the payload element (at position index) 874 /// to intrinsic argument value. Indexing is zero based. GetPayloadElementToValueMapping(const llvm::Instruction * inst,uint index)875 llvm::Value* GetPayloadElementToValueMapping(const llvm::Instruction* inst, uint index) 876 { 877 return m_PayloadMapping.GetPayloadElementToValueMapping(inst, currentLowBound_ + index); 878 } GetPayloadElementToValueMapping_sample(const llvm::SampleIntrinsic * inst,const uint index)879 llvm::Value* GetPayloadElementToValueMapping_sample(const llvm::SampleIntrinsic* inst, const uint index) 880 { 881 return m_PayloadMapping.GetPayloadElementToValueMapping_sample(inst, index); 882 } GetNonAdjustedPayloadElementToValueMapping_sample(const llvm::SampleIntrinsic * inst,const uint index)883 llvm::Value* GetNonAdjustedPayloadElementToValueMapping_sample(const llvm::SampleIntrinsic* inst, const uint index) 884 { 885 return m_PayloadMapping.GetNonAdjustedPayloadElementToValueMapping_sample(inst, index); 886 } 887 HasNonHomogeneousPayloadElements(const llvm::Instruction * inst)888 bool HasNonHomogeneousPayloadElements(const llvm::Instruction* inst) 889 { 890 return m_PayloadMapping.HasNonHomogeneousPayloadElements(inst); 891 } GetLeftReservedOffset(const llvm::Instruction * inst,SIMDMode simdMode)892 int GetLeftReservedOffset(const llvm::Instruction* inst, SIMDMode simdMode) 893 { 894 return m_PayloadMapping.GetLeftReservedOffset(inst, simdMode); 895 } 896 GetRightReservedOffset(const llvm::Instruction * inst,SIMDMode simdMode)897 int GetRightReservedOffset(const llvm::Instruction* inst, SIMDMode simdMode) 898 { 899 return m_PayloadMapping.GetRightReservedOffset(inst, simdMode); 900 } GetSupremumOfNonHomogeneousPart(const llvm::Instruction * inst1,const llvm::Instruction * inst2)901 const llvm::Instruction* GetSupremumOfNonHomogeneousPart( 902 const llvm::Instruction* inst1, 903 const llvm::Instruction* inst2) 904 { 905 return m_PayloadMapping.GetSupremumOfNonHomogeneousPart(inst1, inst2); 906 } 907 GetNumSplitParts(llvm::Instruction * inst)908 uint GetNumSplitParts(llvm::Instruction* inst) 909 { 910 auto iter = splitPoint_.find(inst); 911 if (iter != splitPoint_.end()) 912 { 913 uint splitIndex = (*iter).second; 914 return splitIndex > 0 ? 2 : 1; 915 } 916 else 917 { 918 return 1; 919 } 920 } 921 SetCurrentPart(llvm::Instruction * inst,unsigned int partNum)922 void SetCurrentPart(llvm::Instruction* inst, unsigned int partNum) 923 { 924 if (partNum == 0) 925 { 926 currentLowBound_ = 0; 927 currentPart_ = partNum; 928 // currentUpperBound_ = splitPoint_[inst]; 929 } 930 else 931 { 932 IGC_ASSERT(partNum == 1); 933 currentLowBound_ = splitPoint_[inst]; 934 IGC_ASSERT(currentLowBound_ > 0); 935 currentPart_ = partNum; 936 } 937 } 938 939 private: 940 941 CPlatform m_Platform; 942 PayloadMapping m_PayloadMapping; 943 std::vector<CCTuple*> m_CCTupleList; 944 llvm::DominatorTree* DT; 945 LiveVars* LV; 946 WIAnalysis* WIA; 947 DeSSA* m_DeSSA; 948 CodeGenPatternMatch* CG; 949 llvm::DenseMap<llvm::Value*, ElementNode*> ValueNodeMap; 950 llvm::DenseMap<ElementNode*, CCTuple*> NodeCCTupleMap; 951 //Mapping root element node to its offset in cc tuple: 952 llvm::DenseMap<ElementNode*, int> NodeOffsetMap; 953 llvm::DenseMap<llvm::Value*, uint> ValueWeightMap; 954 ModuleMetaData* m_ModuleMetadata; 955 CodeGenContext* m_CodeGenContext; 956 //Maps a basic block to a list of instruction defs to be processed for coalescing (in dominance order) 957 llvm::DenseMap<llvm::BasicBlock*, std::vector<llvm::Instruction*> > BBProcessingDefs; 958 959 960 /* Taken from strong DE SSA */ 961 // Perform a depth-first traversal of the dominator tree, splitting 962 // interferences amongst PHI-congruence classes. 963 964 llvm::BumpPtrAllocator Allocator; 965 966 llvm::DenseMap<llvm::Value*, llvm::Value*> CurrentDominatingParent; 967 llvm::DenseMap<llvm::Value*, llvm::Value*> ImmediateDominatingParent; 968 969 llvm::DenseMap<llvm::Instruction*, uint> splitPoint_; 970 unsigned currentLowBound_; 971 //unsigned currentUpperBound_; 972 unsigned currentPart_; 973 974 /// Get the union-root of a PHI. The root of a PHI is 0 if the PHI has been 975 /// isolated. Otherwise, it is the original root of its destination and 976 /// all of its operands (before they were isolated, if they were). 977 llvm::Value* getPHIRoot(llvm::Instruction*) const; 978 979 void unionRegs(llvm::Value*, llvm::Value*); 980 isUniform(llvm::Value * v) const981 bool isUniform(llvm::Value* v) const { 982 return (WIA->isUniform(v)); 983 } 984 985 // Isolate a register. isolateReg(llvm::Value * Val)986 void isolateReg(llvm::Value* Val) { 987 ElementNode* Node = ValueNodeMap[Val]; 988 Node->parent.setInt(Node->parent.getInt() | ElementNode::kRegisterIsolatedFlag); 989 } 990 /* Taken from strong DE SSA */ 991 992 993 994 struct MIIndexCompare { MIIndexCompareIGC::CoalescingEngine::MIIndexCompare995 MIIndexCompare(LiveVars* _lv) : LV(_lv) { } 996 operator ()IGC::CoalescingEngine::MIIndexCompare997 bool operator()(const llvm::Instruction* LHS, const llvm::Instruction* RHS) const { 998 return LV->getDistance(LHS) < LV->getDistance(RHS); 999 } 1000 1001 LiveVars* LV; 1002 }; 1003 1004 bool isCoalescedByDeSSA(llvm::Value* V) const; 1005 }; 1006 1007 } //namespace IGC 1008