1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #pragma once
10 
11 #include "Compiler/CISACodeGen/WIAnalysis.hpp"
12 #include "Compiler/CISACodeGen/PatternMatchPass.hpp"
13 #include "Compiler/MetaDataUtilsWrapper.h"
14 #include "Compiler/CISACodeGen/PayloadMapping.hpp"
15 #include "common/LLVMWarningsPush.hpp"
16 #include <llvm/Pass.h>
17 #include <llvm/ADT/DenseSet.h>
18 #include <llvm/IR/InstVisitor.h>
19 #include <llvm/IR/Dominators.h>
20 #include <llvm/Support/Debug.h>
21 #include <llvm/Support/Allocator.h>
22 #include "common/LLVMWarningsPop.hpp"
23 #include "GenISAIntrinsics/GenIntrinsicInst.h"
24 #include "Probe/Assertion.h"
25 
26 namespace IGC {
27 
28     class CEncoder;
29     class CVariable;
30     class CodeGenContextWrapper;
31     class DeSSA;
32 
33 
34     class CoalescingEngine : public llvm::FunctionPass, public llvm::InstVisitor<CoalescingEngine>
35     {
36         //TODO: this is fixed for now, but once we have pressure heuristic, could be relaxed
37         static const int MaxTupleSize = 12;
38 
39     public:
40         static char ID; // Pass identification, replacement for typeid
41         CoalescingEngine();
42 
43         virtual void getAnalysisUsage(llvm::AnalysisUsage& AU) const override;
44 
releaseMemory()45         virtual void releaseMemory() override {
46             Allocator.Reset();
47 
48             for (auto itr = m_CCTupleList.begin(),
49                 iend = m_CCTupleList.end();
50                 itr != iend; ++itr)
51             {
52                 CCTuple* ccTuple = *itr;
53                 delete ccTuple;
54             }
55             m_CCTupleList.clear();
56             //Nodes need not be deallocated since they are
57             //owned by bump allocator (Allocator), and are destroyed
58             //once bump allocator goes out the the scope.
59             NodeCCTupleMap.clear();
60             ValueNodeMap.clear();
61             BBProcessingDefs.clear();
62             NodeOffsetMap.clear();
63         }
64 
65         bool runOnFunction(llvm::Function&) override;
66 
getPassName() const67         virtual llvm::StringRef getPassName() const override {
68             return "CoalescingEngine";
69         }
70 
71         /// print - print partitions in human readable form
72         void print(llvm::raw_ostream& OS, const llvm::Module* = 0) const override;
73 
74         /// dump - Dump the partitions to dbgs().
75         void dump() const;
76 
77         void ProcessBlock(llvm::BasicBlock* bb);
78 
79         //
80         bool MatchSingleInstruction(llvm::GenIntrinsicInst* I);
81 
82 
GetSingleElementWidth(SIMDMode simdMode,const llvm::DataLayout * pDL,llvm::Value * val)83         int GetSingleElementWidth(
84             SIMDMode simdMode,
85             const llvm::DataLayout* pDL,
86             llvm::Value* val)
87         {
88             int result = 0;
89             int mult = 1;
90             if (val->getType()->isHalfTy() && simdMode == SIMDMode::SIMD8)
91             {
92                 mult = 2;
93             }
94 
95             result = int_cast<int>(mult *
96                 numLanes(simdMode) *
97                 pDL->getTypeAllocSize(val->getType()));
98 
99             return result;
100         }
101 
102         //
103         CVariable* PrepareExplicitPayload(
104             CShader* outProgram,
105             CEncoder* encoder,
106             SIMDMode simdMode,
107             const llvm::DataLayout* pDL,
108             llvm::Instruction* inst,
109             int& payloadOffset);
110 
111 
112         void visitCastInst(llvm::CastInst& I);
113         void visitBinaryOperator(llvm::BinaryOperator& I);
114         void visitCmpInst(llvm::CmpInst& I);
115         void visitPHINode(llvm::PHINode& I);
116         void visitUnaryInstruction(llvm::UnaryInstruction& I);
117         void visitCallInst(llvm::CallInst& I);
118         void visitSelectInst(llvm::SelectInst& I);
119         void visitBitCastInst(llvm::BitCastInst& I);
120         void visitInstruction(llvm::Instruction& I);
121         void visitLoadInst(llvm::LoadInst& I);
122         void visitStoreInst(llvm::StoreInst& I);
123 
124 
125         //////////////////////////////////////////////////////////////////////////
126         struct ElementNode
127         {
128             enum Flags {
129                 kRegisterIsolatedFlag = 1,
130                 kPHIIsolatedFlag = 2
131             };
ElementNodeIGC::CoalescingEngine::ElementNode132             ElementNode(llvm::Value* _value) :
133                 value(_value), rank(0)
134             {
135                 parent.setPointer(this);
136             }
137 
~ElementNodeIGC::CoalescingEngine::ElementNode138             ~ElementNode()
139             {
140             }
141 
142             ElementNode* getLeader();
143 
144             // Fields:
145             llvm::PointerIntPair<ElementNode*, 2> parent;
146             llvm::Value* value;
147             unsigned rank;
148         };
149 
150         //////////////////////////////////////////////////////////////////////////
151         struct CCTuple
152         {
CCTupleIGC::CoalescingEngine::CCTuple153             CCTuple() :
154                 leftBound(0),
155                 rightBound(0),
156                 hasNonHomogeneousElements(false),
157                 rootInst(nullptr)
158             {
159             }
160 
161             int leftBound;
162             int rightBound;
163 
164 
GetLeftBoundIGC::CoalescingEngine::CCTuple165             int GetLeftBound() const
166             {
167                 return leftBound;
168             }
169 
GetRightBoundIGC::CoalescingEngine::CCTuple170             int GetRightBound() const
171             {
172                 return rightBound;
173             }
174 
175 
176             //cannot be safely extended left/right (used for non-homogeneous payload modeling)
177             bool hasNonHomogeneousElements;
178 
179             llvm::Instruction* rootInst;
180 
HasNonHomogeneousElementsIGC::CoalescingEngine::CCTuple181             inline bool HasNonHomogeneousElements() const
182             {
183                 return hasNonHomogeneousElements;
184             }
185 
SetHasNonHomogeneousElementsIGC::CoalescingEngine::CCTuple186             inline void SetHasNonHomogeneousElements(llvm::Instruction* _rootInst)
187             {
188                 IGC_ASSERT(rootInst == nullptr);
189                 rootInst = _rootInst;
190                 hasNonHomogeneousElements = true;
191             }
192 
GetRootIGC::CoalescingEngine::CCTuple193             inline llvm::Instruction* GetRoot() const
194             {
195                 return rootInst;
196             }
197 
198 
GetNumElementsIGC::CoalescingEngine::CCTuple199             inline uint GetNumElements() const
200             {
201                 IGC_ASSERT(rightBound >= leftBound);
202                 return rightBound - leftBound + 1;
203             }
204 
InitializeIndexWithCCRootIGC::CoalescingEngine::CCTuple205             inline void InitializeIndexWithCCRoot(int index, ElementNode* elNode)
206             {
207                 OffsetToCCMap[index] = elNode;
208 
209                 ResizeBounds(index);
210             }
211 
ResizeBoundsIGC::CoalescingEngine::CCTuple212             inline void ResizeBounds(int index)
213             {
214                 if (index < leftBound)
215                 {
216                     leftBound = index;
217                 }
218 
219                 if (index > rightBound)
220                 {
221                     rightBound = index;
222                 }
223             }
224 
CheckIndexForNonInitializedSlotIGC::CoalescingEngine::CCTuple225             inline void CheckIndexForNonInitializedSlot(int index)
226             {
227                 //if (OffsetToCCMap.count(index) == 0)
228                 {
229                     ResizeBounds(index);
230                 }
231             }
232 
233 
GetCCNodeWithIndexIGC::CoalescingEngine::CCTuple234             ElementNode* GetCCNodeWithIndex(int index)
235             {
236                 return OffsetToCCMap[index];
237             }
238 
AttachNodeAtIndexIGC::CoalescingEngine::CCTuple239             void AttachNodeAtIndex(int index, ElementNode* node, CoalescingEngine& CE)
240             {
241                 if (OffsetToCCMap.count(index) == 0 || OffsetToCCMap[index] == NULL)
242                 {
243                     IGC_ASSERT(node->value == CE.getRegRoot(node->value));
244 
245                     CE.NodeCCTupleMap[node] = this;
246                     CE.NodeOffsetMap[node] = index;
247 
248                     InitializeIndexWithCCRoot(index, node);
249                     CE.CurrentDominatingParent[node->value] = node->value;
250                     CE.ImmediateDominatingParent[node->value] = NULL;
251                 }
252                 else
253                 {
254                     if (OffsetToCCMap[index] == node)
255                     {
256                         //Do nothing, it is already there.
257                     }
258                     else
259                     {
260                         //Here comes a logic that will attach a new node to CC.
261                         IGC_ASSERT(OffsetToCCMap.count(index));
262                         IGC_ASSERT(OffsetToCCMap[index]);
263 
264                         llvm::Value* ccRootValue = OffsetToCCMap[index]->value;
265 
266                         CE.unionRegs(ccRootValue, node->value);
267 
268                         llvm::Value* NewParent = CE.GetActualDominatingParent(
269                             ccRootValue,
270                             llvm::dyn_cast<llvm::Instruction>(node->value));
271 
272                         CE.ImmediateDominatingParent[node->value] = NewParent;
273                         CE.CurrentDominatingParent[ccRootValue] = node->value;
274                     }
275                 }
276             }
277 
278             //print in human readable form
279             void print(llvm::raw_ostream& OS, const llvm::Module* = 0) const;
280 
281             void dump() const;
282 
283             //FIXME: not sure whether this is the best choice
284         protected:
285             llvm::DenseMap<int, ElementNode*> OffsetToCCMap;
286 
287             friend class CoalescingEngine;
288         };
289 
290         friend struct CCTuple;
291 
292         ///returns non-null CCtuple pointer if there is at least
293         // one value that is 'coalesced' into the tuple.
294         // It returns one of those values (e.g. for type insepction)
295         // if the condition is true.
296         CCTuple* IsAnyValueCoalescedInCCTuple(
297             llvm::Instruction* inst,
298             const uint numOperands,
299             int& zeroBasedPayloadElementOffset,
300             llvm::Value*& val);
301 
302         ///
303         bool IsPayloadCovered(
304             llvm::Instruction* inst,
305             CCTuple* ccTuple,
306             const uint numOperands,
307             const int payloadToCCTupleRelativeOffset);
308 
309 
310         //FIXME: do a filter over relevant (TGI) instructions
DetermineWeight(llvm::Value * val)311         uint DetermineWeight(llvm::Value* val)
312         {
313             if (ValueWeightMap.count(val)) {
314                 return ValueWeightMap[val];
315             }
316             else {
317                 uint numUsers = 0;
318                 {
319                     llvm::SmallPtrSet<llvm::User*, 8> touchedUsers;
320                     for (llvm::Value::user_iterator i = val->user_begin(), e = val->user_end(); i != e; ++i) {
321                         llvm::User* user = *i;
322                         if (llvm::isa<llvm::Instruction>(user)) {
323                             if (!touchedUsers.count(user))
324                             {
325                                 numUsers++;
326                                 touchedUsers.insert(user);
327                             }
328                         }
329                     }
330                 }
331                 ValueWeightMap[val] = numUsers;
332                 return numUsers;
333             }
334         }
335 
336         //FIXME: this might not be the most effective way if called multiple times
IsolationFilter(llvm::Value * val) const337         bool IsolationFilter(llvm::Value* val) const
338         {
339             if (isCoalescedByDeSSA(val)) {
340                 return true;
341             }
342             if (llvm::dyn_cast<llvm::PHINode>(val)) {
343                 return true;
344             }
345             else if (llvm::dyn_cast<llvm::ExtractElementInst>(val)) {
346                 return true;
347             }
348             else {
349                 for (llvm::Value::user_iterator i = val->user_begin(), e = val->user_end(); i != e; ++i)
350                 {
351                     if (llvm::dyn_cast<llvm::PHINode>(*i)) {
352                         return true;
353                     }
354                 }
355             }
356 
357             return false;
358         }
359 
IsValIsolated(llvm::Value * val)360         bool IsValIsolated(llvm::Value* val)
361         {
362             return IsolationFilter(val) ||
363                 isUniform(val) ||
364                 llvm::isa<llvm::Argument>(val);
365         }
366 
IsValConstOrIsolated(llvm::Value * val) const367         inline bool IsValConstOrIsolated(llvm::Value* val) const
368         {
369             return llvm::isa<llvm::Constant>(val) ||
370                 IsolationFilter(val) ||
371                 isUniform(val) ||
372                 llvm::isa<llvm::Argument>(val);
373         }
374 
375 
376         void IncrementalCoalesce(llvm::BasicBlock*);
377         void PrepareTuple(
378             const uint numOperands,
379             llvm::Instruction* tupleGeneratingInstruction,
380             llvm::SmallPtrSet<CCTuple*, 8> & touchedTuplesSet,
381             llvm::SmallVector<CCTuple*, 8> & touchedTuples,
382             bool& isAnyNodeAnchored,
383             bool& isAnyNodeCoalescable);
384 
385         void DecideSplit(llvm::Instruction* tupleGeneratingInstruction);
386 
387         void CreateTuple(
388             const uint numOperands,
389             llvm::Instruction* tupleGeneratingInstruction);
390 
391         void DetermineAnchor(
392             const uint numOperands,
393             const llvm::Instruction* tupleGeneratingInstruction,
394             CCTuple*& ccTuple,
395             int& rootTupleStartOffset,
396             int& thisTupleStartOffset);
397 
398         //Return true if it is ok to continue, false if interference is detected
EvictOrStop(CCTuple * ccTuple,const int index,llvm::Instruction * tupleGeneratingInstruction,bool const forceEviction,bool & interferes)399         bool EvictOrStop(
400             CCTuple* ccTuple,
401             const int index,
402             llvm::Instruction* tupleGeneratingInstruction,
403             bool const forceEviction,
404             //out:
405             bool& interferes)
406         {
407             if (!IsInsertionSlotAvailable(ccTuple, index, tupleGeneratingInstruction)) {
408                 if (forceEviction) {
409                     PrepareInsertionSlot(ccTuple, index, tupleGeneratingInstruction);
410                     return true; //we can continue
411                 }
412                 else {
413                     interferes = true;
414                     return false; //abort whole 'transaction'
415                 }
416             }
417             return true;
418         }
419 
420         class ProcessInterferencesElementFunctor;
421 
422         bool InterferenceCheck(
423             const uint numOperands,
424             llvm::Instruction* tupleGeneratingInstruction,
425             const int offsetDiff,
426             CCTuple* ccTuple,
427             //out:
428             ProcessInterferencesElementFunctor* interferencesFunctor);
429 
430         /// \brief return true if element slot is safe for copying-in
431         /// canExtend - if true, it is assumed that ccTuple can be 'still' extended
432         ///
433         ///
434         bool IsInsertionSlotAvailable(
435             CCTuple* ccTuple,
436             const int index,
437             llvm::Instruction* tupleGeneratingInstruction,
438             const bool canExtend = true);
439 
440         void PrepareInsertionSlot(
441             CCTuple* ccTuple,
442             const int index,
443             llvm::Instruction* tupleGeneratingInstruction,
444             const bool evictFullCongruenceClass = false);
445 
446         bool CheckIntersectionForNonHomogeneous(
447             const uint numOperands,
448             llvm::Instruction* tupleGeneratingInstruction,
449             const int offsetDiff,
450             CCTuple* ccTuple);
451 
452         void ProcessTuple(llvm::Instruction* tupleGeneratingInstruction);
453         void GreedyHeuristic(llvm::Instruction* tupleGeneratingInstruction);
454 
GetCCTupleList()455         std::vector<CCTuple*>& GetCCTupleList()
456         {
457             return m_CCTupleList;
458         }
459 
460         /// Get the union-root of a register. The root is 0 if the register has been
461         /// isolated.
462         llvm::Value* getRegRoot(llvm::Value*) const;
463 
464         ///Given the currentInstruction and root identifier of CC, find the element
465         ///that belongs to CC and is a proper dominator of the currentInstruction.
466         ///In straight-line codes and in separate payload and de-ssa phases, this
467         ///actually should always give the results in one iteration.
468         // Pop registers from the stack represented by ImmediateDominatingParent
469         // until we find a parent that dominates the current instruction.
GetActualDominatingParent(llvm::Value * RootV,llvm::Instruction * currentInstruction)470         inline llvm::Value* GetActualDominatingParent(llvm::Value* RootV, llvm::Instruction* currentInstruction)
471         {
472             llvm::Value* NewParent = CurrentDominatingParent[RootV];
473 
474             while (NewParent) {
475                 if (getRegRoot(NewParent)) {
476                     //Fixme: here it does not apply.
477                     // we have added the another condition because the domination-test
478                     // does not work between two phi-node. See the following comments
479                     // from the DT::dominates:
480                     // " It is not possible to determine dominance between two PHI nodes
481                     //   based on their ordering
482                     //  if (isa<PHINode>(A) && isa<PHINode>(B))
483                     //    return false;"
484                     if (llvm::isa<llvm::Argument>(NewParent)) {
485                         break;
486                     }
487                     else if (DT->dominates(llvm::cast<llvm::Instruction>(NewParent), currentInstruction)) {
488                         break;
489                     } /* else if (cast<llvm::Instruction>(NewParent)->getParent() == MBB &&
490                         isa<PHINode>(DefMI) && isa<PHINode>(NewParent)) {
491                             break;
492                     } */
493                 }
494                 NewParent = ImmediateDominatingParent[NewParent];
495             }
496 
497             return NewParent;
498         }
499 
500         //Here we test the interference between the
501         //Algorithm:
502         //Starting from the currentDominatingParent, walk the congruence class
503         //dominance tree upward, until an element that dominates the currentInstruction
504         //is found.
505         //Since this method could be called on the non-dominance traversal (e.g.
506         //currentInstruction dominates some elements in the tree)
507         // Say we have a dominance tree that is already constructed (lifetime goes
508         // downwards):
509         // CC dominance tree
510         //    v67
511         //    ^
512         //    |
513         //   v121 (dominating)
514         //    ^
515         //    |    -----   v181
516         //    |
517         //   v190 <- (dominated)
518         //
519         // Want to check the interference with v181, which dominates v190, but is dominated
520         // by v121. if we would just look for a dominating element for v181, we would find
521         // out that v121 is dominator of v181 and (say) it is not interfering with v181, thus
522         // leading to conclusion that there is no interference between v181 and CC dominance tree.
523 
SymmetricInterferenceTest(llvm::Value * RootV,llvm::Instruction * currentInstruction,llvm::Value * & dominating,llvm::Value * & dominated)524         inline void SymmetricInterferenceTest(
525             llvm::Value* RootV,
526             llvm::Instruction* currentInstruction,
527             llvm::Value*& dominating, //in-out
528             llvm::Value*& dominated)
529         {
530             llvm::Value* NewParent = CurrentDominatingParent[RootV];
531             dominating = nullptr;
532             dominated = nullptr;
533 
534             while (NewParent)
535             {
536                 if (getRegRoot(NewParent)) //not isolated
537                 {
538                     if (llvm::isa<llvm::Argument>(NewParent))
539                     {
540                         dominating = NewParent;
541                         break;
542                     }
543                     else if (DT->dominates(llvm::cast<llvm::Instruction>(NewParent), currentInstruction))
544                     {
545                         dominating = NewParent;
546                         break;
547                     }
548                     dominated = NewParent;
549                 }
550                 NewParent = ImmediateDominatingParent[NewParent];
551             }
552         }
553 
GetValueCCTupleMapping(llvm::Value * val)554         inline CCTuple* GetValueCCTupleMapping(llvm::Value* val)
555         {
556             llvm::Value* RootV = getRegRoot(val);
557             if (!RootV) {
558                 return NULL;
559             }
560 
561             IGC_ASSERT(ValueNodeMap.count(RootV));
562             auto RI = ValueNodeMap.find(RootV);
563             ElementNode* Node = RI->second;
564 
565             auto CCI = NodeCCTupleMap.find(Node);
566             if (CCI != NodeCCTupleMap.end()) {
567                 return CCI->second;
568             }
569             else {
570                 return NULL;
571             }
572         }
573 
574         /// Caller is responsible for assuring that value is not isolated
575         // e.g. by calling GetCalueCCTupleMapping previously.
GetValueOffsetInCCTuple(llvm::Value * val)576         inline int GetValueOffsetInCCTuple(llvm::Value* val)
577         {
578             llvm::Value* RootV = getRegRoot(val);
579             IGC_ASSERT(nullptr != RootV);
580             IGC_ASSERT(ValueNodeMap.count(RootV));
581 
582             auto RI = ValueNodeMap.find(RootV);
583             ElementNode* Node = RI->second;
584 
585             auto CCI = NodeOffsetMap.find(Node);
586             IGC_ASSERT(CCI != NodeOffsetMap.end());
587             return CCI->second;
588         }
589 
590         //////////////////////////////////////////////////////////////////////////
591         class ElementFunctor {
592         public:
593             virtual void SetIndex(int index) = 0;
594             virtual bool visitCopy() = 0;
595             virtual bool visitConstant() = 0;
596             virtual bool visitArgument() = 0;
597             virtual bool visitIsolated() = 0;
598             virtual bool visitAnchored() = 0;
599             virtual bool visitInterfering(llvm::Value* val, const bool evictFullCongruenceClass) = 0;
600             virtual bool visitPackedNonInterfering(llvm::Value* val) = 0;
~ElementFunctor()601             virtual ~ElementFunctor() {}
602         };
603 
604         //////////////////////////////////////////////////////////////////////////
605         class GatherWeightElementFunctor : public ElementFunctor
606         {
607         public:
GatherWeightElementFunctor()608             GatherWeightElementFunctor() :
609                 nAlignedAnchors(0),
610                 nInsertionSlotRequired(0),
611                 nNeedsDisplacement(0)
612             {
613             }
614 
SetIndex(int index)615             virtual void SetIndex(int index)
616             {
617             }
618 
visitCopy()619             virtual bool visitCopy()
620             {
621                 nInsertionSlotRequired++;
622                 return true;
623             }
624 
visitConstant()625             virtual bool visitConstant()
626             {
627                 nInsertionSlotRequired++;
628                 return true;
629             }
630 
631             ///Not used yet, but is here in an interface
632             ///for completeness.
visitArgument()633             virtual bool visitArgument()
634             {
635                 return true;
636             }
637 
visitIsolated()638             virtual bool visitIsolated()
639             {
640                 nInsertionSlotRequired++;
641                 return true;
642             }
643 
644             ///Visits constrained (anchored) element.
visitAnchored()645             virtual bool visitAnchored()
646             {
647                 nAlignedAnchors++;
648                 return true;
649             }
650 
651             ///Visits a value in a tuple that interferes with some (non-isolated)
652             ///element in a CC class that occupies the same slot.
visitInterfering(llvm::Value * val,const bool evictFullCongruenceClass)653             virtual bool visitInterfering(
654                 llvm::Value* val,
655                 const bool evictFullCongruenceClass)
656             {
657                 nNeedsDisplacement++;
658                 return true;
659             }
660 
visitPackedNonInterfering(llvm::Value * val)661             virtual bool visitPackedNonInterfering(llvm::Value* val)
662             {
663                 return true;
664             }
665 
666             ///Gets the number of 'anchored' elements in a tuple.
GetNumAlignedAnchors() const667             inline int GetNumAlignedAnchors() const
668             {
669                 return nAlignedAnchors;
670             }
671 
GetNumInsertionSlotsRequired() const672             inline int GetNumInsertionSlotsRequired() const
673             {
674                 return nInsertionSlotRequired;
675             }
676 
677             ///Gets the number of values in a tuple
GetNumNeedsDisplacement() const678             inline int GetNumNeedsDisplacement() const
679             {
680                 return nNeedsDisplacement;
681             }
682 
683         private:
684             int nAlignedAnchors;
685             int nInsertionSlotRequired;
686             int nNeedsDisplacement;
687         };
688 
689         //////////////////////////////////////////////////////////////////////////
690         class ProcessInterferencesElementFunctor : public ElementFunctor
691         {
692         private:
693             bool m_forceEviction;
694             bool m_interferes;
695             llvm::Instruction* m_tupleInst;
696             const int m_offsetDiff;
697             CCTuple* m_ccTuple;
698             CoalescingEngine* m_CE;
699             int m_index;
700             llvm::SmallPtrSet<llvm::Value*, 8> m_valuesForIsolation;
701 
702         public:
ProcessInterferencesElementFunctor(bool forceEviction,llvm::Instruction * inst,const int offsetDiff,CCTuple * ccTuple,CoalescingEngine * CE)703             ProcessInterferencesElementFunctor(
704                 bool forceEviction,
705                 llvm::Instruction* inst,
706                 const int offsetDiff,
707                 CCTuple* ccTuple,
708                 CoalescingEngine* CE) :
709                 m_forceEviction(forceEviction),
710                 m_interferes(false),
711                 m_tupleInst(inst),
712                 m_offsetDiff(offsetDiff),
713                 m_ccTuple(ccTuple),
714                 m_CE(CE),
715                 m_index(0)
716             {
717 
718             }
719 
GetComputedValuesForIsolation()720             llvm::SmallPtrSet<llvm::Value*, 8> & GetComputedValuesForIsolation()
721             {
722                 return m_valuesForIsolation;
723             }
724 
SetForceEviction(bool force)725             inline void SetForceEviction(bool force)
726             {
727                 m_forceEviction = force;
728             }
729 
IsInterfering() const730             inline bool IsInterfering() const
731             {
732                 return m_interferes;
733             }
734 
SetIndex(int index)735             virtual void SetIndex(int index)
736             {
737                 m_index = index;
738             }
739 
visitCopy()740             virtual bool visitCopy()
741             {
742                 return m_CE->EvictOrStop(
743                     m_ccTuple,
744                     m_index + m_offsetDiff,
745                     m_tupleInst,
746                     m_forceEviction,
747                     m_interferes);
748             }
visitConstant()749             virtual bool visitConstant()
750             {
751                 return m_CE->EvictOrStop(
752                     m_ccTuple,
753                     m_index + m_offsetDiff,
754                     m_tupleInst,
755                     m_forceEviction,
756                     m_interferes);
757             }
visitIsolated()758             virtual bool visitIsolated()
759             {
760                 return m_CE->EvictOrStop(
761                     m_ccTuple,
762                     m_index + m_offsetDiff,
763                     m_tupleInst,
764                     m_forceEviction,
765                     m_interferes);
766             }
767 
visitArgument()768             virtual bool visitArgument()
769             {
770                 return m_CE->EvictOrStop(
771                     m_ccTuple,
772                     m_index + m_offsetDiff,
773                     m_tupleInst,
774                     m_forceEviction,
775                     m_interferes);
776             }
777 
visitAnchored()778             virtual bool visitAnchored()
779             {
780                 return true;
781             }
782 
visitInterfering(llvm::Value * val,const bool evictFullCongruenceClass)783             virtual bool visitInterfering(llvm::Value* val, const bool evictFullCongruenceClass)
784             {
785                 if (m_forceEviction) {
786 
787                     // HEURISTIC:
788                     if (m_CE->DetermineWeight(val) < 2) {
789 
790                         m_CE->PrepareInsertionSlot(
791                             m_ccTuple,
792                             m_index + m_offsetDiff,
793                             m_tupleInst,
794                             false //evictFullCongruenceClass
795                         );
796 
797                         m_valuesForIsolation.insert(val);
798                     }
799                     else
800                     {
801                         m_CE->PrepareInsertionSlot(
802                             m_ccTuple,
803                             m_index + m_offsetDiff,
804                             m_tupleInst,
805                             true //evictFullCongruenceClass
806                         );
807 
808                     }
809                     return true;
810                 }
811                 else {
812                     m_interferes = true;
813                     return false;
814                 }
815 
816                 return true;
817             }
818 
visitPackedNonInterfering(llvm::Value * val)819             virtual bool visitPackedNonInterfering(llvm::Value* val)
820             {
821                 if (m_CE->DetermineWeight(val) < 2) {
822                     m_valuesForIsolation.insert(val);
823                 }
824 
825                 return true;
826             }
827         };
828 
829         void ProcessElements(
830             const uint numOperands,
831             llvm::Instruction* tupleInst,
832             const int offsetDiff,
833             CCTuple* ccTuple,
834             ElementFunctor* functor);
835 
836     public:
837 
GetNumPayloadElements(llvm::Instruction * inst)838         uint GetNumPayloadElements(llvm::Instruction* inst)
839         {
840             if (currentPart_ == 0)
841             {
842                 auto iter = splitPoint_.find(inst);
843                 if (iter == splitPoint_.end())
844                 {
845                     //means we have no split point, can
846                     return m_PayloadMapping.GetNumPayloadElements(inst);
847                 }
848                 else
849                 {
850                     IGC_ASSERT((*iter).second > 0);
851                     return (*iter).second;
852                 }
853                 //return splitPoint_
854             }
855             else
856             {
857                 IGC_ASSERT(currentPart_ == 1);
858                 auto iter = splitPoint_.find(inst);
859                 IGC_ASSERT(iter != splitPoint_.end());
860                 return m_PayloadMapping.GetNumPayloadElements(inst) - (*iter).second;
861             }
862         }
863         //uint GetNonAdjustedNumPayloadElements_Sample(const SampleIntrinsic *inst)
864         //{
865         //    return m_PayloadMapping.GetNonAdjustedNumPayloadElements_Sample(inst);
866         //}
867 
GetNumPayloadElements_Sample(const llvm::SampleIntrinsic * inst)868         int GetNumPayloadElements_Sample(const llvm::SampleIntrinsic* inst)
869         {
870             return m_PayloadMapping.GetNumPayloadElements_Sample(inst);
871         }
872 
873         /// Get the mapping from the payload element (at position index)
874         /// to intrinsic argument value. Indexing is zero based.
GetPayloadElementToValueMapping(const llvm::Instruction * inst,uint index)875         llvm::Value* GetPayloadElementToValueMapping(const llvm::Instruction* inst, uint index)
876         {
877             return m_PayloadMapping.GetPayloadElementToValueMapping(inst, currentLowBound_ + index);
878         }
GetPayloadElementToValueMapping_sample(const llvm::SampleIntrinsic * inst,const uint index)879         llvm::Value* GetPayloadElementToValueMapping_sample(const llvm::SampleIntrinsic* inst, const uint index)
880         {
881             return m_PayloadMapping.GetPayloadElementToValueMapping_sample(inst, index);
882         }
GetNonAdjustedPayloadElementToValueMapping_sample(const llvm::SampleIntrinsic * inst,const uint index)883         llvm::Value* GetNonAdjustedPayloadElementToValueMapping_sample(const llvm::SampleIntrinsic* inst, const uint index)
884         {
885             return m_PayloadMapping.GetNonAdjustedPayloadElementToValueMapping_sample(inst, index);
886         }
887 
HasNonHomogeneousPayloadElements(const llvm::Instruction * inst)888         bool HasNonHomogeneousPayloadElements(const llvm::Instruction* inst)
889         {
890             return m_PayloadMapping.HasNonHomogeneousPayloadElements(inst);
891         }
GetLeftReservedOffset(const llvm::Instruction * inst,SIMDMode simdMode)892         int GetLeftReservedOffset(const llvm::Instruction* inst, SIMDMode simdMode)
893         {
894             return m_PayloadMapping.GetLeftReservedOffset(inst, simdMode);
895         }
896 
GetRightReservedOffset(const llvm::Instruction * inst,SIMDMode simdMode)897         int GetRightReservedOffset(const llvm::Instruction* inst, SIMDMode simdMode)
898         {
899             return m_PayloadMapping.GetRightReservedOffset(inst, simdMode);
900         }
GetSupremumOfNonHomogeneousPart(const llvm::Instruction * inst1,const llvm::Instruction * inst2)901         const llvm::Instruction* GetSupremumOfNonHomogeneousPart(
902             const llvm::Instruction* inst1,
903             const llvm::Instruction* inst2)
904         {
905             return m_PayloadMapping.GetSupremumOfNonHomogeneousPart(inst1, inst2);
906         }
907 
GetNumSplitParts(llvm::Instruction * inst)908         uint GetNumSplitParts(llvm::Instruction* inst)
909         {
910             auto iter = splitPoint_.find(inst);
911             if (iter != splitPoint_.end())
912             {
913                 uint splitIndex = (*iter).second;
914                 return splitIndex > 0 ? 2 : 1;
915             }
916             else
917             {
918                 return 1;
919             }
920         }
921 
SetCurrentPart(llvm::Instruction * inst,unsigned int partNum)922         void SetCurrentPart(llvm::Instruction* inst, unsigned int partNum)
923         {
924             if (partNum == 0)
925             {
926                 currentLowBound_ = 0;
927                 currentPart_ = partNum;
928                 //            currentUpperBound_ = splitPoint_[inst];
929             }
930             else
931             {
932                 IGC_ASSERT(partNum == 1);
933                 currentLowBound_ = splitPoint_[inst];
934                 IGC_ASSERT(currentLowBound_ > 0);
935                 currentPart_ = partNum;
936             }
937         }
938 
939     private:
940 
941         CPlatform m_Platform;
942         PayloadMapping m_PayloadMapping;
943         std::vector<CCTuple*> m_CCTupleList;
944         llvm::DominatorTree* DT;
945         LiveVars* LV;
946         WIAnalysis* WIA;
947         DeSSA* m_DeSSA;
948         CodeGenPatternMatch* CG;
949         llvm::DenseMap<llvm::Value*, ElementNode*> ValueNodeMap;
950         llvm::DenseMap<ElementNode*, CCTuple*> NodeCCTupleMap;
951         //Mapping root element node to its offset in cc tuple:
952         llvm::DenseMap<ElementNode*, int> NodeOffsetMap;
953         llvm::DenseMap<llvm::Value*, uint> ValueWeightMap;
954         ModuleMetaData* m_ModuleMetadata;
955         CodeGenContext* m_CodeGenContext;
956         //Maps a basic block to a list of instruction defs to be processed for coalescing (in dominance order)
957         llvm::DenseMap<llvm::BasicBlock*, std::vector<llvm::Instruction*> > BBProcessingDefs;
958 
959 
960         /* Taken from strong DE SSA */
961         // Perform a depth-first traversal of the dominator tree, splitting
962         // interferences amongst PHI-congruence classes.
963 
964         llvm::BumpPtrAllocator Allocator;
965 
966         llvm::DenseMap<llvm::Value*, llvm::Value*> CurrentDominatingParent;
967         llvm::DenseMap<llvm::Value*, llvm::Value*> ImmediateDominatingParent;
968 
969         llvm::DenseMap<llvm::Instruction*, uint> splitPoint_;
970         unsigned currentLowBound_;
971         //unsigned currentUpperBound_;
972         unsigned currentPart_;
973 
974         /// Get the union-root of a PHI. The root of a PHI is 0 if the PHI has been
975         /// isolated. Otherwise, it is the original root of its destination and
976         /// all of its operands (before they were isolated, if they were).
977         llvm::Value* getPHIRoot(llvm::Instruction*) const;
978 
979         void unionRegs(llvm::Value*, llvm::Value*);
980 
isUniform(llvm::Value * v) const981         bool isUniform(llvm::Value* v) const {
982             return (WIA->isUniform(v));
983         }
984 
985         // Isolate a register.
isolateReg(llvm::Value * Val)986         void isolateReg(llvm::Value* Val) {
987             ElementNode* Node = ValueNodeMap[Val];
988             Node->parent.setInt(Node->parent.getInt() | ElementNode::kRegisterIsolatedFlag);
989         }
990         /* Taken from strong DE SSA */
991 
992 
993 
994         struct MIIndexCompare {
MIIndexCompareIGC::CoalescingEngine::MIIndexCompare995             MIIndexCompare(LiveVars* _lv) : LV(_lv) { }
996 
operator ()IGC::CoalescingEngine::MIIndexCompare997             bool operator()(const llvm::Instruction* LHS, const llvm::Instruction* RHS) const {
998                 return LV->getDistance(LHS) < LV->getDistance(RHS);
999             }
1000 
1001             LiveVars* LV;
1002         };
1003 
1004         bool isCoalescedByDeSSA(llvm::Value* V) const;
1005     };
1006 
1007 } //namespace IGC
1008