1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 //
10 // GenXLiveness is an analysis that contains the liveness information for the
11 // values in the code. See the comment at the top of GenXLiveness.h for further
12 // details.
13 //
14 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "GENX_LIVENESS"
16 
17 #include "GenXLiveness.h"
18 #include "GenX.h"
19 #include "GenXBaling.h"
20 #include "GenXIntrinsics.h"
21 #include "GenXNumbering.h"
22 #include "GenXSubtarget.h"
23 #include "GenXTargetMachine.h"
24 #include "GenXUtil.h"
25 #include "vc/GenXOpts/GenXAnalysis.h"
26 #include "vc/GenXOpts/Utils/InternalMetadata.h"
27 #include "vc/GenXOpts/Utils/RegCategory.h"
28 
29 #include "llvm/GenXIntrinsics/GenXMetadata.h"
30 
31 #include "Probe/Assertion.h"
32 #include "llvmWrapper/IR/InstrTypes.h"
33 #include "llvmWrapper/IR/Instructions.h"
34 
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/CodeGen/TargetPassConfig.h"
37 #include "llvm/IR/BasicBlock.h"
38 #include "llvm/IR/DiagnosticInfo.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/InstIterator.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/Intrinsics.h"
44 #include "llvm/IR/Type.h"
45 #include "llvm/Support/Debug.h"
46 
47 #include <unordered_set>
48 
49 using namespace llvm;
50 using namespace genx;
51 
SimpleValue(const AssertingSV & ASV)52 SimpleValue::SimpleValue(const AssertingSV &ASV)
53     : SimpleValue(ASV.getValue(), ASV.getIndex()) {}
54 
getAnalysisUsage(AnalysisUsage & AU)55 void GenXLiveness::getAnalysisUsage(AnalysisUsage &AU) {
56   AU.addRequired<TargetPassConfig>();
57   AU.setPreservesAll();
58 }
59 
60 /***********************************************************************
61  * runOnFunctionGroup : do nothing
62  */
runOnFunctionGroup(FunctionGroup & ArgFG)63 bool GenXLiveness::runOnFunctionGroup(FunctionGroup &ArgFG)
64 {
65   FG = &ArgFG;
66   Subtarget = &getAnalysis<TargetPassConfig>()
67                    .getTM<GenXTargetMachine>()
68                    .getGenXSubtarget();
69   DL = &ArgFG.getModule()->getDataLayout();
70   return false;
71 }
72 
73 /***********************************************************************
74  * releaseMemory: clear the GenXLiveness
75  */
releaseMemory()76 void GenXLiveness::releaseMemory() {
77   while (!LiveRangeMap.empty()) {
78     LiveRange *LR = begin()->second;
79     for (auto i = LR->value_begin(), e = LR->value_end(); i != e; ++i) {
80       SimpleValue V = *i;
81       LiveRangeMap.erase(V);
82     }
83     delete LR;
84   }
85   FG = 0;
86   CG.reset();
87   for (auto i = UnifiedRets.begin(), e = UnifiedRets.end(); i != e; ++i)
88     i->second->deleteValue();
89   UnifiedRets.clear();
90   UnifiedRetToFunc.clear();
91   ArgAddressBaseMap.clear();
92   BaseToArgAddrMap.clear();
93 }
94 
95 /***********************************************************************
96  * setLiveRange : add a SimpleValue to a LiveRange
97  *
98  * This:
99  * 1. adds the SimpleValue to the LiveRange's value list;
100  * 2. sets the SimpleValue's entry in the map to point to the LiveRange.
101  */
setLiveRange(SimpleValue V,LiveRange * LR)102 void GenXLiveness::setLiveRange(SimpleValue V, LiveRange *LR)
103 {
104   IGC_ASSERT_MESSAGE(
105       LiveRangeMap.find(V) == end(),
106       "Attempting to set LiveRange for Value that already has one");
107   LR->addValue(V);
108   LiveRangeMap[V] = LR;
109   LR->setAlignmentFromValue(
110       *DL, V, Subtarget ? Subtarget->getGRFByteSize() : defaultGRFByteSize);
111 }
112 
113 /***********************************************************************
114  * setAlignmentFromValue : set a live range's alignment from a value
115  */
setAlignmentFromValue(const DataLayout & DL,const SimpleValue V,const unsigned GRFWidth)116 void LiveRange::setAlignmentFromValue(const DataLayout &DL, const SimpleValue V,
117                                       const unsigned GRFWidth) {
118   const unsigned AlignInBytes = getValueAlignmentInBytes(*(V.getValue()), DL);
119   const unsigned LogByteAlign = Log2_32(AlignInBytes);
120   // Set max alignment to GRF.
121   const unsigned MaxLogAlign =
122       genx::getLogAlignment(VISA_Align::ALIGN_GRF, GRFWidth);
123   const unsigned SaturatedLogAlign = std::clamp(LogByteAlign, 0u, MaxLogAlign);
124   setLogAlignment(ceilLogAlignment(SaturatedLogAlign, GRFWidth));
125 }
126 
127 /***********************************************************************
128  * rebuildCallGraph : rebuild GenXLiveness's call graph
129  */
rebuildCallGraph()130 void GenXLiveness::rebuildCallGraph()
131 {
132   CG = std::make_unique<genx::CallGraph>(FG);
133   CG->build(this);
134 }
135 
136 /***********************************************************************
137  * buildSubroutineLRs : build the subroutine LRs
138  *
139  * If the FunctionGroup has subroutines, then each one (each Function other
140  * than the head one) gets a "subroutine LR", giving the live range
141  * of the whole subroutine plus any other subroutines it can call.
142  * Then, when building a real live range later, if it goes over a call,
143  * we can add the subroutine LR.
144  *
145  * The subroutine LR has weak liveness, as that's what we want to add to
146  * anything live over a call to the subroutine.
147  */
buildSubroutineLRs()148 void GenXLiveness::buildSubroutineLRs()
149 {
150   if (FG->size() == 1)
151     return; // no subroutines
152   // Build a call graph for the FunctionGroup. It is acyclic because there is
153   // no recursion.
154   rebuildCallGraph();
155   // Depth-first walk the graph to propagate live ranges upwards.
156   visitPropagateSLRs(FG->getHead());
157 }
158 
159 /***********************************************************************
160  * visitPropagateSLRs : visit a callgraph node to propagate subroutine LR
161  *
162  * This is recursive.
163  */
visitPropagateSLRs(Function * F)164 LiveRange *GenXLiveness::visitPropagateSLRs(Function *F)
165 {
166   LiveRange *LR = getOrCreateLiveRange(F);
167   // Add a segment for just this function.
168   LR->push_back(Segment(Numbering->getNumber(F),
169       Numbering->getNumber(F->back().getTerminator()) + 1, Segment::WEAK));
170   // For each child...
171   genx::CallGraph::Node *N = CG->getNode(F);
172   for (auto i = N->begin(), e = N->end(); i != e; ++i) {
173     // Visit the child to calculate its LR.
174     LiveRange *ChildLR = visitPropagateSLRs(i->Call->getCalledFunction());
175     // Merge it into ours.
176     LR->addSegments(ChildLR);
177   }
178   LR->sortAndMerge();
179   return LR;
180 }
181 
182 /***********************************************************************
183  * buildLiveRange : build live range for one value (arg or non-baled inst)
184  *
185  * For a struct value, each element's live range is built separately, even
186  * though they are almost identical. They are not exactly identical,
187  * differing at the def if it is the return value of a call, and at a use
188  * that is a call arg.
189  */
buildLiveRange(Value * V)190 void GenXLiveness::buildLiveRange(Value *V)
191 {
192   auto ST = dyn_cast<StructType>(V->getType());
193   if (!ST) {
194     buildLiveRange(SimpleValue(V));
195     return;
196   }
197   for (unsigned i = 0, e = IndexFlattener::getNumElements(ST); i != e; ++i)
198     buildLiveRange(SimpleValue(V, i));
199 }
200 
201 /***********************************************************************
202  * buildLiveRange : build live range for one SimpleValue
203  *
204  * rebuildLiveRange : rebuild live range for a LiveRange struct
205  *
206  * The BBs[] array, one entry per basic block, is temporarily used here to
207  * store the live range for the value within that block. We start by
208  * registering the short live range for the definition, then, for each use,
209  * create a live range in the use's block then recursively scan back
210  * through predecessors until we meet a block where there is already a
211  * live range. This is guaranteed to terminate because of the dominance
212  * property of SSA.
213  *
214  * See Appel "Modern Compiler Implementation in C" 19.6.
215  *
216  * rebuildLiveRange can be called from later passes to rebuild the segments
217  * for a particular live range. If used after coalescing, the live range might
218  * have more than one value, in which case segments are added for each value
219  * and then merged. Thus we assume that, after whatever code change a pass made
220  * to require rebuilding the live range, the coalesced values can still be
221  * validly coalesced, without having any way of checking that.
222  *
223  */
buildLiveRange(SimpleValue V)224 LiveRange *GenXLiveness::buildLiveRange(SimpleValue V)
225 {
226   LiveRange *LR = getOrCreateLiveRange(V);
227   rebuildLiveRange(LR);
228   return LR;
229 }
230 
rebuildLiveRange(LiveRange * LR)231 void GenXLiveness::rebuildLiveRange(LiveRange *LR)
232 {
233   LR->getOrDefaultCategory();
234   LR->Segments.clear();
235   for (auto vi = LR->value_begin(), ve = LR->value_end(); vi != ve; ++vi)
236     rebuildLiveRangeForValue(LR, *vi);
237   LR->sortAndMerge();
238 }
239 
rebuildLiveRangeForValue(LiveRange * LR,SimpleValue SV)240 void GenXLiveness::rebuildLiveRangeForValue(LiveRange *LR, SimpleValue SV)
241 {
242   Value *V = SV.getValue();
243 
244   // This value is a global variable. Its live range is the entire kernel.
245   if (auto GV = getUnderlyingGlobalVariable(V)) {
246     (void)GV;
247     LR->push_back(0, Numbering->getLastNumber());
248     return;
249   }
250 
251   std::map<BasicBlock *, Segment> BBRanges;
252   if (auto Func = isUnifiedRet(V)) {
253     // This value is the unified return value of the function Func. Its live
254     // range is from the call to where its post-copy would go just afterwards
255     // for each call site, also from the site of the pre-copy to the return
256     // instruction.
257     for (auto *U: Func->users()) {
258       if (auto *CI = genx::checkFunctionCall(U, Func))
259         LR->push_back(Numbering->getNumber(CI),
260                       Numbering->getRetPostCopyNumber(CI, SV.getIndex()));
261     }
262     for (auto fi = Func->begin(), fe = Func->end(); fi != fe; ++fi)
263       if (auto RI = dyn_cast<ReturnInst>(fi->getTerminator()))
264         LR->push_back(Numbering->getRetPreCopyNumber(RI, SV.getIndex()),
265             Numbering->getNumber(RI));
266     return;
267   }
268 
269   // Mark the value as live and then almost immediately dead again at the
270   // point where it is defined.
271   unsigned StartNum = 0, EndNum = 0;
272   Function *Func = 0;
273   auto Arg = dyn_cast<Argument>(V);
274   BasicBlock *BB = nullptr;
275   if (Arg) {
276     Func = Arg->getParent();
277     StartNum = Numbering->getNumber(Func);
278     EndNum = StartNum + 1;
279     BB = &Func->front();
280   } else if (auto Phi = dyn_cast<PHINode>(V)) {
281     // Phi node. Treat as defined at the start of the block.
282     EndNum = Numbering->getNumber(Phi) + 1;
283     BB = Phi->getParent();
284     StartNum = Numbering->getNumber(BB);
285     // For a phi node, we also need to register an extra little live range at
286     // the end of each predecessor, from where we will insert a copy to the
287     // end. This is done lower down in this function.
288   } else {
289     StartNum = Numbering->getNumber(V);
290     auto Inst = cast<Instruction>(V);
291     BB = Inst->getParent();
292     auto CI = dyn_cast<CallInst>(V);
293     if (CI) {
294       if (!GenXIntrinsic::isAnyNonTrivialIntrinsic(V)) {
295         // For the return value from a call, move the definition point to the ret
296         // post-copy slot after the call, where the post-copy will be inserted if
297         // it fails to be coalesced with the function's unified return value.
298         StartNum = Numbering->getRetPostCopyNumber(CI, SV.getIndex());
299       }
300     }
301     EndNum = StartNum + 1;
302     if (CI && getTwoAddressOperandNum(CI)) {
303       // Two address op. Move the definition point one earlier, to where
304       // GenXCoalescing will need to insert a copy if coalescing fails.
305       --StartNum;
306     }
307   }
308   BBRanges[BB] = Segment(StartNum, EndNum);
309   // The stack for predecessors that need to be processed:
310   std::vector<BasicBlock *> Stack;
311   // Process each use.
312   for (Value::use_iterator i = V->use_begin(), e = V->use_end();
313       i != e; ++i) {
314     BasicBlock *BB = nullptr;
315     Instruction *user = cast<Instruction>(i->getUser());
316     unsigned Num;
317     if (PHINode *Phi = dyn_cast<PHINode>(user)) {
318       // Use in a phi node. We say that the use is where the phi copy will be
319       // placed in the predecessor block.
320       BB = Phi->getIncomingBlock(*i);
321       Num = Numbering->getPhiNumber(Phi, BB);
322     } else {
323       // Normal use.
324       // For live range purposes, an instruction is considered to be at the
325       // same place as the head of its bale. We need to use getBaleHead to
326       // ensure that we consider it to be there.
327       Instruction *UserHead = Baling->getBaleHead(user);
328       BB = UserHead->getParent();
329       Num = Numbering->getNumber(UserHead);
330       if (auto CI = dyn_cast<CallInst>(user)) {
331         if (CI->isInlineAsm() || IGCLLVM::isIndirectCall(*CI))
332           Num = Numbering->getNumber(UserHead);
333         else {
334         switch (GenXIntrinsic::getAnyIntrinsicID(CI)) {
335           case GenXIntrinsic::not_any_intrinsic:
336             // Use as a call arg. We say that the use is at the arg pre-copy
337             // slot, where the arg copy will be inserted in coalescing. This
338             // assumes that the copies will be in the same order as args in the
339             // call, with struct elements in order too.
340             Num = Numbering->getArgPreCopyNumber(CI, i->getOperandNo(),
341                                                  SV.getIndex());
342             break;
343           default:
344             if (auto OpndNum = getTwoAddressOperandNum(CI);
345                 OpndNum && *OpndNum == i->getOperandNo()) {
346               // The use is the two address operand in a two address op. Move
347               // the use point one earlier, to where GenXCoalescing will need
348               // to insert a copy if coalescing fails. If there is any other
349               // use of this value in the same bale, that will not have its use
350               // point one number earlier. The unnecessary interference that
351               // would cause is fixed in the way that twoAddrInterfere()
352               // detects interference.
353               --Num;
354             }
355             break;
356           case GenXIntrinsic::genx_simdcf_goto:
357             // Use in a goto. Treat it as at the branch, as GenXCisaBuilder
358             // writes the goto just before the branch, after any intervening IR.
359             Num = Numbering->getNumber(CI->getParent()->getTerminator());
360             break;
361           }
362         }
363       } else if (auto RI = dyn_cast<ReturnInst>(user)) {
364         // Use in a return. We say that the use is where the ret value
365         // pre-copy will be inserted in coalescing. This assumes that the
366         // copies will be in the same order as the struct elements in the
367         // return value.
368         Num = Numbering->getRetPreCopyNumber(RI, SV.getIndex());
369       }
370     }
371     auto BBRange = &BBRanges[BB];
372     if (BBRange->getEnd()) {
373       // There is already a live range in this block. Extend it if
374       // necessary. No need to scan back from here, so we're done with
375       // this use.
376       if (BBRange->getEnd() < Num)
377         BBRange->setEnd(Num);
378       continue;
379     }
380     // Add a new live range from the start of this block, and remember the
381     // range of blocks that contain a live range (so we don't have to scan
382     // all of them at the end).
383     *BBRange = Segment(Numbering->getNumber(BB), Num);
384     // Push this block's predecessors onto the stack.
385     std::copy(pred_begin(BB), pred_end(BB), std::back_inserter(Stack));
386     // Process stack until empty.
387     while (Stack.size()) {
388       BB = Stack.back();
389       Stack.pop_back();
390       BBRange = &BBRanges[BB];
391       auto BBNum = Numbering->getBBNumber(BB);
392       if (BBRange->getEnd()) {
393         // There is already a live range in this block. Extend it to the end.
394         // No need to scan back from here.
395         BBRange->setEnd(BBNum->EndNumber);
396         continue;
397       }
398       // Add a new live range through the whole of this block, and remember the
399       // range of blocks that contain a live range (so we don't have to scan
400       // all of them at the end).
401       BBRange->setStartEnd(Numbering->getNumber(BB), BBNum->EndNumber);
402       // Push this block's predecessors onto the stack.
403       std::copy(pred_begin(BB), pred_end(BB), std::back_inserter(Stack));
404     }
405   }
406   // Now we can build the live range.
407   for (auto bri = BBRanges.begin(), bre = BBRanges.end(); bri != bre; ++bri) {
408     auto BBRange = &bri->second;
409     LR->push_back(*BBRange);
410   }
411   if (PHINode *Phi = dyn_cast<PHINode>(V)) {
412     // For a phi node, we also need to register an extra little live range at
413     // the end of each predecessor, from where we will insert a copy to the
414     // end.
415     for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
416       auto Pred = Phi->getIncomingBlock(i);
417       auto BBNum = Numbering->getBBNumber(Pred);
418       LR->push_back(Segment(Numbering->getPhiNumber(Phi, Pred),
419             BBNum->EndNumber, Segment::PHICPY));
420     }
421   }
422   LR->sortAndMerge();
423   if (CG) {
424     // Check if the live range crosses any call instruction. If so, add the
425     // appropriate subroutine live range.
426     bool NeedSort = false;
427     auto N = CG->getNode(Func);
428     for (auto i = N->begin(), e = N->end(); i != e; ++i) {
429       auto E = &*i;
430       // See if this call is in a segment of the LR.
431       auto Seg = LR->find(E->Number);
432       if (Seg != LR->end() && Seg->getStart() <= E->Number && Seg->getEnd() > E->Number) {
433         // Yes it is. Merge the subroutine LR of the callee into our LR.
434         if (!E->Call->getCalledFunction()->hasFnAttribute("CMStackCall"))
435           LR->addSegments(getLiveRange(E->Call->getCalledFunction()));
436         NeedSort = true;
437       }
438     }
439     if (NeedSort)
440       LR->sortAndMerge();
441   }
442   if (Arg) {
443     // For a function arg, for each call site, add a segment from the arg
444     // pre-copy site, the point just before the call at which it will be copied
445     // into, up to the call.  We assume that any copies before the call
446     // inserted by coalescing will be in the obvious order of args and elements
447     // within args.
448     Function *F = Arg->getParent();
449     if (*FG->begin() != F) { // is a subroutine
450       for (auto *U : F->users()) {
451         if (auto *CI = genx::checkFunctionCall(U, F))
452           LR->push_back(Numbering->getArgPreCopyNumber(CI, Arg->getArgNo(),
453                                                        SV.getIndex()),
454                         Numbering->getNumber(CI));
455       }
456     }
457   }
458 }
459 
removeBale(Bale & B)460 void GenXLiveness::removeBale(Bale &B) {
461   for (auto bi = B.begin(), be = B.end(); bi != be; ++bi)
462     removeValue(bi->Inst);
463 }
464 
465 /***********************************************************************
466  * removeValue : remove the supplied value from its live range, and delete
467  *               the range if it now has no values
468  *
469  * removeValueNoDelete : same, but do not delete the LR if it is now
470  *               valueless
471  *
472  * Calling this with a value that does not have a live range is silently
473  * ignored.
474  */
removeValue(Value * V)475 void GenXLiveness::removeValue(Value *V)
476 {
477   for (unsigned i = 0, e = IndexFlattener::getNumElements(V->getType()); i != e; ++i)
478     removeValue(SimpleValue(V, i));
479 }
480 
removeValue(SimpleValue V)481 void GenXLiveness::removeValue(SimpleValue V)
482 {
483   LiveRange *LR = removeValueNoDelete(V);
484   if (LR && !LR->Values.size()) {
485     // V was the only value in LR. Remove LR completely.
486     delete LR;
487   }
488 }
489 
removeValueNoDelete(SimpleValue V)490 LiveRange *GenXLiveness::removeValueNoDelete(SimpleValue V)
491 {
492   LiveRangeMap_t::iterator i = LiveRangeMap.find(V);
493   if (i == end())
494     return nullptr;
495   LiveRange *LR = i->second;
496   LiveRangeMap.erase(i);
497   // Remove V from LR.
498   unsigned j;
499   for (j = 0; LR->Values[j].get() != V; ++j) {
500     IGC_ASSERT(j != LR->Values.size());
501   }
502   if (&LR->Values[j] != &LR->Values.back())
503     LR->Values[j] = LR->Values.back();
504   LR->Values.pop_back();
505   return LR;
506 }
507 
508 /***********************************************************************
509  * removeValuesNoDelete : remove all values from the live range, but do not
510  *        delete the LR
511  */
removeValuesNoDelete(LiveRange * LR)512 void GenXLiveness::removeValuesNoDelete(LiveRange *LR)
513 {
514   for (auto vi = LR->value_begin(), ve = LR->value_end(); vi != ve; ++vi)
515     LiveRangeMap.erase(*vi);
516   LR->value_clear();
517 }
518 
519 /***********************************************************************
520  * replaceValue : update liveness such that NewVal has OldVal's live range,
521  *    and OldVal does not have one at all.
522  */
replaceValue(Value * OldVal,Value * NewVal)523 void GenXLiveness::replaceValue(Value *OldVal, Value *NewVal)
524 {
525   for (unsigned i = 0, e = IndexFlattener::getNumElements(OldVal->getType());
526       i != e; ++i)
527     replaceValue(SimpleValue(OldVal, i), SimpleValue(NewVal, i));
528 }
529 
replaceValue(SimpleValue OldVal,SimpleValue NewVal)530 void GenXLiveness::replaceValue(SimpleValue OldVal, SimpleValue NewVal)
531 {
532   LiveRangeMap_t::iterator i = LiveRangeMap.find(OldVal);
533   IGC_ASSERT(i != end());
534   LiveRange *LR = i->second;
535   LiveRangeMap.erase(i);
536   LiveRangeMap[NewVal] = LR;
537   unsigned j = 0;
538   IGC_ASSERT(!LR->Values.empty());
539   for (j = 0; LR->Values[j].get() != OldVal; ++j)
540     IGC_ASSERT(j != LR->Values.size());
541   LR->Values[j] = NewVal;
542 }
543 
544 /***********************************************************************
545  * getOrCreateLiveRange : get live range for a value, creating if necessary
546  */
getOrCreateLiveRange(SimpleValue V)547 LiveRange *GenXLiveness::getOrCreateLiveRange(SimpleValue V)
548 {
549   LiveRangeMap_t::iterator i = LiveRangeMap.insert(
550       LiveRangeMap_t::value_type(V, 0)).first;
551   LiveRange *LR = i->second;
552   if (!LR) {
553     // Newly created map entry. Create the LiveRange for it.
554     LR = new LiveRange;
555     LR->Values.push_back(V);
556     i->second = LR;
557     LR->setAlignmentFromValue(
558         *DL, V, Subtarget ? Subtarget->getGRFByteSize() : defaultGRFByteSize);
559   }
560 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
561   // Give the Value a name if it doesn't already have one.
562   if (!V.getValue()->getName().size()) {
563     std::string NameBuf;
564     StringRef Name = "arg";
565     if (auto Inst = dyn_cast<Instruction>(V.getValue())) {
566       unsigned IID = GenXIntrinsic::getAnyIntrinsicID(V.getValue());
567       if (GenXIntrinsic::isAnyNonTrivialIntrinsic(IID)) {
568         // For an intrinsic call, use the intrinsic name after the
569         // final period.
570         NameBuf = GenXIntrinsic::getAnyName(IID, None);
571         Name = NameBuf;
572         size_t Period = Name.rfind('.');
573         if (Period != StringRef::npos)
574           Name = Name.slice(Period + 1, Name.size());
575       } else
576         Name = Inst->getOpcodeName();
577     }
578     V.getValue()->setName(Name);
579   }
580 #endif
581   return LR;
582 }
583 
getOrCreateLiveRange(SimpleValue V,unsigned Cat,unsigned LogAlign)584 LiveRange *GenXLiveness::getOrCreateLiveRange(SimpleValue V, unsigned Cat, unsigned LogAlign) {
585   auto LR = getOrCreateLiveRange(V);
586   LR->setCategory(Cat);
587   LR->setLogAlignment(LogAlign);
588   return LR;
589 }
590 
591 /***********************************************************************
592  * eraseLiveRange : get rid of live range for a Value, possibly multiple
593  *  ones if it is a struct value
594  */
eraseLiveRange(Value * V)595 void GenXLiveness::eraseLiveRange(Value *V)
596 {
597   auto ST = dyn_cast<StructType>(V->getType());
598   if (!ST) {
599     eraseLiveRange(SimpleValue(V));
600     return;
601   }
602   for (unsigned i = 0, e = IndexFlattener::getNumElements(ST); i != e; ++i)
603     eraseLiveRange(SimpleValue(V, i));
604 }
605 
606 /***********************************************************************
607  * eraseLiveRange : get rid of live range for a Value, if any
608  */
eraseLiveRange(SimpleValue V)609 void GenXLiveness::eraseLiveRange(SimpleValue V)
610 {
611   auto LR = getLiveRangeOrNull(V);
612   if (LR)
613     eraseLiveRange(LR);
614 }
615 
616 /***********************************************************************
617  * eraseLiveRange : get rid of the specified live range, and remove its
618  *        values from the map
619  */
eraseLiveRange(LiveRange * LR)620 void GenXLiveness::eraseLiveRange(LiveRange *LR)
621 {
622   for (auto vi = LR->value_begin(), ve = LR->value_end(); vi != ve; ++vi)
623     LiveRangeMap.erase(*vi);
624   delete LR;
625 }
626 
627 /***********************************************************************
628  * getLiveRangeOrNull : get live range for value, or 0 if none
629  *
630  * The returned LiveRange pointer is valid only until the next time the
631  * live ranges are modified, including the case of coalescing.
632  */
getLiveRangeOrNull(SimpleValue V) const633 const LiveRange *GenXLiveness::getLiveRangeOrNull(SimpleValue V) const
634 {
635   auto i = LiveRangeMap.find(V);
636   if (i == end())
637     return nullptr;
638   return i->second;
639 }
640 
getLiveRangeOrNull(SimpleValue V)641 LiveRange *GenXLiveness::getLiveRangeOrNull(SimpleValue V)
642 {
643   return const_cast<LiveRange*>(static_cast<const GenXLiveness*>(this)->getLiveRangeOrNull(V));
644 }
645 
646 /***********************************************************************
647  * getLiveRange : get live range for value
648  *
649  * The returned LiveRange pointer is valid only until the next time the
650  * live ranges are modified, including the case of coalescing.
651  */
getLiveRange(SimpleValue V)652 LiveRange *GenXLiveness::getLiveRange(SimpleValue V)
653 {
654   LiveRange *LR = getLiveRangeOrNull(V);
655   IGC_ASSERT_MESSAGE(LR, "no live range found");
656   return LR;
657 }
658 
659 /***********************************************************************
660  * getUnifiedRet : get/create unified return value for a function
661  *
662  * Returns already created unified value, or creates new one
663  * if there was no such.
664  */
getUnifiedRet(Function * F)665 Value *GenXLiveness::getUnifiedRet(Function *F) {
666   auto *Ret = getUnifiedRetIfExist(F);
667   if (!Ret)
668     return createUnifiedRet(F);
669   return Ret;
670 }
671 
getUnifiedRetIfExist(Function * F) const672 Value *GenXLiveness::getUnifiedRetIfExist(Function *F) const {
673   auto RetIt = UnifiedRets.find(F);
674   if (RetIt == UnifiedRets.end())
675     return nullptr;
676   return RetIt->second;
677 }
678 
679 /***********************************************************************
680  * createUnifiedRet : create unified return value for a function
681  *
682  * To allow all returns in a function and all results of calls to that
683  * function to use the same register, we have a dummy "unified return
684  * value".
685  *
686  * Cannot be called on a function with void return type.
687  *
688  * This also creates the LiveRange for the unified return value, or
689  * multiple ones if it is struct type, and sets the category to the same as in
690  * one of the return instructions.
691  */
createUnifiedRet(Function * F)692 Value *GenXLiveness::createUnifiedRet(Function *F) {
693   IGC_ASSERT_MESSAGE(!F->isDeclaration(), "must be a function definition");
694   IGC_ASSERT_MESSAGE(UnifiedRets.find(F) == UnifiedRets.end(),
695     "Unified ret must not have been already created");
696   Type *Ty = F->getReturnType();
697   IGC_ASSERT(!Ty->isVoidTy());
698   auto URet = genx::createUnifiedRet(Ty, "", F->getParent());
699   // Find some return inst.
700   ReturnInst *Ret = nullptr;
701   for (auto fi = F->begin(), fe = F->end(); fi != fe; ++fi)
702     if ((Ret = dyn_cast<ReturnInst>(fi->getTerminator())))
703       break;
704   IGC_ASSERT_MESSAGE(Ret, "must find return instruction");
705   Value *RetVal = Ret->getOperand(0);
706   // Use the categories of its operand to set the categories of the unified
707   // return value.
708   for (unsigned StructIdx = 0, NumElements = IndexFlattener::getNumElements(Ty);
709        StructIdx != NumElements; ++StructIdx) {
710     int Cat = getOrCreateLiveRange(SimpleValue(RetVal, StructIdx))
711                   ->getOrDefaultCategory();
712     SimpleValue SV(URet, StructIdx);
713     getOrCreateLiveRange(SV)->setCategory(Cat);
714   }
715 
716   UnifiedRets[F] = URet;
717   UnifiedRetToFunc[URet] = F;
718   return URet;
719 }
720 
721 /***********************************************************************
722  * isUnifiedRet : test whether a value is a unified return value
723  *
724  * A unified ret value is a call instruction that is
725  * not attached to any BB, and is in the UnifiedRetFunc map.
726  */
isUnifiedRet(Value * V)727 Function *GenXLiveness::isUnifiedRet(Value *V)
728 {
729   // Quick checks first.
730   auto Inst = dyn_cast<CallInst>(V);
731   if (!Inst)
732     return nullptr;
733   if (Inst->getParent())
734     return nullptr;
735   // Then map lookup.
736   auto i = UnifiedRetToFunc.find(V);
737   if (i == UnifiedRetToFunc.end())
738     return nullptr;
739   return i->second;
740 }
741 
742 /***********************************************************************
743  * moveUnifiedRet : move a function's unified return value to another function
744  *
745  * This is used when replacing a function with a new one in GenXArgIndirection.
746  */
moveUnifiedRet(Function * OldF,Function * NewF)747 void GenXLiveness::moveUnifiedRet(Function *OldF, Function *NewF)
748 {
749   auto i = UnifiedRets.find(OldF);
750   if (i == UnifiedRets.end())
751     return;
752   Value *UR = i->second;
753   UnifiedRets[NewF] = UR;
754   UnifiedRets.erase(i);
755   UnifiedRetToFunc[UR] = NewF;
756 }
757 
758 /***********************************************************************
759  * find : given an instruction number, find a segment in a live range
760  *
761  * If the number is within a segment, or is just on its end point, that
762  * segment is returned. If the number is in a hole, the next segment
763  * after the hole is returned. If the number is before the first
764  * segment, the first segment is returned. If the number is after the
765  * last segment, end() is returned.
766  */
find(unsigned Pos)767 LiveRange::iterator LiveRange::find(unsigned Pos)
768 {
769   size_t Len = size();
770   if (!Len)
771     return end();
772   if (Pos > Segments[Len - 1].getEnd())
773     return end();
774   iterator I = begin();
775   do {
776     size_t Mid = Len >> 1;
777     if (Pos <= I[Mid].getEnd())
778       Len = Mid;
779     else
780       I += Mid + 1, Len -= Mid + 1;
781   } while (Len);
782   IGC_ASSERT(I->getEnd() >= Pos);
783   return I;
784 }
785 
getCategoryForPredefinedVariable(SimpleValue SV)786 static unsigned getCategoryForPredefinedVariable(SimpleValue SV) {
787   const unsigned Category =
788       llvm::StringSwitch<unsigned>(SV.getValue()->getName())
789           .Case(genx::BSSVariableName, RegCategory::SURFACE)
790           .Default(RegCategory::NUMCATEGORIES);
791   IGC_ASSERT_MESSAGE(Category != RegCategory::NUMCATEGORIES,
792                      "Unhandled predefined variable");
793   return Category;
794 }
795 
isPredefinedVariable(SimpleValue SV)796 static bool isPredefinedVariable(SimpleValue SV) {
797   Value *V = SV.getValue();
798   IGC_ASSERT_MESSAGE(V, "Expected value");
799   if (!isa<GlobalVariable>(V))
800     return false;
801   IGC_ASSERT_MESSAGE(SV.getIndex() == 0,
802                      "Expected single simple value for predefined variable");
803   auto *GV = cast<GlobalVariable>(V);
804   return GV->hasAttribute(genx::VariableMD::VCPredefinedVariable);
805 }
806 
807 /***********************************************************************
808  * getOrDefaultCategory : get category; if none, set default
809  *
810  * The default category is PREDICATE for i1 or a vector of i1, or GENERAL
811  * for anything else.
812  */
getOrDefaultCategory()813 unsigned LiveRange::getOrDefaultCategory()
814 {
815   unsigned Cat = getCategory();
816   if (Cat != RegCategory::NONE)
817     return Cat;
818   IGC_ASSERT(!value_empty());
819   SimpleValue SV = *value_begin();
820   if (isPredefinedVariable(SV)) {
821     Cat = getCategoryForPredefinedVariable(SV);
822     setCategory(Cat);
823     return Cat;
824   }
825   Type *Ty = IndexFlattener::getElementType(
826       SV.getValue()->getType(), SV.getIndex());
827   if (Ty->getScalarType()->isIntegerTy(1))
828     Cat = RegCategory::PREDICATE;
829   else
830     Cat = RegCategory::GENERAL;
831   setCategory(Cat);
832   return Cat;
833 }
834 
835 /***********************************************************************
836  * interfere : check whether two live ranges interfere
837  *
838  * Two live ranges interfere if there is a segment from each that overlap
839  * and they are considered to cause interference by
840  * checkIfOverlappingSegmentsInterfere below.
841  */
interfere(LiveRange * LR1,LiveRange * LR2)842 bool GenXLiveness::interfere(LiveRange *LR1, LiveRange *LR2) {
843   if (CoalescingDisabled)
844     return true;
845   return getSingleInterferenceSites(LR1, LR2, nullptr);
846 }
847 
848 /***********************************************************************
849  * twoAddrInterfere : check whether two live ranges interfere, allowing for
850  *    single number interference sites at two address ops
851  *
852  * Return:  true if they interfere
853  *
854  * Two live ranges interfere if there is a segment from each that overlap
855  * and are not both weak.
856  *
857  * But, if each interfering segment is a single number that is the precopy
858  * site of a two address op, and the result of the two address op is in one LR
859  * and the two address operand is in the other, then that is not counted as
860  * interference.
861  *
862  * That provision allows for coalescing at a two address op where the two
863  * address operand has already been copy coalesced with, or is the same value
864  * as, a different operand in the same bale, as follows:
865  *
866  * Suppose the two address op a has number N, and it has two address operand b
867  * and some other operand c in the same bale:
868  *
869  * N-1: (space for precopy)
870  * N:   a = op(b, c)
871  *
872  * with live ranges
873  * a:[N-1,..)
874  * b:[..,N-1)
875  * c:[..,N)
876  *
877  * Then a and b can coalesce.
878  *
879  * But suppose b and c are the same value, or had previously been copy coalesced.
880  * Then we have live ranges
881  * a:[N-1,..)
882  * b,c:[..,N)
883  *
884  * and a and b now interfere needlessly.
885  *
886  * This function is called on an attempt to coalesce a and b (or rather the
887  * live range containing a and the live range containing b).  In it, we see
888  * that there is a single number segment of interference [N-1,N), where a is
889  * the result and b is the two address operand of the two address op at N. Thus
890  * we discount that segment of interference, and a and b can still coalesce.
891  *
892  * Note that this formulation allows for there to be multiple such sites because
893  * of multiple two address results being already coalesced together through phi
894  * nodes.
895  */
twoAddrInterfere(LiveRange * LR1,LiveRange * LR2)896 bool GenXLiveness::twoAddrInterfere(LiveRange *LR1, LiveRange *LR2) {
897   if (CoalescingDisabled)
898     return true;
899   SmallVector<unsigned, 4> Sites;
900   if (getSingleInterferenceSites(LR1, LR2, &Sites))
901     return true; // interferes, not just single number sites
902   if (Sites.empty())
903     return false; // does not interfere at all
904   // Put the single number sites in a set.
905   SmallSet<unsigned, 4> SitesSet;
906   LLVM_DEBUG(dbgs() << "got single number interference sites:");
907   for (auto i = Sites.begin(), e = Sites.end(); i != e; ++i) {
908     LLVM_DEBUG(dbgs() << " " << *i);
909     SitesSet.insert(*i);
910   }
911   LLVM_DEBUG(dbgs() << "\nbetween:\n" << *LR1 << "\n" << *LR2 << "\n");
912   Sites.clear();
913   // Check each def in LR1 and LR2 for being a two address op that causes us to
914   // discount a single number interference site.
915   for (auto LR = LR1, OtherLR = LR2; LR;
916       LR = LR == LR1 ? LR2 : nullptr, OtherLR = LR1) {
917     for (auto vi = LR->value_begin(), ve = LR->value_end(); vi != ve; ++vi) {
918       auto CI = dyn_cast<CallInst>(vi->getValue());
919       if (!CI)
920         continue;
921       auto OperandNum = getTwoAddressOperandNum(CI);
922       if (!OperandNum)
923         continue;
924       // Got a two addr op. Check whether the two addr operand is in the other
925       // LR.
926       if (getLiveRangeOrNull(CI->getOperand(*OperandNum)) != OtherLR)
927         continue;
928       // Discount the single number interference site here, if there is one.
929       SitesSet.erase(getNumbering()->getNumber(CI) - 1);
930     }
931   }
932   // If we have discounted all sites, then there is no interference.
933   return !SitesSet.empty();
934 }
935 
936 /***********************************************************************
937  * getSingleInterferenceSites : check whether two live ranges interfere,
938  *      returning single number interference sites
939  *
940  * Enter:   LR1, LR2 = live ranges to check
941  *          Sites = vector in which to store single number interference sites,
942  *                  or 0 if we do not want to collect them
943  *
944  * Return:  true if the live ranges interfere other than as reflected in Sites
945  *
946  * Two live ranges interfere if there is a segment from each that overlap
947  * and are not both weak.
948  *
949  * If Sites is 0 (the caller does not want the Sites list), then the function
950  * returns true if there is any interference.
951  *
952  * If Sites is not 0, then any interference in a single number segment, for
953  * example [19,20), causes the start number to be pushed into Sites. The
954  * function returns true only if there is interference that cannot be described
955  * in Sites.
956  */
getSingleInterferenceSites(LiveRange * LR1,LiveRange * LR2,SmallVectorImpl<unsigned> * Sites)957 bool GenXLiveness::getSingleInterferenceSites(LiveRange *LR1, LiveRange *LR2,
958     SmallVectorImpl<unsigned> *Sites)
959 {
960   // Swap if necessary to make LR1 the one with more segments.
961   if (LR1->size() < LR2->size())
962     std::swap(LR1, LR2);
963   auto Idx2 = LR2->begin(), End2 = LR2->end();
964   // Find segment in LR1 that contains or is the next after the start
965   // of the first segment in LR2, including the case that the start of
966   // the LR2 segment abuts the end of the LR1 segment.
967   auto Idx1 = LR1->find(Idx2->getStart()), End1 = LR1->end();
968   if (Idx1 == End1)
969     return false;
970   for (;;) {
971     // Check for overlap.
972     if (Idx1->getStart() < Idx2->getStart()) {
973       if (Idx1->getEnd() > Idx2->getStart())
974         if (checkIfOverlappingSegmentsInterfere(LR1, Idx1, LR2, Idx2)) {
975           // Idx1 overlaps Idx2. Check if it is a single number overlap that can
976           // be pushed into Sites.
977           if (!Sites || Idx1->getEnd() != Idx2->getStart() + 1)
978             return true;
979           Sites->push_back(Idx2->getStart());
980         }
981     } else {
982       if (Idx1->getStart() < Idx2->getEnd())
983         if (checkIfOverlappingSegmentsInterfere(LR1, Idx1, LR2, Idx2)) {
984           // Idx2 overlaps Idx1. Check if it is a single number overlap that can
985           // be pushed into Sites.
986           if (!Sites || Idx2->getEnd() != Idx1->getStart() + 1)
987             return true;
988           Sites->push_back(Idx1->getStart());
989         }
990     }
991     // Advance whichever one has the lowest End.
992     if (Idx1->getEnd() < Idx2->getEnd()) {
993       if (++Idx1 == End1)
994         return false;
995     } else {
996       if (++Idx2 == End2)
997         return false;
998     }
999   }
1000 }
1001 
1002 /***********************************************************************
1003  * checkIfOverlappingSegmentsInterfere : given two segments that have been
1004  *    shown to overlap, check whether their strengths make them interfere
1005  *
1006  * If both segments are weak, they do not interfere.
1007  *
1008  * Interference between a normal segment in one LR and a phicpy segment in the
1009  * other LR is ignored, as long as the phicpy segment relates to a phi incoming
1010  * where the phi node is in the LR with the phicpy segment and the incoming
1011  * value is in the LR with the strong segment. This is used to avoid
1012  * unnecessary interference for a phi incoming through a critical edge, where
1013  * the incoming is likely to be used in the other successor as well.
1014  */
checkIfOverlappingSegmentsInterfere(LiveRange * LR1,Segment * S1,LiveRange * LR2,Segment * S2)1015 bool GenXLiveness::checkIfOverlappingSegmentsInterfere(
1016     LiveRange *LR1, Segment *S1, LiveRange *LR2, Segment *S2)
1017 {
1018   if (S1->isWeak() && S2->isWeak())
1019     return false; // both segments weak
1020   if (S2->Strength == Segment::PHICPY) {
1021     // Swap so that if either segment is phicpy, then it is S1 for the check
1022     // below.
1023     std::swap(LR1, LR2);
1024     std::swap(S1, S2);
1025   }
1026   if (S1->Strength != Segment::PHICPY)
1027     return true;
1028   // S1 is phicpy. If its corresponding phi cpy insertion point is for a phi
1029   // node in LR1 and an incoming in LR2, then this does not cause interference.
1030   auto PhiIncoming = Numbering->getPhiIncomingFromNumber(S1->getStart());
1031   auto PhiFound = std::find_if(
1032       PhiIncoming.begin(), PhiIncoming.end(), [LR1, this](auto It) {
1033         PHINode *Phi = It.first;
1034         IGC_ASSERT_MESSAGE(Phi, "phi incoming not found");
1035         return getLiveRange(Phi) == LR1;
1036       });
1037   if (PhiFound == PhiIncoming.end())
1038     return true; // phi not in LR1, interferes
1039 
1040   PHINode *Phi = PhiFound->first;
1041   unsigned IncomingBBIndex = PhiFound->second;
1042   if (getLiveRangeOrNull(Phi->getIncomingValue(IncomingBBIndex)) != LR2)
1043     return true; // phi incoming not in LR2, interferes
1044   // Conditions met -- does not cause interference.
1045   return false;
1046 }
1047 
1048 /***********************************************************************
1049  * coalesce : coalesce two live ranges that do not interfere
1050  *
1051  * Enter:   LR1 = first live range
1052  *          LR2 = second live range
1053  *          DisallowCASC = true to disallow call arg special coalescing
1054  *                         into the resulting live range
1055  *
1056  * Return:  new live range (LR1 and LR2 now invalid)
1057  */
coalesce(LiveRange * LR1,LiveRange * LR2,bool DisallowCASC)1058 LiveRange *GenXLiveness::coalesce(LiveRange *LR1, LiveRange *LR2,
1059     bool DisallowCASC)
1060 {
1061   IGC_ASSERT_MESSAGE(LR1 != LR2, "cannot coalesce an LR to itself");
1062   IGC_ASSERT_MESSAGE(LR1->Category == LR2->Category,
1063     "cannot coalesce two LRs with different categories");
1064   // Make LR1 the one with the longer list of segments.
1065   if (LR2->Segments.size() > LR1->Segments.size()) {
1066     LiveRange *temp = LR1;
1067     LR1 = LR2;
1068     LR2 = temp;
1069   }
1070   LLVM_DEBUG(
1071     dbgs() << "Coalescing \"";
1072     LR1->print(dbgs());
1073     dbgs() << "\" and \"";
1074     LR2->print(dbgs());
1075     dbgs() << "\"\n"
1076   );
1077   // Do the merge of the segments.
1078   merge(LR1, LR2);
1079   // Copy LR2's values across to LR1.
1080   for (auto i = LR2->value_begin(), e = LR2->value_end(); i != e; ++i)
1081     LiveRangeMap[LR1->addValue(*i)] = LR1;
1082   // Use the largest alignment from the two LRs.
1083   LR1->LogAlignment = std::max(LR1->LogAlignment, LR2->LogAlignment);
1084   // If either LR has a non-zero offset, use it.
1085   IGC_ASSERT(!LR1->Offset || !LR2->Offset);
1086   LR1->Offset |= LR2->Offset;
1087   // Set DisallowCASC.
1088   LR1->DisallowCASC |= LR2->DisallowCASC | DisallowCASC;
1089   delete LR2;
1090   LLVM_DEBUG(
1091     dbgs() << "  giving \"";
1092     LR1->print(dbgs());
1093     dbgs() << "\"\n"
1094   );
1095   return LR1;
1096 }
1097 
1098 /***********************************************************************
1099  * copyInterfere : check whether two live ranges copy-interfere
1100  *
1101  * Two live ranges LR1 and LR2 copy-interfere (a non-commutative relation)
1102  * if LR1 includes a value that is a phi node whose definition is within
1103  * LR2.
1104  */
copyInterfere(LiveRange * LR1,LiveRange * LR2)1105 bool GenXLiveness::copyInterfere(LiveRange *LR1, LiveRange *LR2) {
1106   if (CoalescingDisabled)
1107     return true;
1108   // Find a phi node value in LR1. It can have at most one, because only
1109   // copy coalescing has occurred up to now, and copy coalescing does not
1110   // occur at a phi node.
1111   for (unsigned i = 0, e = LR1->Values.size(); i != e; ++i) {
1112     auto Phi = dyn_cast<PHINode>(LR1->Values[i].getValue());
1113     if (!Phi)
1114       continue;
1115     // Found a phi node in LR1. A phi node has multiple instruction numbers,
1116     // one for each incoming block. See if any one of those is in LR2's
1117     // live range.
1118     for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i)
1119       if (LR2->contains(Numbering->getPhiNumber(Phi, Phi->getIncomingBlock(i))))
1120         return true;
1121     break;
1122   }
1123   return false; // no phi node found
1124 }
1125 
1126 /***********************************************************************
1127  * wrapsAround : detects if V1 is a phi node and V2 wraps round to a use
1128  *              in a phi node in the same basic block as V1 and after it
1129  */
wrapsAround(Value * V1,Value * V2)1130 bool GenXLiveness::wrapsAround(Value *V1, Value *V2)
1131 {
1132   auto PhiDef = dyn_cast<PHINode>(V1);
1133   if (!PhiDef)
1134     return false;
1135   for (auto ui = V2->use_begin(), ue = V2->use_end(); ui != ue; ++ui) {
1136     if (auto PhiUse = dyn_cast<PHINode>(ui->getUser())) {
1137       if (PhiUse->getParent() == PhiDef->getParent()) {
1138         // Phi use in the same BB. Scan until we find PhiDef or the end
1139         // of the phi nodes.
1140         while (PhiUse != PhiDef) {
1141           PhiUse = dyn_cast<PHINode>(PhiUse->getNextNode());
1142           if (!PhiUse)
1143             return true; // reach end of phi nodes
1144         }
1145       }
1146     }
1147   }
1148   return false;
1149 }
1150 
1151 /***********************************************************************
1152  * insertCopy : insert a copy of a non-struct value
1153  *
1154  * Enter:   InputVal = value to copy
1155  *          LR = live range to add the new value to (0 to avoid adjusting
1156  *                live ranges)
1157  *          InsertBefore = insert copy before this inst
1158  *          Name = name to give the new value
1159  *          Number = number to give the new instruction(s), 0 for none
1160  *
1161  * Return:  The new copy instruction
1162  *
1163  * This inserts multiple copies if the input value is a vector that is
1164  * bigger than two GRFs or a non power of two size.
1165  *
1166  * This method is mostly used from GenXCoalescing, which passes an LR to
1167  * add the new copied value to.
1168  *
1169  * It is also used from GenXLiveRange if it needs to add a copy to break an
1170  * overlapping circular phi value, in which case LR is 0 as we do not want to
1171  * adjust live ranges. Also at this stage there is no baling info to update.
1172  */
insertCopy(Value * InputVal,LiveRange * LR,Instruction * InsertBefore,const Twine & Name,unsigned Number,const GenXSubtarget * ST)1173 Instruction *GenXLiveness::insertCopy(Value *InputVal, LiveRange *LR,
1174                                       Instruction *InsertBefore,
1175                                       const Twine &Name, unsigned Number,
1176                                       const GenXSubtarget *ST) {
1177   IGC_ASSERT(!isa<Constant>(InputVal));
1178   bool AdjustLRs = LR != nullptr;
1179   LiveRange *SourceLR = nullptr;
1180   if (AdjustLRs)
1181     SourceLR = getLiveRange(InputVal);
1182   auto InputTy = InputVal->getType();
1183   if (InputTy->getScalarType()->isIntegerTy(1)) {
1184     // The input is a predicate.
1185     if (!isa<Constant>(InputVal)) {
1186       // The predicate input is not a constant.
1187       // There is no way in vISA of copying from one
1188       // predicate to another, so we copy all 0s into the destination
1189       // then "or" the source into it.
1190       Instruction *NewInst = CastInst::Create(Instruction::BitCast,
1191           Constant::getNullValue(InputTy), InputTy, Name, InsertBefore);
1192       Numbering->setNumber(NewInst, Number);
1193       if (AdjustLRs)
1194         setLiveRange(SimpleValue(NewInst), LR);
1195       NewInst = BinaryOperator::Create(Instruction::Or, NewInst, InputVal, Name,
1196           InsertBefore);
1197       Numbering->setNumber(NewInst, Number);
1198       if (AdjustLRs)
1199         setLiveRange(SimpleValue(NewInst), LR);
1200       return NewInst;
1201     }
1202     // Predicate input is constant.
1203     auto NewInst = CastInst::Create(Instruction::BitCast, InputVal,
1204         InputTy, Name, InsertBefore);
1205     Numbering->setNumber(NewInst, Number);
1206     if (AdjustLRs)
1207       setLiveRange(SimpleValue(NewInst), LR);
1208     return NewInst;
1209   }
1210   Instruction *NewInst = nullptr;
1211   if (InputTy->isPointerTy()) {
1212     // BitCast used to represent a normal copy.
1213     NewInst = CastInst::Create(Instruction::BitCast, InputVal,
1214                                InputVal->getType(), Name, InsertBefore);
1215     if (Number)
1216       Numbering->setNumber(NewInst, Number);
1217     if (AdjustLRs)
1218       setLiveRange(SimpleValue(NewInst), LR);
1219     return NewInst;
1220   }
1221 
1222   Region R(InputVal);
1223   IGC_ASSERT(ST);
1224   unsigned MaxNum = getLegalRegionSizeForTarget(
1225       *ST, R, /* StartIdx */ 0, /* Allow2D */ false, R.NumElements);
1226   // Adjust size to Exec size
1227   MaxNum = std::min(MaxNum, TotalEMSize);
1228   if (exactLog2(R.NumElements) >= 0 && R.NumElements <= MaxNum) {
1229     // Can be done with a single copy.
1230     if (SourceLR && (SourceLR->Category != RegCategory::GENERAL
1231         || (LR && LR->Category != RegCategory::GENERAL))) {
1232       // Need a category conversion (including the case that the two
1233       // categories are the same but not GENERAL).
1234       NewInst = createConvert(InputVal, Name, InsertBefore);
1235     } else {
1236       // BitCast used to represent a normal copy.
1237       NewInst = CastInst::Create(Instruction::BitCast, InputVal,
1238           InputVal->getType(), Name, InsertBefore);
1239     }
1240     if (Number)
1241       Numbering->setNumber(NewInst, Number);
1242     if (AdjustLRs)
1243       setLiveRange(SimpleValue(NewInst), LR);
1244     return NewInst;
1245   }
1246 
1247   auto collectFragment = [](Value *V, unsigned MaxFrag,
1248                          SmallVectorImpl<std::pair<unsigned, unsigned>>& Frag,
1249                          unsigned MaxElt) {
1250     while (!isa<UndefValue>(V)) {
1251       if (!GenXIntrinsic::isWrRegion(V))
1252         return true;
1253       IntrinsicInst *WII = cast<IntrinsicInst>(V);
1254       Region R = makeRegionFromBaleInfo(WII, BaleInfo());
1255       if (R.Indirect || !R.isContiguous() || !R.isWholeNumRows())
1256         return true;
1257       if ((R.Offset % R.ElementBytes) != 0)
1258         return true;
1259       unsigned Base = R.Offset / R.ElementBytes;
1260       for (unsigned Offset = 0; Offset < R.NumElements; /*EMPTY*/) {
1261         unsigned NumElts = std::min(MaxElt, R.NumElements - Offset);
1262         // Round NumElts down to power of 2. That is how many elements we
1263         // are copying this time round the loop.
1264         NumElts = 1 << genx::log2(NumElts);
1265         Frag.push_back(std::make_pair(Base + Offset, NumElts));
1266         Offset += NumElts;
1267       }
1268       V = WII->getOperand(0);
1269     }
1270     if (Frag.size() > MaxFrag)
1271       return true;
1272     std::sort(Frag.begin(), Frag.end());
1273     return false;
1274   };
1275 
1276   unsigned NumElements = R.NumElements;
1277   SmallVector<std::pair<unsigned, unsigned>, 8> Fragments;
1278   unsigned MaxCopies = (NumElements + MaxNum - 1) / MaxNum;
1279   if (collectFragment(InputVal, MaxCopies, Fragments, MaxNum)) {
1280     Fragments.clear();
1281     for (unsigned Offset = 0; Offset < NumElements; /*EMPTY*/) {
1282       unsigned NumElts = std::min(MaxNum, NumElements - Offset);
1283       // Round NumElts down to power of 2. That is how many elements we
1284       // are copying this time round the loop.
1285       NumElts = 1 << genx::log2(NumElts);
1286       Fragments.push_back(std::make_pair(Offset, NumElts));
1287       Offset += NumElts;
1288     }
1289   }
1290   // Need to split the copy up. Start with an undef destination.
1291   Value *Res = UndefValue::get(InputVal->getType());
1292   for (auto &I : Fragments) {
1293     unsigned Offset = I.first;
1294     // Set the subregion.
1295     R.NumElements = I.second;
1296     R.Width = R.NumElements;
1297     R.Offset = Offset * R.ElementBytes;
1298     // Create the rdregion. Do not add this to a live range because it is
1299     // going to be baled in to the wrregion.
1300     Instruction *RdRegion = R.createRdRegion(InputVal, Name, InsertBefore,
1301         DebugLoc(), true/*AllowScalar*/);
1302     if (Baling)
1303       Baling->setBaleInfo(RdRegion, BaleInfo(BaleInfo::RDREGION, 0));
1304     if (Number)
1305       Numbering->setNumber(RdRegion, Number);
1306     // Create the wrregion, and mark that it bales in the rdregion (operand 1).
1307     NewInst = R.createWrRegion(Res, RdRegion, Name, InsertBefore, DebugLoc());
1308     if (Number)
1309       Numbering->setNumber(NewInst, Number);
1310     if (Baling) {
1311       BaleInfo BI(BaleInfo::WRREGION);
1312       BI.setOperandBaled(1);
1313       Baling->setBaleInfo(NewInst, BI);
1314     }
1315     if (AdjustLRs) {
1316       // Add the last wrregion to the live range (thus coalescing them all
1317       // together and in with the phi node or two address op that we're doing
1318       // the copy for).
1319       setLiveRange(SimpleValue(NewInst), LR);
1320     }
1321     Res = NewInst;
1322   }
1323   return NewInst;
1324 }
1325 
1326 /***********************************************************************
1327  * merge : merge segments of LR2 into LR1
1328  *
1329  * This is equivalent to addSegments followed by sortAndMerge.
1330  *
1331  * Previously there was some code here that attempted to optimize on the
1332  * assumption that the caller passed the one with the longer list of segments
1333  * as LR1. However that became too complicated once we introduced weak and
1334  * strong liveness.
1335  *
1336  * One day we could re-introduce some simple optimized paths, such as when
1337  * LR2 has a single segment.
1338  */
merge(LiveRange * LR1,LiveRange * LR2)1339 void GenXLiveness::merge(LiveRange *LR1, LiveRange *LR2)
1340 {
1341   LR1->addSegments(LR2);
1342   LR1->sortAndMerge();
1343 }
1344 
1345 /***********************************************************************
1346  * eraseUnusedTree : erase unused tree of instructions
1347  *
1348  * Enter:   Inst = root of unused tree
1349  *
1350  * This erases Inst, then recursively erases other instructions that become
1351  * unused. Erased instructions are also removed from liveness.
1352  *
1353  * Other than the given Inst, this does not erase a non-intrinsic call, or
1354  * an intrinsic call with a side effect.
1355  *
1356  * Instead of erasing as we go, we undef operands to make them unused and then
1357  * erase everything at the end. This is required for the case that we have an
1358  * unused DAG of instructions rather than just an unused tree, for example
1359  * where we have a rd-wr sequence and all the rds use the same input.
1360  */
eraseUnusedTree(Instruction * TopInst)1361 void GenXLiveness::eraseUnusedTree(Instruction *TopInst)
1362 {
1363   SmallVector<Instruction *, 4> Stack;
1364   std::set<Instruction *> ToErase;
1365   Stack.push_back(TopInst);
1366   while (!Stack.empty()) {
1367     auto Inst = Stack.back();
1368     Stack.pop_back();
1369     if (!Inst->use_empty())
1370       continue;
1371     if (TopInst != Inst) {
1372       if (auto CI = dyn_cast<CallInst>(Inst)) {
1373         if (!GenXIntrinsic::isAnyNonTrivialIntrinsic(CI))
1374           continue;
1375         if (!CI->doesNotAccessMemory())
1376           continue;
1377       }
1378     }
1379     for (unsigned oi = 0, oe = Inst->getNumOperands(); oi != oe; ++oi)
1380       if (auto OpndInst = dyn_cast<Instruction>(Inst->getOperand(oi))) {
1381         Stack.push_back(OpndInst);
1382         Inst->setOperand(oi, UndefValue::get(OpndInst->getType()));
1383       }
1384     removeValue(Inst);
1385     ToErase.insert(Inst);
1386   }
1387   for (auto i = ToErase.begin(), e = ToErase.end(); i != e; ++i)
1388     (*i)->eraseFromParent();
1389 }
1390 
1391 /***********************************************************************
1392  * setArgAddressBase : set the base value of an argument indirect address
1393  *
1394  * Enter:    Addr = genx.convert.addr instruction
1395  *           Base = a base register for the Addr
1396  */
setArgAddressBase(Value * Addr,Value * Base)1397 void GenXLiveness::setArgAddressBase(Value *Addr, Value *Base) {
1398   // There might be several addresses for the one base. So, find and erase an
1399   // old address for the input base.
1400   auto Res = ArgAddressBaseMap.find(Addr);
1401   if (Res != ArgAddressBaseMap.end()) {
1402     Value *PreviousBase = Res->second;
1403     auto AddrsRange = BaseToArgAddrMap.equal_range(PreviousBase);
1404     auto ToRemove = std::find_if(AddrsRange.first, AddrsRange.second,
1405                                  [Addr](auto It) { return It.second == Addr; });
1406     IGC_ASSERT_MESSAGE(
1407         ToRemove != AddrsRange.second,
1408         "Addr -> PreviousBase exists in ArgAddressBaseMap. It must "
1409         "exist in BaseToArgAddrMap too.");
1410     BaseToArgAddrMap.erase(ToRemove);
1411   }
1412 
1413   // Set the new connection between address and base.
1414   ArgAddressBaseMap[Addr] = Base;
1415   BaseToArgAddrMap.insert({Base, Addr});
1416 }
1417 
1418 /***********************************************************************
1419  * getAddressBase : get the base register of an address
1420  *
1421  * Enter:   Addr = address conversion (genx.convert.addr instruction)
1422  *
1423  * Return:  The Value for the base that the address is used with, or some
1424  *          other Value that is coalesced with that
1425  */
getAddressBase(Value * Addr)1426 Value *GenXLiveness::getAddressBase(Value *Addr)
1427 {
1428   // Get the base register from the rdregion/wrregion that the index is used
1429   // in. This might involve going via an add or an rdregion.
1430   Use *U = &*Addr->use_begin();
1431   auto user = cast<Instruction>(U->getUser());
1432   while (!U->getOperandNo()) {
1433     U = &*user->use_begin();
1434     user = cast<Instruction>(U->getUser());
1435   }
1436   if (GenXIntrinsic::isRdRegion(user))
1437     return user->getOperand(0);
1438   if (GenXIntrinsic::isWrRegion(user)) {
1439     auto Head = Baling->getBaleHead(user);
1440     if (Head && isa<StoreInst>(Head)) {
1441       Value *V = Head->getOperand(1);
1442       V = getUnderlyingGlobalVariable(V);
1443       IGC_ASSERT_MESSAGE(V, "null base not expected");
1444       return V;
1445     }
1446     return user;
1447   }
1448   // The above scheme does not work for an address conversion added by
1449   // GenXArgIndirection. Instead we have AddressBaseMap to provide the mapping.
1450   auto i = ArgAddressBaseMap.find(Addr);
1451   IGC_ASSERT_MESSAGE(i != ArgAddressBaseMap.end(),
1452     "base register not found for address");
1453   Value *BaseV = i->second;
1454   LiveRange *LR = getLiveRange(BaseV);
1455   // Find a SimpleValue in the live range that is not a struct member.
1456   for (auto vi = LR->value_begin(), ve = LR->value_end(); vi != ve; ++vi) {
1457     Value *V = vi->getValue();
1458     if (!isa<StructType>(V->getType()))
1459       return V;
1460   }
1461   IGC_ASSERT_EXIT_MESSAGE(0, "non-struct value not found");
1462 }
1463 
1464 /***********************************************************************
1465  * getAddressWithBase : get addresses that base register is a Base
1466  *
1467  * Enter:   Base = assumed base
1468  *
1469  * Return:  If the input value is a base, return the vector of the corresponding
1470  *          arguments indirect addresses, empty vector otherwise.
1471  */
getAddressWithBase(Value * Base)1472 std::vector<Value *> GenXLiveness::getAddressWithBase(Value *Base) {
1473   auto Res = BaseToArgAddrMap.equal_range(Base);
1474   std::vector<Value *> Bases;
1475   std::transform(Res.first, Res.second, std::back_inserter(Bases),
1476                  [](auto It) { return It.second; });
1477   return Bases;
1478 }
1479 
1480 /***********************************************************************
1481  * isNoopCastCoalesced : see if the no-op cast has been coalesced away
1482  *
1483  * This handles the case that the input and result of the no-op cast are coalesced
1484  * in to the same live range.
1485  */
isNoopCastCoalesced(CastInst * CI)1486 bool GenXLiveness::isNoopCastCoalesced(CastInst *CI)
1487 {
1488   IGC_ASSERT(genx::isNoopCast(CI));
1489   return getLiveRangeOrNull(CI) == getLiveRangeOrNull(CI->getOperand(0));
1490 }
1491 
1492 /***********************************************************************
1493  * dump, print : dump the liveness info
1494  */
1495 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump()1496 void GenXLiveness::dump()
1497 {
1498   print(errs()); errs() << '\n';
1499 }
dump() const1500 void LiveRange::dump() const
1501 {
1502   print(errs()); errs() << '\n';
1503 }
1504 #endif
1505 
printValueLiveness(Value * V,raw_ostream & OS) const1506 void GenXLiveness::printValueLiveness(Value *V, raw_ostream &OS) const {
1507   const LiveRange *LR = getLiveRangeOrNull(V);
1508   if (!LR)
1509     return;
1510   // Only show an LR if the map iterator is on the value that appears first
1511   // in the LR. That avoids printing the same LR multiple times.
1512   if (V != LR->value_begin()->getValue())
1513     return;
1514 
1515   LR->print(OS);
1516   OS << "\n";
1517 }
1518 
print(raw_ostream & OS,const FunctionGroup * dummy) const1519 void GenXLiveness::print(raw_ostream &OS, const FunctionGroup *dummy) const {
1520   OS << "GenXLiveness for FunctionGroup " << FG->getName() << "\n";
1521   // Print live ranges for global variables;
1522   for (auto &G : FG->getModule()->globals())
1523     printValueLiveness(&G, OS);
1524   for (auto i = FG->begin(), e = FG->end(); i != e; ++i) {
1525     Function *Func = *i;
1526     // Print live ranges for args.
1527     for (auto fi = Func->arg_begin(), fe = Func->arg_end(); fi != fe; ++fi)
1528       printValueLiveness(&*fi, OS);
1529     // Print live range(s) for unified return value.
1530     if (i != FG->begin() && !Func->getReturnType()->isVoidTy()) {
1531       auto Ret = getUnifiedRetIfExist(Func);
1532       if (Ret)
1533         printValueLiveness(Ret, OS);
1534     }
1535     // Print live ranges for code.
1536     for (auto &Inst : instructions(Func))
1537       printValueLiveness(&Inst, OS);
1538   }
1539   OS << "\n";
1540 }
1541 
1542 /***********************************************************************
1543  * LiveRange::testLiveRanges : tests if no segments abut or overlap or are
1544  * in the wrong order. Live range's segments should be well formed.
1545  */
testLiveRanges() const1546 bool LiveRange::testLiveRanges() const
1547 {
1548   auto testAdjacentSegments = [](const Segment& A, const Segment& B)
1549   {
1550     const bool P1 = (A.Strength != B.Strength);
1551     const bool P2 = (A.getEnd() < B.getStart());
1552     const bool SegmentIsGood = (P1 || P2);
1553     IGC_ASSERT(SegmentIsGood);
1554     const bool StopSearch = !SegmentIsGood;
1555     return StopSearch;
1556   };
1557 
1558   const_iterator Begin = begin();
1559   const_iterator End = end();
1560   const_iterator Failed = std::adjacent_find(Begin, End, testAdjacentSegments);
1561 
1562   const bool Result = (Failed == End);
1563   return Result;
1564 }
1565 
1566 /***********************************************************************
1567  * LiveRange::addSegment : add a segment to a live range
1568  *
1569  * The segment might already be covered by an existing segment, in which
1570  * case nothing changes.
1571  *
1572  * It would be inefficient to implement coalesce() in terms of this, because
1573  * it might have to shuffle lots of elements along by one each time.
1574  * This function only gets called when adding a single segment to a live
1575  * range when inserting a copy in coalescing.
1576  */
addSegment(Segment Seg)1577 void LiveRange::addSegment(Segment Seg)
1578 {
1579   iterator i = find(Seg.getStart()), e = end();
1580   if (i == e) {
1581     // New segment off end.
1582     Segments.push_back(Seg);
1583   } else if (i->getStart() <= Seg.getStart()) {
1584     // New segment is covered by or overlaps the end of old segment i.
1585     if (i->getEnd() < Seg.getEnd()) {
1586       i->setEnd(Seg.getEnd());
1587       // See if it covers or overlaps any later segments.
1588       iterator j = i + 1;
1589       while (j != e) {
1590         if (j->getStart() > Seg.getEnd())
1591           break;
1592         i->setEnd(j->getEnd());
1593         if (j->getEnd() >= Seg.getEnd())
1594           break;
1595         ++j;
1596       }
1597       Segments.erase(i + 1, j);
1598     }
1599   } else if (i->getStart() == Seg.getEnd()) {
1600     // New segment abuts start of old segment i, without abutting or
1601     // overlapping anything before.
1602     i->setStart(Seg.getStart());
1603   } else {
1604     // New segment is completely in a hole just before i.
1605     Segments.insert(i, Seg);
1606   }
1607   IGC_ASSERT(testLiveRanges());
1608 }
1609 
1610 /***********************************************************************
1611  * LiveRange::setSegmentsFrom : for this live range, clear out its segments
1612  *    and copy them from the other live range
1613  */
setSegmentsFrom(LiveRange * Other)1614 void LiveRange::setSegmentsFrom(LiveRange *Other)
1615 {
1616   Segments.clear();
1617   Segments.append(Other->Segments.begin(), Other->Segments.end());
1618 }
1619 
1620 /***********************************************************************
1621  * LiveRange::addSegments : add segments of LR2 into this
1622  */
addSegments(LiveRange * LR2)1623 void LiveRange::addSegments(LiveRange *LR2)
1624 {
1625   Segments.append(LR2->Segments.begin(), LR2->Segments.end());
1626 }
1627 
1628 /***********************************************************************
1629  * LiveRange::sortAndMerge : after doing some push_backs, sort the segments,
1630  *      and merge overlapping/adjacent ones
1631  */
sortAndMerge()1632 void LiveRange::sortAndMerge() {
1633   std::sort(Segments.begin(), Segments.end());
1634 
1635   // Ensure that there are no duplicate segments:
1636   Segments_t::iterator ip;
1637   ip = std::unique(Segments.begin(), Segments.end());
1638   Segments.resize(std::distance(Segments.begin(), ip));
1639 
1640   Segments_t SegmentsSortedEnd = Segments;
1641   std::sort(SegmentsSortedEnd.begin(), SegmentsSortedEnd.end(),
1642             [](Segment L, Segment R) {
1643               if (L.getEnd() != R.getEnd())
1644                 return L.getEnd() < R.getEnd();
1645               return L.getStart() < R.getStart();
1646             });
1647 
1648   Segments_t NewSegments;
1649   std::unordered_set<Segment> OpenedSegments;
1650   Segment *SS = Segments.begin();
1651   Segment *ES = SegmentsSortedEnd.begin();
1652   unsigned prevBorder = 0;
1653   unsigned curBorder = 0;
1654   bool isStartBorder;
1655 
1656   // Split & Merge
1657   while (ES != SegmentsSortedEnd.end()) {
1658     if (SS != Segments.end() && SS->getStart() < ES->getEnd()) {
1659       isStartBorder = true;
1660       curBorder = SS->getStart();
1661     } else {
1662       isStartBorder = false;
1663       curBorder = ES->getEnd();
1664     }
1665 
1666     // To create or extend segment, first check that there are
1667     // open segments or that we haven't already created or extended one
1668     if (OpenedSegments.size() > 0 && prevBorder < curBorder) {
1669       Segment NS =
1670           *std::max_element(OpenedSegments.begin(), OpenedSegments.end(),
1671                             [](Segment L, Segment R) {
1672                                return L.Strength < R.Strength;
1673                             }); // New segment
1674       if (NewSegments.size() > 0 &&
1675           NewSegments.rbegin()->getEnd() == prevBorder &&
1676           // This segment and previous segment abut or overlap. Merge
1677           // as long as they have the same strength.
1678           (NS.Strength == NewSegments.rbegin()->Strength ||
1679            // Also allow for the case that the first one is strong and the
1680            // second one is phicpy. The resulting merged segment is strong,
1681            // because a phicpy segment is valid only if it starts in the
1682            // same place as when it was originally created and there is no
1683            // liveness just before it.
1684            (NS.Strength == Segment::PHICPY &&
1685             NewSegments.rbegin()->Strength == Segment::STRONG))) {
1686         // In these cases we can extend
1687         NewSegments.rbegin()->setEnd(curBorder);
1688       } else {
1689         NS.setStart(prevBorder);
1690         NS.setEnd(curBorder);
1691         NewSegments.push_back(NS);
1692       }
1693     }
1694     prevBorder = curBorder;
1695     if (isStartBorder)
1696       OpenedSegments.insert(*SS++);
1697     else
1698       OpenedSegments.erase(*ES++);
1699   }
1700   Segments = NewSegments;
1701 }
1702 
1703 /***********************************************************************
1704  * LiveRange::prepareFuncs : fill the Funcs set with kernel or stack functions
1705  * which this LR is alive in
1706  *
1707  * To support RegAlloc for function groups that consist of kernel and stack
1708  * functions we have to track which kernel/stack functions the LR spans across.
1709  *
1710  */
prepareFuncs(FunctionGroupAnalysis * FGA)1711 void LiveRange::prepareFuncs(FunctionGroupAnalysis *FGA) {
1712   for (auto &Val : getValues()) {
1713     auto Inst = dyn_cast<Instruction>(Val.getValue());
1714     Function *DefFunc = nullptr;
1715     if (Inst && Inst->getParent())
1716       DefFunc = Inst->getFunction();
1717     else if (auto Arg = dyn_cast<Argument>(Val.getValue()))
1718       DefFunc = Arg->getParent();
1719 
1720     if (DefFunc) {
1721       auto *DefFuncFG = FGA->getAnyGroup(DefFunc);
1722       IGC_ASSERT_MESSAGE(DefFuncFG, "Cannot find the function group");
1723       Funcs.insert(DefFuncFG->getHead());
1724     }
1725 
1726     for (auto U : Val.getValue()->users())
1727       if (Instruction *UserInst = dyn_cast<Instruction>(U)) {
1728         auto F = UserInst->getFunction();
1729         auto *FG = FGA->getAnyGroup(F);
1730         IGC_ASSERT_MESSAGE(FG, "Cannot find the function group");
1731         Funcs.insert(FG->getHead());
1732       }
1733   }
1734 }
1735 
1736 /***********************************************************************
1737  * LiveRange::getLength : add up the number of instructions covered by this LR
1738  */
getLength(bool WithWeak) const1739 unsigned LiveRange::getLength(bool WithWeak) const {
1740   unsigned Length = 0;
1741   for (auto i = begin(), e = end(); i != e; ++i) {
1742     if (i->isWeak() && !WithWeak)
1743       continue;
1744     Length += i->getEnd() - i->getStart();
1745   }
1746   return Length;
1747 }
1748 
1749 /***********************************************************************
1750  * LiveRange::print : print the live range
1751  * Simplevalues of LR : segments { details }
1752  * Detailed mode exists to print LLVM values
1753  */
print(raw_ostream & OS,bool Details) const1754 void LiveRange::print(raw_ostream &OS, bool Details) const {
1755   if (Details)
1756     OS << "LR values:\n";
1757 
1758   if (Values.empty()) {
1759     OS << "<Empty LR>";
1760     return;
1761   }
1762 
1763   for (auto &&V : Values) {
1764     if (!Details)
1765       OS << V << "; ";
1766     else
1767       OS << *V.getValue() << "\n";
1768   }
1769 
1770   if (!Details)
1771     OS << ":";
1772   else
1773     OS << "LR Segments and details: ";
1774 
1775   if (Segments.empty())
1776     OS << "<Empty Segments>";
1777   else
1778     printSegments(OS);
1779 
1780   const char *Cat = "???";
1781   switch (Category) {
1782     case RegCategory::NONE: Cat = "none"; break;
1783     case RegCategory::GENERAL: Cat = "general"; break;
1784     case RegCategory::ADDRESS: Cat = "address"; break;
1785     case RegCategory::PREDICATE: Cat = "predicate"; break;
1786     case RegCategory::SAMPLER: Cat = "sampler"; break;
1787     case RegCategory::SURFACE: Cat = "surface"; break;
1788     case RegCategory::EM: Cat = "em"; break;
1789     case RegCategory::RM: Cat = "rm"; break;
1790   }
1791 
1792   OS << "{" << Cat << ",align" << (1U << LogAlignment);
1793   if (Offset)
1794     OS << ",offset" << Offset;
1795   OS << "}";
1796 }
1797 
1798 /***********************************************************************
1799  * LiveRange::printSegments : print the live range's segments
1800  */
printSegments(raw_ostream & OS) const1801 void LiveRange::printSegments(raw_ostream &OS) const
1802 {
1803   for (auto ri = Segments.begin(), re = Segments.end();
1804       ri != re; ++ri) {
1805     OS << "[";
1806     switch (ri->Strength) {
1807       case Segment::WEAK: OS << "w"; break;
1808       case Segment::PHICPY: OS << "ph"; break;
1809       case Segment::STRONG: /* do nothing */ break;
1810     }
1811     OS << ri->getStart() << "," << ri->getEnd() << ")";
1812   }
1813 }
1814 
1815 /***********************************************************************
1816  * IndexFlattener::flatten : convert struct indices into a flattened index
1817  *
1818  * This has a special case of Indices having a single element that is the
1819  * number of elements in ST, which returns the total number of flattened
1820  * indices in the struct.
1821  *
1822  * This involves scanning through the struct layout each time it is called.
1823  * If it is used a lot, it might benefit from some cacheing of the results.
1824  */
flatten(StructType * ST,ArrayRef<unsigned> Indices)1825 unsigned IndexFlattener::flatten(StructType *ST, ArrayRef<unsigned> Indices)
1826 {
1827   if (!Indices.size())
1828     return 0;
1829   unsigned Flattened = 0;
1830   unsigned i = 0;
1831   for (; i != Indices[0]; ++i) {
1832     Type *ElTy = ST->getElementType(i);
1833     if (auto ElST = dyn_cast<StructType>(ElTy))
1834       Flattened += flatten(ElST, ElST->getNumElements());
1835     else
1836       ++Flattened;
1837   }
1838   if (i == ST->getNumElements())
1839     return Flattened; // handle special case noted at the top
1840   Type *ElTy = ST->getElementType(i);
1841   if (auto ElST = dyn_cast<StructType>(ElTy))
1842     Flattened += flatten(ElST, Indices.slice(1));
1843   return Flattened;
1844 }
1845 
1846 /***********************************************************************
1847  * IndexFlattener::unflatten : convert flattened index into struct indices
1848  *
1849  * Enter:   Indices = vector to put unflattened indices into
1850  *
1851  * Return:  number left over from flattened index if it goes off the end
1852  *          of the struct (used internally when recursing). If this is
1853  *          non-zero, nothing has been pushed into Indices
1854  *
1855  * This involves scanning through the struct layout each time it is called.
1856  * If it is used a lot, it might benefit from some cacheing of the results.
1857  */
unflatten(StructType * ST,unsigned Flattened,SmallVectorImpl<unsigned> * Indices)1858 unsigned IndexFlattener::unflatten(StructType *ST, unsigned Flattened,
1859     SmallVectorImpl<unsigned> *Indices)
1860 {
1861   ++Flattened;
1862   for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
1863     --Flattened;
1864     Type *ElTy = ST->getElementType(i);
1865     if (auto ElST = dyn_cast<StructType>(ElTy)) {
1866       Indices->push_back(i);
1867       Flattened = unflatten(ElST, Flattened, Indices);
1868       if (!Flattened)
1869         return 0;
1870       Indices->pop_back();
1871     } else if (!Flattened) {
1872       Indices->push_back(i);
1873       return 0;
1874     }
1875   }
1876   return Flattened;
1877 }
1878 
1879 /***********************************************************************
1880  * IndexFlattener::getElementType : get type of struct element from
1881  *    flattened index
1882  *
1883  * Enter:   Ty = type, possibly struct type
1884  *          FlattenedIndex = flattened index in the struct, 0 if not struct
1885  *
1886  * Return:  type of that element
1887  */
getElementType(Type * Ty,unsigned FlattenedIndex)1888 Type *IndexFlattener::getElementType(Type *Ty, unsigned FlattenedIndex)
1889 {
1890   auto ST = dyn_cast<StructType>(Ty);
1891   if (!ST)
1892     return Ty;
1893   SmallVector<unsigned, 4> Indices;
1894   IndexFlattener::unflatten(ST, FlattenedIndex, &Indices);
1895   IGC_ASSERT(IndexFlattener::flatten(ST, Indices) == FlattenedIndex);
1896   Type *T = 0;
1897   for (unsigned i = 0;;) {
1898     T = ST->getElementType(Indices[i]);
1899     if (++i == Indices.size())
1900       return T;
1901     ST = cast<StructType>(T);
1902   }
1903 }
1904 
1905 /***********************************************************************
1906  * IndexFlattener::flattenArg : flatten an arg in a function or call
1907  *
1908  * This calculates the total number of flattened indices used up by previous
1909  * args. If all previous args are not struct type, then this just returns the
1910  * arg index.
1911  */
flattenArg(FunctionType * FT,unsigned ArgIndex)1912 unsigned IndexFlattener::flattenArg(FunctionType *FT, unsigned ArgIndex)
1913 {
1914   unsigned FlattenedIndex = 0;
1915   while (ArgIndex--) {
1916     Type *ArgTy = FT->getParamType(ArgIndex);
1917     FlattenedIndex += getNumElements(ArgTy);
1918   }
1919   return FlattenedIndex;
1920 }
1921 
1922 /***********************************************************************
1923  * SimpleValue::getType : get the type of the SimpleValue
1924  */
getType() const1925 Type *SimpleValue::getType() const {
1926   return IndexFlattener::getElementType(V->getType(), Index);
1927 }
1928 
1929 /***********************************************************************
1930  * dump, print : debug print a SimpleValue
1931  */
1932 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump() const1933 void SimpleValue::dump() const
1934 {
1935   print(errs()); errs() << '\n';
1936 }
1937 #endif
print(raw_ostream & OS) const1938 void SimpleValue::print(raw_ostream &OS) const
1939 {
1940   OS << V->getName();
1941   if (Index || isa<StructType>(V->getType()))
1942     OS << "#" << Index;
1943 }
printName(raw_ostream & OS) const1944 void SimpleValue::printName(raw_ostream &OS) const
1945 {
1946   OS << V->getName();
1947   if (Index || isa<StructType>(V->getType()))
1948     OS << "#" << Index;
1949 }
1950 
1951 /***********************************************************************
1952  * CallGraph::build : build the call graph for the FunctionGroup
1953  *
1954  * The call graph is acyclic because no recursive edges added here
1955  * CM supports recursion though
1956  */
build(GenXLiveness * Liveness)1957 void genx::CallGraph::build(GenXLiveness *Liveness) {
1958   Nodes.clear();
1959   // Create a node for each Function.
1960   for (auto fgi = FG->begin(), fge = FG->end(); fgi != fge; ++fgi) {
1961     Function *F = *fgi;
1962     (void)Nodes[F];
1963   }
1964   // For each Function, find its call sites and add edges for them.
1965   for (auto fgi = FG->begin() + 1, fge = FG->end(); fgi != fge; ++fgi) {
1966     Function *F = *fgi;
1967     for (Value::use_iterator ui = F->use_begin(), ue = F->use_end(); ui != ue;
1968          ++ui) {
1969       // TODO: deduce possible callsites thru cast chains
1970       if (isa<CallInst>(ui->getUser())) {
1971         auto Call = cast<CallInst>(ui->getUser());
1972         auto Caller = Call->getParent()->getParent();
1973         // do not add edges for indirect, recursive and intrinsic calls
1974         if (Call->getCalledFunction() &&
1975             !GenXIntrinsic::isAnyNonTrivialIntrinsic(Call->getCalledFunction()) &&
1976             Caller != F)
1977           Nodes[Caller].insert(
1978               Edge(Liveness->getNumbering()->getNumber(Call), Call));
1979       }
1980     }
1981   }
1982 }
1983 
1984 INITIALIZE_PASS_BEGIN(GenXLivenessWrapper, "GenXLivenessWrapper",
1985                       "GenXLivenessWrapper", false, false)
1986 INITIALIZE_PASS_END(GenXLivenessWrapper, "GenXLivenessWrapper",
1987                     "GenXLivenessWrapper", false, false)
1988 
createGenXLivenessWrapperPass()1989 ModulePass *llvm::createGenXLivenessWrapperPass() {
1990   initializeGenXLivenessWrapperPass(*PassRegistry::getPassRegistry());
1991   return new GenXLivenessWrapper();
1992 }
1993