1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 //
10 // GenXLiveness is an analysis that contains the liveness information for the
11 // values in the code. See the comment at the top of GenXLiveness.h for further
12 // details.
13 //
14 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "GENX_LIVENESS"
16
17 #include "GenXLiveness.h"
18 #include "GenX.h"
19 #include "GenXBaling.h"
20 #include "GenXIntrinsics.h"
21 #include "GenXNumbering.h"
22 #include "GenXSubtarget.h"
23 #include "GenXTargetMachine.h"
24 #include "GenXUtil.h"
25 #include "vc/GenXOpts/GenXAnalysis.h"
26 #include "vc/GenXOpts/Utils/InternalMetadata.h"
27 #include "vc/GenXOpts/Utils/RegCategory.h"
28
29 #include "llvm/GenXIntrinsics/GenXMetadata.h"
30
31 #include "Probe/Assertion.h"
32 #include "llvmWrapper/IR/InstrTypes.h"
33 #include "llvmWrapper/IR/Instructions.h"
34
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/CodeGen/TargetPassConfig.h"
37 #include "llvm/IR/BasicBlock.h"
38 #include "llvm/IR/DiagnosticInfo.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/InstIterator.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/Intrinsics.h"
44 #include "llvm/IR/Type.h"
45 #include "llvm/Support/Debug.h"
46
47 #include <unordered_set>
48
49 using namespace llvm;
50 using namespace genx;
51
SimpleValue(const AssertingSV & ASV)52 SimpleValue::SimpleValue(const AssertingSV &ASV)
53 : SimpleValue(ASV.getValue(), ASV.getIndex()) {}
54
getAnalysisUsage(AnalysisUsage & AU)55 void GenXLiveness::getAnalysisUsage(AnalysisUsage &AU) {
56 AU.addRequired<TargetPassConfig>();
57 AU.setPreservesAll();
58 }
59
60 /***********************************************************************
61 * runOnFunctionGroup : do nothing
62 */
runOnFunctionGroup(FunctionGroup & ArgFG)63 bool GenXLiveness::runOnFunctionGroup(FunctionGroup &ArgFG)
64 {
65 FG = &ArgFG;
66 Subtarget = &getAnalysis<TargetPassConfig>()
67 .getTM<GenXTargetMachine>()
68 .getGenXSubtarget();
69 DL = &ArgFG.getModule()->getDataLayout();
70 return false;
71 }
72
73 /***********************************************************************
74 * releaseMemory: clear the GenXLiveness
75 */
releaseMemory()76 void GenXLiveness::releaseMemory() {
77 while (!LiveRangeMap.empty()) {
78 LiveRange *LR = begin()->second;
79 for (auto i = LR->value_begin(), e = LR->value_end(); i != e; ++i) {
80 SimpleValue V = *i;
81 LiveRangeMap.erase(V);
82 }
83 delete LR;
84 }
85 FG = 0;
86 CG.reset();
87 for (auto i = UnifiedRets.begin(), e = UnifiedRets.end(); i != e; ++i)
88 i->second->deleteValue();
89 UnifiedRets.clear();
90 UnifiedRetToFunc.clear();
91 ArgAddressBaseMap.clear();
92 BaseToArgAddrMap.clear();
93 }
94
95 /***********************************************************************
96 * setLiveRange : add a SimpleValue to a LiveRange
97 *
98 * This:
99 * 1. adds the SimpleValue to the LiveRange's value list;
100 * 2. sets the SimpleValue's entry in the map to point to the LiveRange.
101 */
setLiveRange(SimpleValue V,LiveRange * LR)102 void GenXLiveness::setLiveRange(SimpleValue V, LiveRange *LR)
103 {
104 IGC_ASSERT_MESSAGE(
105 LiveRangeMap.find(V) == end(),
106 "Attempting to set LiveRange for Value that already has one");
107 LR->addValue(V);
108 LiveRangeMap[V] = LR;
109 LR->setAlignmentFromValue(
110 *DL, V, Subtarget ? Subtarget->getGRFByteSize() : defaultGRFByteSize);
111 }
112
113 /***********************************************************************
114 * setAlignmentFromValue : set a live range's alignment from a value
115 */
setAlignmentFromValue(const DataLayout & DL,const SimpleValue V,const unsigned GRFWidth)116 void LiveRange::setAlignmentFromValue(const DataLayout &DL, const SimpleValue V,
117 const unsigned GRFWidth) {
118 const unsigned AlignInBytes = getValueAlignmentInBytes(*(V.getValue()), DL);
119 const unsigned LogByteAlign = Log2_32(AlignInBytes);
120 // Set max alignment to GRF.
121 const unsigned MaxLogAlign =
122 genx::getLogAlignment(VISA_Align::ALIGN_GRF, GRFWidth);
123 const unsigned SaturatedLogAlign = std::clamp(LogByteAlign, 0u, MaxLogAlign);
124 setLogAlignment(ceilLogAlignment(SaturatedLogAlign, GRFWidth));
125 }
126
127 /***********************************************************************
128 * rebuildCallGraph : rebuild GenXLiveness's call graph
129 */
rebuildCallGraph()130 void GenXLiveness::rebuildCallGraph()
131 {
132 CG = std::make_unique<genx::CallGraph>(FG);
133 CG->build(this);
134 }
135
136 /***********************************************************************
137 * buildSubroutineLRs : build the subroutine LRs
138 *
139 * If the FunctionGroup has subroutines, then each one (each Function other
140 * than the head one) gets a "subroutine LR", giving the live range
141 * of the whole subroutine plus any other subroutines it can call.
142 * Then, when building a real live range later, if it goes over a call,
143 * we can add the subroutine LR.
144 *
145 * The subroutine LR has weak liveness, as that's what we want to add to
146 * anything live over a call to the subroutine.
147 */
buildSubroutineLRs()148 void GenXLiveness::buildSubroutineLRs()
149 {
150 if (FG->size() == 1)
151 return; // no subroutines
152 // Build a call graph for the FunctionGroup. It is acyclic because there is
153 // no recursion.
154 rebuildCallGraph();
155 // Depth-first walk the graph to propagate live ranges upwards.
156 visitPropagateSLRs(FG->getHead());
157 }
158
159 /***********************************************************************
160 * visitPropagateSLRs : visit a callgraph node to propagate subroutine LR
161 *
162 * This is recursive.
163 */
visitPropagateSLRs(Function * F)164 LiveRange *GenXLiveness::visitPropagateSLRs(Function *F)
165 {
166 LiveRange *LR = getOrCreateLiveRange(F);
167 // Add a segment for just this function.
168 LR->push_back(Segment(Numbering->getNumber(F),
169 Numbering->getNumber(F->back().getTerminator()) + 1, Segment::WEAK));
170 // For each child...
171 genx::CallGraph::Node *N = CG->getNode(F);
172 for (auto i = N->begin(), e = N->end(); i != e; ++i) {
173 // Visit the child to calculate its LR.
174 LiveRange *ChildLR = visitPropagateSLRs(i->Call->getCalledFunction());
175 // Merge it into ours.
176 LR->addSegments(ChildLR);
177 }
178 LR->sortAndMerge();
179 return LR;
180 }
181
182 /***********************************************************************
183 * buildLiveRange : build live range for one value (arg or non-baled inst)
184 *
185 * For a struct value, each element's live range is built separately, even
186 * though they are almost identical. They are not exactly identical,
187 * differing at the def if it is the return value of a call, and at a use
188 * that is a call arg.
189 */
buildLiveRange(Value * V)190 void GenXLiveness::buildLiveRange(Value *V)
191 {
192 auto ST = dyn_cast<StructType>(V->getType());
193 if (!ST) {
194 buildLiveRange(SimpleValue(V));
195 return;
196 }
197 for (unsigned i = 0, e = IndexFlattener::getNumElements(ST); i != e; ++i)
198 buildLiveRange(SimpleValue(V, i));
199 }
200
201 /***********************************************************************
202 * buildLiveRange : build live range for one SimpleValue
203 *
204 * rebuildLiveRange : rebuild live range for a LiveRange struct
205 *
206 * The BBs[] array, one entry per basic block, is temporarily used here to
207 * store the live range for the value within that block. We start by
208 * registering the short live range for the definition, then, for each use,
209 * create a live range in the use's block then recursively scan back
210 * through predecessors until we meet a block where there is already a
211 * live range. This is guaranteed to terminate because of the dominance
212 * property of SSA.
213 *
214 * See Appel "Modern Compiler Implementation in C" 19.6.
215 *
216 * rebuildLiveRange can be called from later passes to rebuild the segments
217 * for a particular live range. If used after coalescing, the live range might
218 * have more than one value, in which case segments are added for each value
219 * and then merged. Thus we assume that, after whatever code change a pass made
220 * to require rebuilding the live range, the coalesced values can still be
221 * validly coalesced, without having any way of checking that.
222 *
223 */
buildLiveRange(SimpleValue V)224 LiveRange *GenXLiveness::buildLiveRange(SimpleValue V)
225 {
226 LiveRange *LR = getOrCreateLiveRange(V);
227 rebuildLiveRange(LR);
228 return LR;
229 }
230
rebuildLiveRange(LiveRange * LR)231 void GenXLiveness::rebuildLiveRange(LiveRange *LR)
232 {
233 LR->getOrDefaultCategory();
234 LR->Segments.clear();
235 for (auto vi = LR->value_begin(), ve = LR->value_end(); vi != ve; ++vi)
236 rebuildLiveRangeForValue(LR, *vi);
237 LR->sortAndMerge();
238 }
239
rebuildLiveRangeForValue(LiveRange * LR,SimpleValue SV)240 void GenXLiveness::rebuildLiveRangeForValue(LiveRange *LR, SimpleValue SV)
241 {
242 Value *V = SV.getValue();
243
244 // This value is a global variable. Its live range is the entire kernel.
245 if (auto GV = getUnderlyingGlobalVariable(V)) {
246 (void)GV;
247 LR->push_back(0, Numbering->getLastNumber());
248 return;
249 }
250
251 std::map<BasicBlock *, Segment> BBRanges;
252 if (auto Func = isUnifiedRet(V)) {
253 // This value is the unified return value of the function Func. Its live
254 // range is from the call to where its post-copy would go just afterwards
255 // for each call site, also from the site of the pre-copy to the return
256 // instruction.
257 for (auto *U: Func->users()) {
258 if (auto *CI = genx::checkFunctionCall(U, Func))
259 LR->push_back(Numbering->getNumber(CI),
260 Numbering->getRetPostCopyNumber(CI, SV.getIndex()));
261 }
262 for (auto fi = Func->begin(), fe = Func->end(); fi != fe; ++fi)
263 if (auto RI = dyn_cast<ReturnInst>(fi->getTerminator()))
264 LR->push_back(Numbering->getRetPreCopyNumber(RI, SV.getIndex()),
265 Numbering->getNumber(RI));
266 return;
267 }
268
269 // Mark the value as live and then almost immediately dead again at the
270 // point where it is defined.
271 unsigned StartNum = 0, EndNum = 0;
272 Function *Func = 0;
273 auto Arg = dyn_cast<Argument>(V);
274 BasicBlock *BB = nullptr;
275 if (Arg) {
276 Func = Arg->getParent();
277 StartNum = Numbering->getNumber(Func);
278 EndNum = StartNum + 1;
279 BB = &Func->front();
280 } else if (auto Phi = dyn_cast<PHINode>(V)) {
281 // Phi node. Treat as defined at the start of the block.
282 EndNum = Numbering->getNumber(Phi) + 1;
283 BB = Phi->getParent();
284 StartNum = Numbering->getNumber(BB);
285 // For a phi node, we also need to register an extra little live range at
286 // the end of each predecessor, from where we will insert a copy to the
287 // end. This is done lower down in this function.
288 } else {
289 StartNum = Numbering->getNumber(V);
290 auto Inst = cast<Instruction>(V);
291 BB = Inst->getParent();
292 auto CI = dyn_cast<CallInst>(V);
293 if (CI) {
294 if (!GenXIntrinsic::isAnyNonTrivialIntrinsic(V)) {
295 // For the return value from a call, move the definition point to the ret
296 // post-copy slot after the call, where the post-copy will be inserted if
297 // it fails to be coalesced with the function's unified return value.
298 StartNum = Numbering->getRetPostCopyNumber(CI, SV.getIndex());
299 }
300 }
301 EndNum = StartNum + 1;
302 if (CI && getTwoAddressOperandNum(CI)) {
303 // Two address op. Move the definition point one earlier, to where
304 // GenXCoalescing will need to insert a copy if coalescing fails.
305 --StartNum;
306 }
307 }
308 BBRanges[BB] = Segment(StartNum, EndNum);
309 // The stack for predecessors that need to be processed:
310 std::vector<BasicBlock *> Stack;
311 // Process each use.
312 for (Value::use_iterator i = V->use_begin(), e = V->use_end();
313 i != e; ++i) {
314 BasicBlock *BB = nullptr;
315 Instruction *user = cast<Instruction>(i->getUser());
316 unsigned Num;
317 if (PHINode *Phi = dyn_cast<PHINode>(user)) {
318 // Use in a phi node. We say that the use is where the phi copy will be
319 // placed in the predecessor block.
320 BB = Phi->getIncomingBlock(*i);
321 Num = Numbering->getPhiNumber(Phi, BB);
322 } else {
323 // Normal use.
324 // For live range purposes, an instruction is considered to be at the
325 // same place as the head of its bale. We need to use getBaleHead to
326 // ensure that we consider it to be there.
327 Instruction *UserHead = Baling->getBaleHead(user);
328 BB = UserHead->getParent();
329 Num = Numbering->getNumber(UserHead);
330 if (auto CI = dyn_cast<CallInst>(user)) {
331 if (CI->isInlineAsm() || IGCLLVM::isIndirectCall(*CI))
332 Num = Numbering->getNumber(UserHead);
333 else {
334 switch (GenXIntrinsic::getAnyIntrinsicID(CI)) {
335 case GenXIntrinsic::not_any_intrinsic:
336 // Use as a call arg. We say that the use is at the arg pre-copy
337 // slot, where the arg copy will be inserted in coalescing. This
338 // assumes that the copies will be in the same order as args in the
339 // call, with struct elements in order too.
340 Num = Numbering->getArgPreCopyNumber(CI, i->getOperandNo(),
341 SV.getIndex());
342 break;
343 default:
344 if (auto OpndNum = getTwoAddressOperandNum(CI);
345 OpndNum && *OpndNum == i->getOperandNo()) {
346 // The use is the two address operand in a two address op. Move
347 // the use point one earlier, to where GenXCoalescing will need
348 // to insert a copy if coalescing fails. If there is any other
349 // use of this value in the same bale, that will not have its use
350 // point one number earlier. The unnecessary interference that
351 // would cause is fixed in the way that twoAddrInterfere()
352 // detects interference.
353 --Num;
354 }
355 break;
356 case GenXIntrinsic::genx_simdcf_goto:
357 // Use in a goto. Treat it as at the branch, as GenXCisaBuilder
358 // writes the goto just before the branch, after any intervening IR.
359 Num = Numbering->getNumber(CI->getParent()->getTerminator());
360 break;
361 }
362 }
363 } else if (auto RI = dyn_cast<ReturnInst>(user)) {
364 // Use in a return. We say that the use is where the ret value
365 // pre-copy will be inserted in coalescing. This assumes that the
366 // copies will be in the same order as the struct elements in the
367 // return value.
368 Num = Numbering->getRetPreCopyNumber(RI, SV.getIndex());
369 }
370 }
371 auto BBRange = &BBRanges[BB];
372 if (BBRange->getEnd()) {
373 // There is already a live range in this block. Extend it if
374 // necessary. No need to scan back from here, so we're done with
375 // this use.
376 if (BBRange->getEnd() < Num)
377 BBRange->setEnd(Num);
378 continue;
379 }
380 // Add a new live range from the start of this block, and remember the
381 // range of blocks that contain a live range (so we don't have to scan
382 // all of them at the end).
383 *BBRange = Segment(Numbering->getNumber(BB), Num);
384 // Push this block's predecessors onto the stack.
385 std::copy(pred_begin(BB), pred_end(BB), std::back_inserter(Stack));
386 // Process stack until empty.
387 while (Stack.size()) {
388 BB = Stack.back();
389 Stack.pop_back();
390 BBRange = &BBRanges[BB];
391 auto BBNum = Numbering->getBBNumber(BB);
392 if (BBRange->getEnd()) {
393 // There is already a live range in this block. Extend it to the end.
394 // No need to scan back from here.
395 BBRange->setEnd(BBNum->EndNumber);
396 continue;
397 }
398 // Add a new live range through the whole of this block, and remember the
399 // range of blocks that contain a live range (so we don't have to scan
400 // all of them at the end).
401 BBRange->setStartEnd(Numbering->getNumber(BB), BBNum->EndNumber);
402 // Push this block's predecessors onto the stack.
403 std::copy(pred_begin(BB), pred_end(BB), std::back_inserter(Stack));
404 }
405 }
406 // Now we can build the live range.
407 for (auto bri = BBRanges.begin(), bre = BBRanges.end(); bri != bre; ++bri) {
408 auto BBRange = &bri->second;
409 LR->push_back(*BBRange);
410 }
411 if (PHINode *Phi = dyn_cast<PHINode>(V)) {
412 // For a phi node, we also need to register an extra little live range at
413 // the end of each predecessor, from where we will insert a copy to the
414 // end.
415 for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
416 auto Pred = Phi->getIncomingBlock(i);
417 auto BBNum = Numbering->getBBNumber(Pred);
418 LR->push_back(Segment(Numbering->getPhiNumber(Phi, Pred),
419 BBNum->EndNumber, Segment::PHICPY));
420 }
421 }
422 LR->sortAndMerge();
423 if (CG) {
424 // Check if the live range crosses any call instruction. If so, add the
425 // appropriate subroutine live range.
426 bool NeedSort = false;
427 auto N = CG->getNode(Func);
428 for (auto i = N->begin(), e = N->end(); i != e; ++i) {
429 auto E = &*i;
430 // See if this call is in a segment of the LR.
431 auto Seg = LR->find(E->Number);
432 if (Seg != LR->end() && Seg->getStart() <= E->Number && Seg->getEnd() > E->Number) {
433 // Yes it is. Merge the subroutine LR of the callee into our LR.
434 if (!E->Call->getCalledFunction()->hasFnAttribute("CMStackCall"))
435 LR->addSegments(getLiveRange(E->Call->getCalledFunction()));
436 NeedSort = true;
437 }
438 }
439 if (NeedSort)
440 LR->sortAndMerge();
441 }
442 if (Arg) {
443 // For a function arg, for each call site, add a segment from the arg
444 // pre-copy site, the point just before the call at which it will be copied
445 // into, up to the call. We assume that any copies before the call
446 // inserted by coalescing will be in the obvious order of args and elements
447 // within args.
448 Function *F = Arg->getParent();
449 if (*FG->begin() != F) { // is a subroutine
450 for (auto *U : F->users()) {
451 if (auto *CI = genx::checkFunctionCall(U, F))
452 LR->push_back(Numbering->getArgPreCopyNumber(CI, Arg->getArgNo(),
453 SV.getIndex()),
454 Numbering->getNumber(CI));
455 }
456 }
457 }
458 }
459
removeBale(Bale & B)460 void GenXLiveness::removeBale(Bale &B) {
461 for (auto bi = B.begin(), be = B.end(); bi != be; ++bi)
462 removeValue(bi->Inst);
463 }
464
465 /***********************************************************************
466 * removeValue : remove the supplied value from its live range, and delete
467 * the range if it now has no values
468 *
469 * removeValueNoDelete : same, but do not delete the LR if it is now
470 * valueless
471 *
472 * Calling this with a value that does not have a live range is silently
473 * ignored.
474 */
removeValue(Value * V)475 void GenXLiveness::removeValue(Value *V)
476 {
477 for (unsigned i = 0, e = IndexFlattener::getNumElements(V->getType()); i != e; ++i)
478 removeValue(SimpleValue(V, i));
479 }
480
removeValue(SimpleValue V)481 void GenXLiveness::removeValue(SimpleValue V)
482 {
483 LiveRange *LR = removeValueNoDelete(V);
484 if (LR && !LR->Values.size()) {
485 // V was the only value in LR. Remove LR completely.
486 delete LR;
487 }
488 }
489
removeValueNoDelete(SimpleValue V)490 LiveRange *GenXLiveness::removeValueNoDelete(SimpleValue V)
491 {
492 LiveRangeMap_t::iterator i = LiveRangeMap.find(V);
493 if (i == end())
494 return nullptr;
495 LiveRange *LR = i->second;
496 LiveRangeMap.erase(i);
497 // Remove V from LR.
498 unsigned j;
499 for (j = 0; LR->Values[j].get() != V; ++j) {
500 IGC_ASSERT(j != LR->Values.size());
501 }
502 if (&LR->Values[j] != &LR->Values.back())
503 LR->Values[j] = LR->Values.back();
504 LR->Values.pop_back();
505 return LR;
506 }
507
508 /***********************************************************************
509 * removeValuesNoDelete : remove all values from the live range, but do not
510 * delete the LR
511 */
removeValuesNoDelete(LiveRange * LR)512 void GenXLiveness::removeValuesNoDelete(LiveRange *LR)
513 {
514 for (auto vi = LR->value_begin(), ve = LR->value_end(); vi != ve; ++vi)
515 LiveRangeMap.erase(*vi);
516 LR->value_clear();
517 }
518
519 /***********************************************************************
520 * replaceValue : update liveness such that NewVal has OldVal's live range,
521 * and OldVal does not have one at all.
522 */
replaceValue(Value * OldVal,Value * NewVal)523 void GenXLiveness::replaceValue(Value *OldVal, Value *NewVal)
524 {
525 for (unsigned i = 0, e = IndexFlattener::getNumElements(OldVal->getType());
526 i != e; ++i)
527 replaceValue(SimpleValue(OldVal, i), SimpleValue(NewVal, i));
528 }
529
replaceValue(SimpleValue OldVal,SimpleValue NewVal)530 void GenXLiveness::replaceValue(SimpleValue OldVal, SimpleValue NewVal)
531 {
532 LiveRangeMap_t::iterator i = LiveRangeMap.find(OldVal);
533 IGC_ASSERT(i != end());
534 LiveRange *LR = i->second;
535 LiveRangeMap.erase(i);
536 LiveRangeMap[NewVal] = LR;
537 unsigned j = 0;
538 IGC_ASSERT(!LR->Values.empty());
539 for (j = 0; LR->Values[j].get() != OldVal; ++j)
540 IGC_ASSERT(j != LR->Values.size());
541 LR->Values[j] = NewVal;
542 }
543
544 /***********************************************************************
545 * getOrCreateLiveRange : get live range for a value, creating if necessary
546 */
getOrCreateLiveRange(SimpleValue V)547 LiveRange *GenXLiveness::getOrCreateLiveRange(SimpleValue V)
548 {
549 LiveRangeMap_t::iterator i = LiveRangeMap.insert(
550 LiveRangeMap_t::value_type(V, 0)).first;
551 LiveRange *LR = i->second;
552 if (!LR) {
553 // Newly created map entry. Create the LiveRange for it.
554 LR = new LiveRange;
555 LR->Values.push_back(V);
556 i->second = LR;
557 LR->setAlignmentFromValue(
558 *DL, V, Subtarget ? Subtarget->getGRFByteSize() : defaultGRFByteSize);
559 }
560 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
561 // Give the Value a name if it doesn't already have one.
562 if (!V.getValue()->getName().size()) {
563 std::string NameBuf;
564 StringRef Name = "arg";
565 if (auto Inst = dyn_cast<Instruction>(V.getValue())) {
566 unsigned IID = GenXIntrinsic::getAnyIntrinsicID(V.getValue());
567 if (GenXIntrinsic::isAnyNonTrivialIntrinsic(IID)) {
568 // For an intrinsic call, use the intrinsic name after the
569 // final period.
570 NameBuf = GenXIntrinsic::getAnyName(IID, None);
571 Name = NameBuf;
572 size_t Period = Name.rfind('.');
573 if (Period != StringRef::npos)
574 Name = Name.slice(Period + 1, Name.size());
575 } else
576 Name = Inst->getOpcodeName();
577 }
578 V.getValue()->setName(Name);
579 }
580 #endif
581 return LR;
582 }
583
getOrCreateLiveRange(SimpleValue V,unsigned Cat,unsigned LogAlign)584 LiveRange *GenXLiveness::getOrCreateLiveRange(SimpleValue V, unsigned Cat, unsigned LogAlign) {
585 auto LR = getOrCreateLiveRange(V);
586 LR->setCategory(Cat);
587 LR->setLogAlignment(LogAlign);
588 return LR;
589 }
590
591 /***********************************************************************
592 * eraseLiveRange : get rid of live range for a Value, possibly multiple
593 * ones if it is a struct value
594 */
eraseLiveRange(Value * V)595 void GenXLiveness::eraseLiveRange(Value *V)
596 {
597 auto ST = dyn_cast<StructType>(V->getType());
598 if (!ST) {
599 eraseLiveRange(SimpleValue(V));
600 return;
601 }
602 for (unsigned i = 0, e = IndexFlattener::getNumElements(ST); i != e; ++i)
603 eraseLiveRange(SimpleValue(V, i));
604 }
605
606 /***********************************************************************
607 * eraseLiveRange : get rid of live range for a Value, if any
608 */
eraseLiveRange(SimpleValue V)609 void GenXLiveness::eraseLiveRange(SimpleValue V)
610 {
611 auto LR = getLiveRangeOrNull(V);
612 if (LR)
613 eraseLiveRange(LR);
614 }
615
616 /***********************************************************************
617 * eraseLiveRange : get rid of the specified live range, and remove its
618 * values from the map
619 */
eraseLiveRange(LiveRange * LR)620 void GenXLiveness::eraseLiveRange(LiveRange *LR)
621 {
622 for (auto vi = LR->value_begin(), ve = LR->value_end(); vi != ve; ++vi)
623 LiveRangeMap.erase(*vi);
624 delete LR;
625 }
626
627 /***********************************************************************
628 * getLiveRangeOrNull : get live range for value, or 0 if none
629 *
630 * The returned LiveRange pointer is valid only until the next time the
631 * live ranges are modified, including the case of coalescing.
632 */
getLiveRangeOrNull(SimpleValue V) const633 const LiveRange *GenXLiveness::getLiveRangeOrNull(SimpleValue V) const
634 {
635 auto i = LiveRangeMap.find(V);
636 if (i == end())
637 return nullptr;
638 return i->second;
639 }
640
getLiveRangeOrNull(SimpleValue V)641 LiveRange *GenXLiveness::getLiveRangeOrNull(SimpleValue V)
642 {
643 return const_cast<LiveRange*>(static_cast<const GenXLiveness*>(this)->getLiveRangeOrNull(V));
644 }
645
646 /***********************************************************************
647 * getLiveRange : get live range for value
648 *
649 * The returned LiveRange pointer is valid only until the next time the
650 * live ranges are modified, including the case of coalescing.
651 */
getLiveRange(SimpleValue V)652 LiveRange *GenXLiveness::getLiveRange(SimpleValue V)
653 {
654 LiveRange *LR = getLiveRangeOrNull(V);
655 IGC_ASSERT_MESSAGE(LR, "no live range found");
656 return LR;
657 }
658
659 /***********************************************************************
660 * getUnifiedRet : get/create unified return value for a function
661 *
662 * Returns already created unified value, or creates new one
663 * if there was no such.
664 */
getUnifiedRet(Function * F)665 Value *GenXLiveness::getUnifiedRet(Function *F) {
666 auto *Ret = getUnifiedRetIfExist(F);
667 if (!Ret)
668 return createUnifiedRet(F);
669 return Ret;
670 }
671
getUnifiedRetIfExist(Function * F) const672 Value *GenXLiveness::getUnifiedRetIfExist(Function *F) const {
673 auto RetIt = UnifiedRets.find(F);
674 if (RetIt == UnifiedRets.end())
675 return nullptr;
676 return RetIt->second;
677 }
678
679 /***********************************************************************
680 * createUnifiedRet : create unified return value for a function
681 *
682 * To allow all returns in a function and all results of calls to that
683 * function to use the same register, we have a dummy "unified return
684 * value".
685 *
686 * Cannot be called on a function with void return type.
687 *
688 * This also creates the LiveRange for the unified return value, or
689 * multiple ones if it is struct type, and sets the category to the same as in
690 * one of the return instructions.
691 */
createUnifiedRet(Function * F)692 Value *GenXLiveness::createUnifiedRet(Function *F) {
693 IGC_ASSERT_MESSAGE(!F->isDeclaration(), "must be a function definition");
694 IGC_ASSERT_MESSAGE(UnifiedRets.find(F) == UnifiedRets.end(),
695 "Unified ret must not have been already created");
696 Type *Ty = F->getReturnType();
697 IGC_ASSERT(!Ty->isVoidTy());
698 auto URet = genx::createUnifiedRet(Ty, "", F->getParent());
699 // Find some return inst.
700 ReturnInst *Ret = nullptr;
701 for (auto fi = F->begin(), fe = F->end(); fi != fe; ++fi)
702 if ((Ret = dyn_cast<ReturnInst>(fi->getTerminator())))
703 break;
704 IGC_ASSERT_MESSAGE(Ret, "must find return instruction");
705 Value *RetVal = Ret->getOperand(0);
706 // Use the categories of its operand to set the categories of the unified
707 // return value.
708 for (unsigned StructIdx = 0, NumElements = IndexFlattener::getNumElements(Ty);
709 StructIdx != NumElements; ++StructIdx) {
710 int Cat = getOrCreateLiveRange(SimpleValue(RetVal, StructIdx))
711 ->getOrDefaultCategory();
712 SimpleValue SV(URet, StructIdx);
713 getOrCreateLiveRange(SV)->setCategory(Cat);
714 }
715
716 UnifiedRets[F] = URet;
717 UnifiedRetToFunc[URet] = F;
718 return URet;
719 }
720
721 /***********************************************************************
722 * isUnifiedRet : test whether a value is a unified return value
723 *
724 * A unified ret value is a call instruction that is
725 * not attached to any BB, and is in the UnifiedRetFunc map.
726 */
isUnifiedRet(Value * V)727 Function *GenXLiveness::isUnifiedRet(Value *V)
728 {
729 // Quick checks first.
730 auto Inst = dyn_cast<CallInst>(V);
731 if (!Inst)
732 return nullptr;
733 if (Inst->getParent())
734 return nullptr;
735 // Then map lookup.
736 auto i = UnifiedRetToFunc.find(V);
737 if (i == UnifiedRetToFunc.end())
738 return nullptr;
739 return i->second;
740 }
741
742 /***********************************************************************
743 * moveUnifiedRet : move a function's unified return value to another function
744 *
745 * This is used when replacing a function with a new one in GenXArgIndirection.
746 */
moveUnifiedRet(Function * OldF,Function * NewF)747 void GenXLiveness::moveUnifiedRet(Function *OldF, Function *NewF)
748 {
749 auto i = UnifiedRets.find(OldF);
750 if (i == UnifiedRets.end())
751 return;
752 Value *UR = i->second;
753 UnifiedRets[NewF] = UR;
754 UnifiedRets.erase(i);
755 UnifiedRetToFunc[UR] = NewF;
756 }
757
758 /***********************************************************************
759 * find : given an instruction number, find a segment in a live range
760 *
761 * If the number is within a segment, or is just on its end point, that
762 * segment is returned. If the number is in a hole, the next segment
763 * after the hole is returned. If the number is before the first
764 * segment, the first segment is returned. If the number is after the
765 * last segment, end() is returned.
766 */
find(unsigned Pos)767 LiveRange::iterator LiveRange::find(unsigned Pos)
768 {
769 size_t Len = size();
770 if (!Len)
771 return end();
772 if (Pos > Segments[Len - 1].getEnd())
773 return end();
774 iterator I = begin();
775 do {
776 size_t Mid = Len >> 1;
777 if (Pos <= I[Mid].getEnd())
778 Len = Mid;
779 else
780 I += Mid + 1, Len -= Mid + 1;
781 } while (Len);
782 IGC_ASSERT(I->getEnd() >= Pos);
783 return I;
784 }
785
getCategoryForPredefinedVariable(SimpleValue SV)786 static unsigned getCategoryForPredefinedVariable(SimpleValue SV) {
787 const unsigned Category =
788 llvm::StringSwitch<unsigned>(SV.getValue()->getName())
789 .Case(genx::BSSVariableName, RegCategory::SURFACE)
790 .Default(RegCategory::NUMCATEGORIES);
791 IGC_ASSERT_MESSAGE(Category != RegCategory::NUMCATEGORIES,
792 "Unhandled predefined variable");
793 return Category;
794 }
795
isPredefinedVariable(SimpleValue SV)796 static bool isPredefinedVariable(SimpleValue SV) {
797 Value *V = SV.getValue();
798 IGC_ASSERT_MESSAGE(V, "Expected value");
799 if (!isa<GlobalVariable>(V))
800 return false;
801 IGC_ASSERT_MESSAGE(SV.getIndex() == 0,
802 "Expected single simple value for predefined variable");
803 auto *GV = cast<GlobalVariable>(V);
804 return GV->hasAttribute(genx::VariableMD::VCPredefinedVariable);
805 }
806
807 /***********************************************************************
808 * getOrDefaultCategory : get category; if none, set default
809 *
810 * The default category is PREDICATE for i1 or a vector of i1, or GENERAL
811 * for anything else.
812 */
getOrDefaultCategory()813 unsigned LiveRange::getOrDefaultCategory()
814 {
815 unsigned Cat = getCategory();
816 if (Cat != RegCategory::NONE)
817 return Cat;
818 IGC_ASSERT(!value_empty());
819 SimpleValue SV = *value_begin();
820 if (isPredefinedVariable(SV)) {
821 Cat = getCategoryForPredefinedVariable(SV);
822 setCategory(Cat);
823 return Cat;
824 }
825 Type *Ty = IndexFlattener::getElementType(
826 SV.getValue()->getType(), SV.getIndex());
827 if (Ty->getScalarType()->isIntegerTy(1))
828 Cat = RegCategory::PREDICATE;
829 else
830 Cat = RegCategory::GENERAL;
831 setCategory(Cat);
832 return Cat;
833 }
834
835 /***********************************************************************
836 * interfere : check whether two live ranges interfere
837 *
838 * Two live ranges interfere if there is a segment from each that overlap
839 * and they are considered to cause interference by
840 * checkIfOverlappingSegmentsInterfere below.
841 */
interfere(LiveRange * LR1,LiveRange * LR2)842 bool GenXLiveness::interfere(LiveRange *LR1, LiveRange *LR2) {
843 if (CoalescingDisabled)
844 return true;
845 return getSingleInterferenceSites(LR1, LR2, nullptr);
846 }
847
848 /***********************************************************************
849 * twoAddrInterfere : check whether two live ranges interfere, allowing for
850 * single number interference sites at two address ops
851 *
852 * Return: true if they interfere
853 *
854 * Two live ranges interfere if there is a segment from each that overlap
855 * and are not both weak.
856 *
857 * But, if each interfering segment is a single number that is the precopy
858 * site of a two address op, and the result of the two address op is in one LR
859 * and the two address operand is in the other, then that is not counted as
860 * interference.
861 *
862 * That provision allows for coalescing at a two address op where the two
863 * address operand has already been copy coalesced with, or is the same value
864 * as, a different operand in the same bale, as follows:
865 *
866 * Suppose the two address op a has number N, and it has two address operand b
867 * and some other operand c in the same bale:
868 *
869 * N-1: (space for precopy)
870 * N: a = op(b, c)
871 *
872 * with live ranges
873 * a:[N-1,..)
874 * b:[..,N-1)
875 * c:[..,N)
876 *
877 * Then a and b can coalesce.
878 *
879 * But suppose b and c are the same value, or had previously been copy coalesced.
880 * Then we have live ranges
881 * a:[N-1,..)
882 * b,c:[..,N)
883 *
884 * and a and b now interfere needlessly.
885 *
886 * This function is called on an attempt to coalesce a and b (or rather the
887 * live range containing a and the live range containing b). In it, we see
888 * that there is a single number segment of interference [N-1,N), where a is
889 * the result and b is the two address operand of the two address op at N. Thus
890 * we discount that segment of interference, and a and b can still coalesce.
891 *
892 * Note that this formulation allows for there to be multiple such sites because
893 * of multiple two address results being already coalesced together through phi
894 * nodes.
895 */
twoAddrInterfere(LiveRange * LR1,LiveRange * LR2)896 bool GenXLiveness::twoAddrInterfere(LiveRange *LR1, LiveRange *LR2) {
897 if (CoalescingDisabled)
898 return true;
899 SmallVector<unsigned, 4> Sites;
900 if (getSingleInterferenceSites(LR1, LR2, &Sites))
901 return true; // interferes, not just single number sites
902 if (Sites.empty())
903 return false; // does not interfere at all
904 // Put the single number sites in a set.
905 SmallSet<unsigned, 4> SitesSet;
906 LLVM_DEBUG(dbgs() << "got single number interference sites:");
907 for (auto i = Sites.begin(), e = Sites.end(); i != e; ++i) {
908 LLVM_DEBUG(dbgs() << " " << *i);
909 SitesSet.insert(*i);
910 }
911 LLVM_DEBUG(dbgs() << "\nbetween:\n" << *LR1 << "\n" << *LR2 << "\n");
912 Sites.clear();
913 // Check each def in LR1 and LR2 for being a two address op that causes us to
914 // discount a single number interference site.
915 for (auto LR = LR1, OtherLR = LR2; LR;
916 LR = LR == LR1 ? LR2 : nullptr, OtherLR = LR1) {
917 for (auto vi = LR->value_begin(), ve = LR->value_end(); vi != ve; ++vi) {
918 auto CI = dyn_cast<CallInst>(vi->getValue());
919 if (!CI)
920 continue;
921 auto OperandNum = getTwoAddressOperandNum(CI);
922 if (!OperandNum)
923 continue;
924 // Got a two addr op. Check whether the two addr operand is in the other
925 // LR.
926 if (getLiveRangeOrNull(CI->getOperand(*OperandNum)) != OtherLR)
927 continue;
928 // Discount the single number interference site here, if there is one.
929 SitesSet.erase(getNumbering()->getNumber(CI) - 1);
930 }
931 }
932 // If we have discounted all sites, then there is no interference.
933 return !SitesSet.empty();
934 }
935
936 /***********************************************************************
937 * getSingleInterferenceSites : check whether two live ranges interfere,
938 * returning single number interference sites
939 *
940 * Enter: LR1, LR2 = live ranges to check
941 * Sites = vector in which to store single number interference sites,
942 * or 0 if we do not want to collect them
943 *
944 * Return: true if the live ranges interfere other than as reflected in Sites
945 *
946 * Two live ranges interfere if there is a segment from each that overlap
947 * and are not both weak.
948 *
949 * If Sites is 0 (the caller does not want the Sites list), then the function
950 * returns true if there is any interference.
951 *
952 * If Sites is not 0, then any interference in a single number segment, for
953 * example [19,20), causes the start number to be pushed into Sites. The
954 * function returns true only if there is interference that cannot be described
955 * in Sites.
956 */
getSingleInterferenceSites(LiveRange * LR1,LiveRange * LR2,SmallVectorImpl<unsigned> * Sites)957 bool GenXLiveness::getSingleInterferenceSites(LiveRange *LR1, LiveRange *LR2,
958 SmallVectorImpl<unsigned> *Sites)
959 {
960 // Swap if necessary to make LR1 the one with more segments.
961 if (LR1->size() < LR2->size())
962 std::swap(LR1, LR2);
963 auto Idx2 = LR2->begin(), End2 = LR2->end();
964 // Find segment in LR1 that contains or is the next after the start
965 // of the first segment in LR2, including the case that the start of
966 // the LR2 segment abuts the end of the LR1 segment.
967 auto Idx1 = LR1->find(Idx2->getStart()), End1 = LR1->end();
968 if (Idx1 == End1)
969 return false;
970 for (;;) {
971 // Check for overlap.
972 if (Idx1->getStart() < Idx2->getStart()) {
973 if (Idx1->getEnd() > Idx2->getStart())
974 if (checkIfOverlappingSegmentsInterfere(LR1, Idx1, LR2, Idx2)) {
975 // Idx1 overlaps Idx2. Check if it is a single number overlap that can
976 // be pushed into Sites.
977 if (!Sites || Idx1->getEnd() != Idx2->getStart() + 1)
978 return true;
979 Sites->push_back(Idx2->getStart());
980 }
981 } else {
982 if (Idx1->getStart() < Idx2->getEnd())
983 if (checkIfOverlappingSegmentsInterfere(LR1, Idx1, LR2, Idx2)) {
984 // Idx2 overlaps Idx1. Check if it is a single number overlap that can
985 // be pushed into Sites.
986 if (!Sites || Idx2->getEnd() != Idx1->getStart() + 1)
987 return true;
988 Sites->push_back(Idx1->getStart());
989 }
990 }
991 // Advance whichever one has the lowest End.
992 if (Idx1->getEnd() < Idx2->getEnd()) {
993 if (++Idx1 == End1)
994 return false;
995 } else {
996 if (++Idx2 == End2)
997 return false;
998 }
999 }
1000 }
1001
1002 /***********************************************************************
1003 * checkIfOverlappingSegmentsInterfere : given two segments that have been
1004 * shown to overlap, check whether their strengths make them interfere
1005 *
1006 * If both segments are weak, they do not interfere.
1007 *
1008 * Interference between a normal segment in one LR and a phicpy segment in the
1009 * other LR is ignored, as long as the phicpy segment relates to a phi incoming
1010 * where the phi node is in the LR with the phicpy segment and the incoming
1011 * value is in the LR with the strong segment. This is used to avoid
1012 * unnecessary interference for a phi incoming through a critical edge, where
1013 * the incoming is likely to be used in the other successor as well.
1014 */
checkIfOverlappingSegmentsInterfere(LiveRange * LR1,Segment * S1,LiveRange * LR2,Segment * S2)1015 bool GenXLiveness::checkIfOverlappingSegmentsInterfere(
1016 LiveRange *LR1, Segment *S1, LiveRange *LR2, Segment *S2)
1017 {
1018 if (S1->isWeak() && S2->isWeak())
1019 return false; // both segments weak
1020 if (S2->Strength == Segment::PHICPY) {
1021 // Swap so that if either segment is phicpy, then it is S1 for the check
1022 // below.
1023 std::swap(LR1, LR2);
1024 std::swap(S1, S2);
1025 }
1026 if (S1->Strength != Segment::PHICPY)
1027 return true;
1028 // S1 is phicpy. If its corresponding phi cpy insertion point is for a phi
1029 // node in LR1 and an incoming in LR2, then this does not cause interference.
1030 auto PhiIncoming = Numbering->getPhiIncomingFromNumber(S1->getStart());
1031 auto PhiFound = std::find_if(
1032 PhiIncoming.begin(), PhiIncoming.end(), [LR1, this](auto It) {
1033 PHINode *Phi = It.first;
1034 IGC_ASSERT_MESSAGE(Phi, "phi incoming not found");
1035 return getLiveRange(Phi) == LR1;
1036 });
1037 if (PhiFound == PhiIncoming.end())
1038 return true; // phi not in LR1, interferes
1039
1040 PHINode *Phi = PhiFound->first;
1041 unsigned IncomingBBIndex = PhiFound->second;
1042 if (getLiveRangeOrNull(Phi->getIncomingValue(IncomingBBIndex)) != LR2)
1043 return true; // phi incoming not in LR2, interferes
1044 // Conditions met -- does not cause interference.
1045 return false;
1046 }
1047
1048 /***********************************************************************
1049 * coalesce : coalesce two live ranges that do not interfere
1050 *
1051 * Enter: LR1 = first live range
1052 * LR2 = second live range
1053 * DisallowCASC = true to disallow call arg special coalescing
1054 * into the resulting live range
1055 *
1056 * Return: new live range (LR1 and LR2 now invalid)
1057 */
coalesce(LiveRange * LR1,LiveRange * LR2,bool DisallowCASC)1058 LiveRange *GenXLiveness::coalesce(LiveRange *LR1, LiveRange *LR2,
1059 bool DisallowCASC)
1060 {
1061 IGC_ASSERT_MESSAGE(LR1 != LR2, "cannot coalesce an LR to itself");
1062 IGC_ASSERT_MESSAGE(LR1->Category == LR2->Category,
1063 "cannot coalesce two LRs with different categories");
1064 // Make LR1 the one with the longer list of segments.
1065 if (LR2->Segments.size() > LR1->Segments.size()) {
1066 LiveRange *temp = LR1;
1067 LR1 = LR2;
1068 LR2 = temp;
1069 }
1070 LLVM_DEBUG(
1071 dbgs() << "Coalescing \"";
1072 LR1->print(dbgs());
1073 dbgs() << "\" and \"";
1074 LR2->print(dbgs());
1075 dbgs() << "\"\n"
1076 );
1077 // Do the merge of the segments.
1078 merge(LR1, LR2);
1079 // Copy LR2's values across to LR1.
1080 for (auto i = LR2->value_begin(), e = LR2->value_end(); i != e; ++i)
1081 LiveRangeMap[LR1->addValue(*i)] = LR1;
1082 // Use the largest alignment from the two LRs.
1083 LR1->LogAlignment = std::max(LR1->LogAlignment, LR2->LogAlignment);
1084 // If either LR has a non-zero offset, use it.
1085 IGC_ASSERT(!LR1->Offset || !LR2->Offset);
1086 LR1->Offset |= LR2->Offset;
1087 // Set DisallowCASC.
1088 LR1->DisallowCASC |= LR2->DisallowCASC | DisallowCASC;
1089 delete LR2;
1090 LLVM_DEBUG(
1091 dbgs() << " giving \"";
1092 LR1->print(dbgs());
1093 dbgs() << "\"\n"
1094 );
1095 return LR1;
1096 }
1097
1098 /***********************************************************************
1099 * copyInterfere : check whether two live ranges copy-interfere
1100 *
1101 * Two live ranges LR1 and LR2 copy-interfere (a non-commutative relation)
1102 * if LR1 includes a value that is a phi node whose definition is within
1103 * LR2.
1104 */
copyInterfere(LiveRange * LR1,LiveRange * LR2)1105 bool GenXLiveness::copyInterfere(LiveRange *LR1, LiveRange *LR2) {
1106 if (CoalescingDisabled)
1107 return true;
1108 // Find a phi node value in LR1. It can have at most one, because only
1109 // copy coalescing has occurred up to now, and copy coalescing does not
1110 // occur at a phi node.
1111 for (unsigned i = 0, e = LR1->Values.size(); i != e; ++i) {
1112 auto Phi = dyn_cast<PHINode>(LR1->Values[i].getValue());
1113 if (!Phi)
1114 continue;
1115 // Found a phi node in LR1. A phi node has multiple instruction numbers,
1116 // one for each incoming block. See if any one of those is in LR2's
1117 // live range.
1118 for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i)
1119 if (LR2->contains(Numbering->getPhiNumber(Phi, Phi->getIncomingBlock(i))))
1120 return true;
1121 break;
1122 }
1123 return false; // no phi node found
1124 }
1125
1126 /***********************************************************************
1127 * wrapsAround : detects if V1 is a phi node and V2 wraps round to a use
1128 * in a phi node in the same basic block as V1 and after it
1129 */
wrapsAround(Value * V1,Value * V2)1130 bool GenXLiveness::wrapsAround(Value *V1, Value *V2)
1131 {
1132 auto PhiDef = dyn_cast<PHINode>(V1);
1133 if (!PhiDef)
1134 return false;
1135 for (auto ui = V2->use_begin(), ue = V2->use_end(); ui != ue; ++ui) {
1136 if (auto PhiUse = dyn_cast<PHINode>(ui->getUser())) {
1137 if (PhiUse->getParent() == PhiDef->getParent()) {
1138 // Phi use in the same BB. Scan until we find PhiDef or the end
1139 // of the phi nodes.
1140 while (PhiUse != PhiDef) {
1141 PhiUse = dyn_cast<PHINode>(PhiUse->getNextNode());
1142 if (!PhiUse)
1143 return true; // reach end of phi nodes
1144 }
1145 }
1146 }
1147 }
1148 return false;
1149 }
1150
1151 /***********************************************************************
1152 * insertCopy : insert a copy of a non-struct value
1153 *
1154 * Enter: InputVal = value to copy
1155 * LR = live range to add the new value to (0 to avoid adjusting
1156 * live ranges)
1157 * InsertBefore = insert copy before this inst
1158 * Name = name to give the new value
1159 * Number = number to give the new instruction(s), 0 for none
1160 *
1161 * Return: The new copy instruction
1162 *
1163 * This inserts multiple copies if the input value is a vector that is
1164 * bigger than two GRFs or a non power of two size.
1165 *
1166 * This method is mostly used from GenXCoalescing, which passes an LR to
1167 * add the new copied value to.
1168 *
1169 * It is also used from GenXLiveRange if it needs to add a copy to break an
1170 * overlapping circular phi value, in which case LR is 0 as we do not want to
1171 * adjust live ranges. Also at this stage there is no baling info to update.
1172 */
insertCopy(Value * InputVal,LiveRange * LR,Instruction * InsertBefore,const Twine & Name,unsigned Number,const GenXSubtarget * ST)1173 Instruction *GenXLiveness::insertCopy(Value *InputVal, LiveRange *LR,
1174 Instruction *InsertBefore,
1175 const Twine &Name, unsigned Number,
1176 const GenXSubtarget *ST) {
1177 IGC_ASSERT(!isa<Constant>(InputVal));
1178 bool AdjustLRs = LR != nullptr;
1179 LiveRange *SourceLR = nullptr;
1180 if (AdjustLRs)
1181 SourceLR = getLiveRange(InputVal);
1182 auto InputTy = InputVal->getType();
1183 if (InputTy->getScalarType()->isIntegerTy(1)) {
1184 // The input is a predicate.
1185 if (!isa<Constant>(InputVal)) {
1186 // The predicate input is not a constant.
1187 // There is no way in vISA of copying from one
1188 // predicate to another, so we copy all 0s into the destination
1189 // then "or" the source into it.
1190 Instruction *NewInst = CastInst::Create(Instruction::BitCast,
1191 Constant::getNullValue(InputTy), InputTy, Name, InsertBefore);
1192 Numbering->setNumber(NewInst, Number);
1193 if (AdjustLRs)
1194 setLiveRange(SimpleValue(NewInst), LR);
1195 NewInst = BinaryOperator::Create(Instruction::Or, NewInst, InputVal, Name,
1196 InsertBefore);
1197 Numbering->setNumber(NewInst, Number);
1198 if (AdjustLRs)
1199 setLiveRange(SimpleValue(NewInst), LR);
1200 return NewInst;
1201 }
1202 // Predicate input is constant.
1203 auto NewInst = CastInst::Create(Instruction::BitCast, InputVal,
1204 InputTy, Name, InsertBefore);
1205 Numbering->setNumber(NewInst, Number);
1206 if (AdjustLRs)
1207 setLiveRange(SimpleValue(NewInst), LR);
1208 return NewInst;
1209 }
1210 Instruction *NewInst = nullptr;
1211 if (InputTy->isPointerTy()) {
1212 // BitCast used to represent a normal copy.
1213 NewInst = CastInst::Create(Instruction::BitCast, InputVal,
1214 InputVal->getType(), Name, InsertBefore);
1215 if (Number)
1216 Numbering->setNumber(NewInst, Number);
1217 if (AdjustLRs)
1218 setLiveRange(SimpleValue(NewInst), LR);
1219 return NewInst;
1220 }
1221
1222 Region R(InputVal);
1223 IGC_ASSERT(ST);
1224 unsigned MaxNum = getLegalRegionSizeForTarget(
1225 *ST, R, /* StartIdx */ 0, /* Allow2D */ false, R.NumElements);
1226 // Adjust size to Exec size
1227 MaxNum = std::min(MaxNum, TotalEMSize);
1228 if (exactLog2(R.NumElements) >= 0 && R.NumElements <= MaxNum) {
1229 // Can be done with a single copy.
1230 if (SourceLR && (SourceLR->Category != RegCategory::GENERAL
1231 || (LR && LR->Category != RegCategory::GENERAL))) {
1232 // Need a category conversion (including the case that the two
1233 // categories are the same but not GENERAL).
1234 NewInst = createConvert(InputVal, Name, InsertBefore);
1235 } else {
1236 // BitCast used to represent a normal copy.
1237 NewInst = CastInst::Create(Instruction::BitCast, InputVal,
1238 InputVal->getType(), Name, InsertBefore);
1239 }
1240 if (Number)
1241 Numbering->setNumber(NewInst, Number);
1242 if (AdjustLRs)
1243 setLiveRange(SimpleValue(NewInst), LR);
1244 return NewInst;
1245 }
1246
1247 auto collectFragment = [](Value *V, unsigned MaxFrag,
1248 SmallVectorImpl<std::pair<unsigned, unsigned>>& Frag,
1249 unsigned MaxElt) {
1250 while (!isa<UndefValue>(V)) {
1251 if (!GenXIntrinsic::isWrRegion(V))
1252 return true;
1253 IntrinsicInst *WII = cast<IntrinsicInst>(V);
1254 Region R = makeRegionFromBaleInfo(WII, BaleInfo());
1255 if (R.Indirect || !R.isContiguous() || !R.isWholeNumRows())
1256 return true;
1257 if ((R.Offset % R.ElementBytes) != 0)
1258 return true;
1259 unsigned Base = R.Offset / R.ElementBytes;
1260 for (unsigned Offset = 0; Offset < R.NumElements; /*EMPTY*/) {
1261 unsigned NumElts = std::min(MaxElt, R.NumElements - Offset);
1262 // Round NumElts down to power of 2. That is how many elements we
1263 // are copying this time round the loop.
1264 NumElts = 1 << genx::log2(NumElts);
1265 Frag.push_back(std::make_pair(Base + Offset, NumElts));
1266 Offset += NumElts;
1267 }
1268 V = WII->getOperand(0);
1269 }
1270 if (Frag.size() > MaxFrag)
1271 return true;
1272 std::sort(Frag.begin(), Frag.end());
1273 return false;
1274 };
1275
1276 unsigned NumElements = R.NumElements;
1277 SmallVector<std::pair<unsigned, unsigned>, 8> Fragments;
1278 unsigned MaxCopies = (NumElements + MaxNum - 1) / MaxNum;
1279 if (collectFragment(InputVal, MaxCopies, Fragments, MaxNum)) {
1280 Fragments.clear();
1281 for (unsigned Offset = 0; Offset < NumElements; /*EMPTY*/) {
1282 unsigned NumElts = std::min(MaxNum, NumElements - Offset);
1283 // Round NumElts down to power of 2. That is how many elements we
1284 // are copying this time round the loop.
1285 NumElts = 1 << genx::log2(NumElts);
1286 Fragments.push_back(std::make_pair(Offset, NumElts));
1287 Offset += NumElts;
1288 }
1289 }
1290 // Need to split the copy up. Start with an undef destination.
1291 Value *Res = UndefValue::get(InputVal->getType());
1292 for (auto &I : Fragments) {
1293 unsigned Offset = I.first;
1294 // Set the subregion.
1295 R.NumElements = I.second;
1296 R.Width = R.NumElements;
1297 R.Offset = Offset * R.ElementBytes;
1298 // Create the rdregion. Do not add this to a live range because it is
1299 // going to be baled in to the wrregion.
1300 Instruction *RdRegion = R.createRdRegion(InputVal, Name, InsertBefore,
1301 DebugLoc(), true/*AllowScalar*/);
1302 if (Baling)
1303 Baling->setBaleInfo(RdRegion, BaleInfo(BaleInfo::RDREGION, 0));
1304 if (Number)
1305 Numbering->setNumber(RdRegion, Number);
1306 // Create the wrregion, and mark that it bales in the rdregion (operand 1).
1307 NewInst = R.createWrRegion(Res, RdRegion, Name, InsertBefore, DebugLoc());
1308 if (Number)
1309 Numbering->setNumber(NewInst, Number);
1310 if (Baling) {
1311 BaleInfo BI(BaleInfo::WRREGION);
1312 BI.setOperandBaled(1);
1313 Baling->setBaleInfo(NewInst, BI);
1314 }
1315 if (AdjustLRs) {
1316 // Add the last wrregion to the live range (thus coalescing them all
1317 // together and in with the phi node or two address op that we're doing
1318 // the copy for).
1319 setLiveRange(SimpleValue(NewInst), LR);
1320 }
1321 Res = NewInst;
1322 }
1323 return NewInst;
1324 }
1325
1326 /***********************************************************************
1327 * merge : merge segments of LR2 into LR1
1328 *
1329 * This is equivalent to addSegments followed by sortAndMerge.
1330 *
1331 * Previously there was some code here that attempted to optimize on the
1332 * assumption that the caller passed the one with the longer list of segments
1333 * as LR1. However that became too complicated once we introduced weak and
1334 * strong liveness.
1335 *
1336 * One day we could re-introduce some simple optimized paths, such as when
1337 * LR2 has a single segment.
1338 */
merge(LiveRange * LR1,LiveRange * LR2)1339 void GenXLiveness::merge(LiveRange *LR1, LiveRange *LR2)
1340 {
1341 LR1->addSegments(LR2);
1342 LR1->sortAndMerge();
1343 }
1344
1345 /***********************************************************************
1346 * eraseUnusedTree : erase unused tree of instructions
1347 *
1348 * Enter: Inst = root of unused tree
1349 *
1350 * This erases Inst, then recursively erases other instructions that become
1351 * unused. Erased instructions are also removed from liveness.
1352 *
1353 * Other than the given Inst, this does not erase a non-intrinsic call, or
1354 * an intrinsic call with a side effect.
1355 *
1356 * Instead of erasing as we go, we undef operands to make them unused and then
1357 * erase everything at the end. This is required for the case that we have an
1358 * unused DAG of instructions rather than just an unused tree, for example
1359 * where we have a rd-wr sequence and all the rds use the same input.
1360 */
eraseUnusedTree(Instruction * TopInst)1361 void GenXLiveness::eraseUnusedTree(Instruction *TopInst)
1362 {
1363 SmallVector<Instruction *, 4> Stack;
1364 std::set<Instruction *> ToErase;
1365 Stack.push_back(TopInst);
1366 while (!Stack.empty()) {
1367 auto Inst = Stack.back();
1368 Stack.pop_back();
1369 if (!Inst->use_empty())
1370 continue;
1371 if (TopInst != Inst) {
1372 if (auto CI = dyn_cast<CallInst>(Inst)) {
1373 if (!GenXIntrinsic::isAnyNonTrivialIntrinsic(CI))
1374 continue;
1375 if (!CI->doesNotAccessMemory())
1376 continue;
1377 }
1378 }
1379 for (unsigned oi = 0, oe = Inst->getNumOperands(); oi != oe; ++oi)
1380 if (auto OpndInst = dyn_cast<Instruction>(Inst->getOperand(oi))) {
1381 Stack.push_back(OpndInst);
1382 Inst->setOperand(oi, UndefValue::get(OpndInst->getType()));
1383 }
1384 removeValue(Inst);
1385 ToErase.insert(Inst);
1386 }
1387 for (auto i = ToErase.begin(), e = ToErase.end(); i != e; ++i)
1388 (*i)->eraseFromParent();
1389 }
1390
1391 /***********************************************************************
1392 * setArgAddressBase : set the base value of an argument indirect address
1393 *
1394 * Enter: Addr = genx.convert.addr instruction
1395 * Base = a base register for the Addr
1396 */
setArgAddressBase(Value * Addr,Value * Base)1397 void GenXLiveness::setArgAddressBase(Value *Addr, Value *Base) {
1398 // There might be several addresses for the one base. So, find and erase an
1399 // old address for the input base.
1400 auto Res = ArgAddressBaseMap.find(Addr);
1401 if (Res != ArgAddressBaseMap.end()) {
1402 Value *PreviousBase = Res->second;
1403 auto AddrsRange = BaseToArgAddrMap.equal_range(PreviousBase);
1404 auto ToRemove = std::find_if(AddrsRange.first, AddrsRange.second,
1405 [Addr](auto It) { return It.second == Addr; });
1406 IGC_ASSERT_MESSAGE(
1407 ToRemove != AddrsRange.second,
1408 "Addr -> PreviousBase exists in ArgAddressBaseMap. It must "
1409 "exist in BaseToArgAddrMap too.");
1410 BaseToArgAddrMap.erase(ToRemove);
1411 }
1412
1413 // Set the new connection between address and base.
1414 ArgAddressBaseMap[Addr] = Base;
1415 BaseToArgAddrMap.insert({Base, Addr});
1416 }
1417
1418 /***********************************************************************
1419 * getAddressBase : get the base register of an address
1420 *
1421 * Enter: Addr = address conversion (genx.convert.addr instruction)
1422 *
1423 * Return: The Value for the base that the address is used with, or some
1424 * other Value that is coalesced with that
1425 */
getAddressBase(Value * Addr)1426 Value *GenXLiveness::getAddressBase(Value *Addr)
1427 {
1428 // Get the base register from the rdregion/wrregion that the index is used
1429 // in. This might involve going via an add or an rdregion.
1430 Use *U = &*Addr->use_begin();
1431 auto user = cast<Instruction>(U->getUser());
1432 while (!U->getOperandNo()) {
1433 U = &*user->use_begin();
1434 user = cast<Instruction>(U->getUser());
1435 }
1436 if (GenXIntrinsic::isRdRegion(user))
1437 return user->getOperand(0);
1438 if (GenXIntrinsic::isWrRegion(user)) {
1439 auto Head = Baling->getBaleHead(user);
1440 if (Head && isa<StoreInst>(Head)) {
1441 Value *V = Head->getOperand(1);
1442 V = getUnderlyingGlobalVariable(V);
1443 IGC_ASSERT_MESSAGE(V, "null base not expected");
1444 return V;
1445 }
1446 return user;
1447 }
1448 // The above scheme does not work for an address conversion added by
1449 // GenXArgIndirection. Instead we have AddressBaseMap to provide the mapping.
1450 auto i = ArgAddressBaseMap.find(Addr);
1451 IGC_ASSERT_MESSAGE(i != ArgAddressBaseMap.end(),
1452 "base register not found for address");
1453 Value *BaseV = i->second;
1454 LiveRange *LR = getLiveRange(BaseV);
1455 // Find a SimpleValue in the live range that is not a struct member.
1456 for (auto vi = LR->value_begin(), ve = LR->value_end(); vi != ve; ++vi) {
1457 Value *V = vi->getValue();
1458 if (!isa<StructType>(V->getType()))
1459 return V;
1460 }
1461 IGC_ASSERT_EXIT_MESSAGE(0, "non-struct value not found");
1462 }
1463
1464 /***********************************************************************
1465 * getAddressWithBase : get addresses that base register is a Base
1466 *
1467 * Enter: Base = assumed base
1468 *
1469 * Return: If the input value is a base, return the vector of the corresponding
1470 * arguments indirect addresses, empty vector otherwise.
1471 */
getAddressWithBase(Value * Base)1472 std::vector<Value *> GenXLiveness::getAddressWithBase(Value *Base) {
1473 auto Res = BaseToArgAddrMap.equal_range(Base);
1474 std::vector<Value *> Bases;
1475 std::transform(Res.first, Res.second, std::back_inserter(Bases),
1476 [](auto It) { return It.second; });
1477 return Bases;
1478 }
1479
1480 /***********************************************************************
1481 * isNoopCastCoalesced : see if the no-op cast has been coalesced away
1482 *
1483 * This handles the case that the input and result of the no-op cast are coalesced
1484 * in to the same live range.
1485 */
isNoopCastCoalesced(CastInst * CI)1486 bool GenXLiveness::isNoopCastCoalesced(CastInst *CI)
1487 {
1488 IGC_ASSERT(genx::isNoopCast(CI));
1489 return getLiveRangeOrNull(CI) == getLiveRangeOrNull(CI->getOperand(0));
1490 }
1491
1492 /***********************************************************************
1493 * dump, print : dump the liveness info
1494 */
1495 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump()1496 void GenXLiveness::dump()
1497 {
1498 print(errs()); errs() << '\n';
1499 }
dump() const1500 void LiveRange::dump() const
1501 {
1502 print(errs()); errs() << '\n';
1503 }
1504 #endif
1505
printValueLiveness(Value * V,raw_ostream & OS) const1506 void GenXLiveness::printValueLiveness(Value *V, raw_ostream &OS) const {
1507 const LiveRange *LR = getLiveRangeOrNull(V);
1508 if (!LR)
1509 return;
1510 // Only show an LR if the map iterator is on the value that appears first
1511 // in the LR. That avoids printing the same LR multiple times.
1512 if (V != LR->value_begin()->getValue())
1513 return;
1514
1515 LR->print(OS);
1516 OS << "\n";
1517 }
1518
print(raw_ostream & OS,const FunctionGroup * dummy) const1519 void GenXLiveness::print(raw_ostream &OS, const FunctionGroup *dummy) const {
1520 OS << "GenXLiveness for FunctionGroup " << FG->getName() << "\n";
1521 // Print live ranges for global variables;
1522 for (auto &G : FG->getModule()->globals())
1523 printValueLiveness(&G, OS);
1524 for (auto i = FG->begin(), e = FG->end(); i != e; ++i) {
1525 Function *Func = *i;
1526 // Print live ranges for args.
1527 for (auto fi = Func->arg_begin(), fe = Func->arg_end(); fi != fe; ++fi)
1528 printValueLiveness(&*fi, OS);
1529 // Print live range(s) for unified return value.
1530 if (i != FG->begin() && !Func->getReturnType()->isVoidTy()) {
1531 auto Ret = getUnifiedRetIfExist(Func);
1532 if (Ret)
1533 printValueLiveness(Ret, OS);
1534 }
1535 // Print live ranges for code.
1536 for (auto &Inst : instructions(Func))
1537 printValueLiveness(&Inst, OS);
1538 }
1539 OS << "\n";
1540 }
1541
1542 /***********************************************************************
1543 * LiveRange::testLiveRanges : tests if no segments abut or overlap or are
1544 * in the wrong order. Live range's segments should be well formed.
1545 */
testLiveRanges() const1546 bool LiveRange::testLiveRanges() const
1547 {
1548 auto testAdjacentSegments = [](const Segment& A, const Segment& B)
1549 {
1550 const bool P1 = (A.Strength != B.Strength);
1551 const bool P2 = (A.getEnd() < B.getStart());
1552 const bool SegmentIsGood = (P1 || P2);
1553 IGC_ASSERT(SegmentIsGood);
1554 const bool StopSearch = !SegmentIsGood;
1555 return StopSearch;
1556 };
1557
1558 const_iterator Begin = begin();
1559 const_iterator End = end();
1560 const_iterator Failed = std::adjacent_find(Begin, End, testAdjacentSegments);
1561
1562 const bool Result = (Failed == End);
1563 return Result;
1564 }
1565
1566 /***********************************************************************
1567 * LiveRange::addSegment : add a segment to a live range
1568 *
1569 * The segment might already be covered by an existing segment, in which
1570 * case nothing changes.
1571 *
1572 * It would be inefficient to implement coalesce() in terms of this, because
1573 * it might have to shuffle lots of elements along by one each time.
1574 * This function only gets called when adding a single segment to a live
1575 * range when inserting a copy in coalescing.
1576 */
addSegment(Segment Seg)1577 void LiveRange::addSegment(Segment Seg)
1578 {
1579 iterator i = find(Seg.getStart()), e = end();
1580 if (i == e) {
1581 // New segment off end.
1582 Segments.push_back(Seg);
1583 } else if (i->getStart() <= Seg.getStart()) {
1584 // New segment is covered by or overlaps the end of old segment i.
1585 if (i->getEnd() < Seg.getEnd()) {
1586 i->setEnd(Seg.getEnd());
1587 // See if it covers or overlaps any later segments.
1588 iterator j = i + 1;
1589 while (j != e) {
1590 if (j->getStart() > Seg.getEnd())
1591 break;
1592 i->setEnd(j->getEnd());
1593 if (j->getEnd() >= Seg.getEnd())
1594 break;
1595 ++j;
1596 }
1597 Segments.erase(i + 1, j);
1598 }
1599 } else if (i->getStart() == Seg.getEnd()) {
1600 // New segment abuts start of old segment i, without abutting or
1601 // overlapping anything before.
1602 i->setStart(Seg.getStart());
1603 } else {
1604 // New segment is completely in a hole just before i.
1605 Segments.insert(i, Seg);
1606 }
1607 IGC_ASSERT(testLiveRanges());
1608 }
1609
1610 /***********************************************************************
1611 * LiveRange::setSegmentsFrom : for this live range, clear out its segments
1612 * and copy them from the other live range
1613 */
setSegmentsFrom(LiveRange * Other)1614 void LiveRange::setSegmentsFrom(LiveRange *Other)
1615 {
1616 Segments.clear();
1617 Segments.append(Other->Segments.begin(), Other->Segments.end());
1618 }
1619
1620 /***********************************************************************
1621 * LiveRange::addSegments : add segments of LR2 into this
1622 */
addSegments(LiveRange * LR2)1623 void LiveRange::addSegments(LiveRange *LR2)
1624 {
1625 Segments.append(LR2->Segments.begin(), LR2->Segments.end());
1626 }
1627
1628 /***********************************************************************
1629 * LiveRange::sortAndMerge : after doing some push_backs, sort the segments,
1630 * and merge overlapping/adjacent ones
1631 */
sortAndMerge()1632 void LiveRange::sortAndMerge() {
1633 std::sort(Segments.begin(), Segments.end());
1634
1635 // Ensure that there are no duplicate segments:
1636 Segments_t::iterator ip;
1637 ip = std::unique(Segments.begin(), Segments.end());
1638 Segments.resize(std::distance(Segments.begin(), ip));
1639
1640 Segments_t SegmentsSortedEnd = Segments;
1641 std::sort(SegmentsSortedEnd.begin(), SegmentsSortedEnd.end(),
1642 [](Segment L, Segment R) {
1643 if (L.getEnd() != R.getEnd())
1644 return L.getEnd() < R.getEnd();
1645 return L.getStart() < R.getStart();
1646 });
1647
1648 Segments_t NewSegments;
1649 std::unordered_set<Segment> OpenedSegments;
1650 Segment *SS = Segments.begin();
1651 Segment *ES = SegmentsSortedEnd.begin();
1652 unsigned prevBorder = 0;
1653 unsigned curBorder = 0;
1654 bool isStartBorder;
1655
1656 // Split & Merge
1657 while (ES != SegmentsSortedEnd.end()) {
1658 if (SS != Segments.end() && SS->getStart() < ES->getEnd()) {
1659 isStartBorder = true;
1660 curBorder = SS->getStart();
1661 } else {
1662 isStartBorder = false;
1663 curBorder = ES->getEnd();
1664 }
1665
1666 // To create or extend segment, first check that there are
1667 // open segments or that we haven't already created or extended one
1668 if (OpenedSegments.size() > 0 && prevBorder < curBorder) {
1669 Segment NS =
1670 *std::max_element(OpenedSegments.begin(), OpenedSegments.end(),
1671 [](Segment L, Segment R) {
1672 return L.Strength < R.Strength;
1673 }); // New segment
1674 if (NewSegments.size() > 0 &&
1675 NewSegments.rbegin()->getEnd() == prevBorder &&
1676 // This segment and previous segment abut or overlap. Merge
1677 // as long as they have the same strength.
1678 (NS.Strength == NewSegments.rbegin()->Strength ||
1679 // Also allow for the case that the first one is strong and the
1680 // second one is phicpy. The resulting merged segment is strong,
1681 // because a phicpy segment is valid only if it starts in the
1682 // same place as when it was originally created and there is no
1683 // liveness just before it.
1684 (NS.Strength == Segment::PHICPY &&
1685 NewSegments.rbegin()->Strength == Segment::STRONG))) {
1686 // In these cases we can extend
1687 NewSegments.rbegin()->setEnd(curBorder);
1688 } else {
1689 NS.setStart(prevBorder);
1690 NS.setEnd(curBorder);
1691 NewSegments.push_back(NS);
1692 }
1693 }
1694 prevBorder = curBorder;
1695 if (isStartBorder)
1696 OpenedSegments.insert(*SS++);
1697 else
1698 OpenedSegments.erase(*ES++);
1699 }
1700 Segments = NewSegments;
1701 }
1702
1703 /***********************************************************************
1704 * LiveRange::prepareFuncs : fill the Funcs set with kernel or stack functions
1705 * which this LR is alive in
1706 *
1707 * To support RegAlloc for function groups that consist of kernel and stack
1708 * functions we have to track which kernel/stack functions the LR spans across.
1709 *
1710 */
prepareFuncs(FunctionGroupAnalysis * FGA)1711 void LiveRange::prepareFuncs(FunctionGroupAnalysis *FGA) {
1712 for (auto &Val : getValues()) {
1713 auto Inst = dyn_cast<Instruction>(Val.getValue());
1714 Function *DefFunc = nullptr;
1715 if (Inst && Inst->getParent())
1716 DefFunc = Inst->getFunction();
1717 else if (auto Arg = dyn_cast<Argument>(Val.getValue()))
1718 DefFunc = Arg->getParent();
1719
1720 if (DefFunc) {
1721 auto *DefFuncFG = FGA->getAnyGroup(DefFunc);
1722 IGC_ASSERT_MESSAGE(DefFuncFG, "Cannot find the function group");
1723 Funcs.insert(DefFuncFG->getHead());
1724 }
1725
1726 for (auto U : Val.getValue()->users())
1727 if (Instruction *UserInst = dyn_cast<Instruction>(U)) {
1728 auto F = UserInst->getFunction();
1729 auto *FG = FGA->getAnyGroup(F);
1730 IGC_ASSERT_MESSAGE(FG, "Cannot find the function group");
1731 Funcs.insert(FG->getHead());
1732 }
1733 }
1734 }
1735
1736 /***********************************************************************
1737 * LiveRange::getLength : add up the number of instructions covered by this LR
1738 */
getLength(bool WithWeak) const1739 unsigned LiveRange::getLength(bool WithWeak) const {
1740 unsigned Length = 0;
1741 for (auto i = begin(), e = end(); i != e; ++i) {
1742 if (i->isWeak() && !WithWeak)
1743 continue;
1744 Length += i->getEnd() - i->getStart();
1745 }
1746 return Length;
1747 }
1748
1749 /***********************************************************************
1750 * LiveRange::print : print the live range
1751 * Simplevalues of LR : segments { details }
1752 * Detailed mode exists to print LLVM values
1753 */
print(raw_ostream & OS,bool Details) const1754 void LiveRange::print(raw_ostream &OS, bool Details) const {
1755 if (Details)
1756 OS << "LR values:\n";
1757
1758 if (Values.empty()) {
1759 OS << "<Empty LR>";
1760 return;
1761 }
1762
1763 for (auto &&V : Values) {
1764 if (!Details)
1765 OS << V << "; ";
1766 else
1767 OS << *V.getValue() << "\n";
1768 }
1769
1770 if (!Details)
1771 OS << ":";
1772 else
1773 OS << "LR Segments and details: ";
1774
1775 if (Segments.empty())
1776 OS << "<Empty Segments>";
1777 else
1778 printSegments(OS);
1779
1780 const char *Cat = "???";
1781 switch (Category) {
1782 case RegCategory::NONE: Cat = "none"; break;
1783 case RegCategory::GENERAL: Cat = "general"; break;
1784 case RegCategory::ADDRESS: Cat = "address"; break;
1785 case RegCategory::PREDICATE: Cat = "predicate"; break;
1786 case RegCategory::SAMPLER: Cat = "sampler"; break;
1787 case RegCategory::SURFACE: Cat = "surface"; break;
1788 case RegCategory::EM: Cat = "em"; break;
1789 case RegCategory::RM: Cat = "rm"; break;
1790 }
1791
1792 OS << "{" << Cat << ",align" << (1U << LogAlignment);
1793 if (Offset)
1794 OS << ",offset" << Offset;
1795 OS << "}";
1796 }
1797
1798 /***********************************************************************
1799 * LiveRange::printSegments : print the live range's segments
1800 */
printSegments(raw_ostream & OS) const1801 void LiveRange::printSegments(raw_ostream &OS) const
1802 {
1803 for (auto ri = Segments.begin(), re = Segments.end();
1804 ri != re; ++ri) {
1805 OS << "[";
1806 switch (ri->Strength) {
1807 case Segment::WEAK: OS << "w"; break;
1808 case Segment::PHICPY: OS << "ph"; break;
1809 case Segment::STRONG: /* do nothing */ break;
1810 }
1811 OS << ri->getStart() << "," << ri->getEnd() << ")";
1812 }
1813 }
1814
1815 /***********************************************************************
1816 * IndexFlattener::flatten : convert struct indices into a flattened index
1817 *
1818 * This has a special case of Indices having a single element that is the
1819 * number of elements in ST, which returns the total number of flattened
1820 * indices in the struct.
1821 *
1822 * This involves scanning through the struct layout each time it is called.
1823 * If it is used a lot, it might benefit from some cacheing of the results.
1824 */
flatten(StructType * ST,ArrayRef<unsigned> Indices)1825 unsigned IndexFlattener::flatten(StructType *ST, ArrayRef<unsigned> Indices)
1826 {
1827 if (!Indices.size())
1828 return 0;
1829 unsigned Flattened = 0;
1830 unsigned i = 0;
1831 for (; i != Indices[0]; ++i) {
1832 Type *ElTy = ST->getElementType(i);
1833 if (auto ElST = dyn_cast<StructType>(ElTy))
1834 Flattened += flatten(ElST, ElST->getNumElements());
1835 else
1836 ++Flattened;
1837 }
1838 if (i == ST->getNumElements())
1839 return Flattened; // handle special case noted at the top
1840 Type *ElTy = ST->getElementType(i);
1841 if (auto ElST = dyn_cast<StructType>(ElTy))
1842 Flattened += flatten(ElST, Indices.slice(1));
1843 return Flattened;
1844 }
1845
1846 /***********************************************************************
1847 * IndexFlattener::unflatten : convert flattened index into struct indices
1848 *
1849 * Enter: Indices = vector to put unflattened indices into
1850 *
1851 * Return: number left over from flattened index if it goes off the end
1852 * of the struct (used internally when recursing). If this is
1853 * non-zero, nothing has been pushed into Indices
1854 *
1855 * This involves scanning through the struct layout each time it is called.
1856 * If it is used a lot, it might benefit from some cacheing of the results.
1857 */
unflatten(StructType * ST,unsigned Flattened,SmallVectorImpl<unsigned> * Indices)1858 unsigned IndexFlattener::unflatten(StructType *ST, unsigned Flattened,
1859 SmallVectorImpl<unsigned> *Indices)
1860 {
1861 ++Flattened;
1862 for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
1863 --Flattened;
1864 Type *ElTy = ST->getElementType(i);
1865 if (auto ElST = dyn_cast<StructType>(ElTy)) {
1866 Indices->push_back(i);
1867 Flattened = unflatten(ElST, Flattened, Indices);
1868 if (!Flattened)
1869 return 0;
1870 Indices->pop_back();
1871 } else if (!Flattened) {
1872 Indices->push_back(i);
1873 return 0;
1874 }
1875 }
1876 return Flattened;
1877 }
1878
1879 /***********************************************************************
1880 * IndexFlattener::getElementType : get type of struct element from
1881 * flattened index
1882 *
1883 * Enter: Ty = type, possibly struct type
1884 * FlattenedIndex = flattened index in the struct, 0 if not struct
1885 *
1886 * Return: type of that element
1887 */
getElementType(Type * Ty,unsigned FlattenedIndex)1888 Type *IndexFlattener::getElementType(Type *Ty, unsigned FlattenedIndex)
1889 {
1890 auto ST = dyn_cast<StructType>(Ty);
1891 if (!ST)
1892 return Ty;
1893 SmallVector<unsigned, 4> Indices;
1894 IndexFlattener::unflatten(ST, FlattenedIndex, &Indices);
1895 IGC_ASSERT(IndexFlattener::flatten(ST, Indices) == FlattenedIndex);
1896 Type *T = 0;
1897 for (unsigned i = 0;;) {
1898 T = ST->getElementType(Indices[i]);
1899 if (++i == Indices.size())
1900 return T;
1901 ST = cast<StructType>(T);
1902 }
1903 }
1904
1905 /***********************************************************************
1906 * IndexFlattener::flattenArg : flatten an arg in a function or call
1907 *
1908 * This calculates the total number of flattened indices used up by previous
1909 * args. If all previous args are not struct type, then this just returns the
1910 * arg index.
1911 */
flattenArg(FunctionType * FT,unsigned ArgIndex)1912 unsigned IndexFlattener::flattenArg(FunctionType *FT, unsigned ArgIndex)
1913 {
1914 unsigned FlattenedIndex = 0;
1915 while (ArgIndex--) {
1916 Type *ArgTy = FT->getParamType(ArgIndex);
1917 FlattenedIndex += getNumElements(ArgTy);
1918 }
1919 return FlattenedIndex;
1920 }
1921
1922 /***********************************************************************
1923 * SimpleValue::getType : get the type of the SimpleValue
1924 */
getType() const1925 Type *SimpleValue::getType() const {
1926 return IndexFlattener::getElementType(V->getType(), Index);
1927 }
1928
1929 /***********************************************************************
1930 * dump, print : debug print a SimpleValue
1931 */
1932 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump() const1933 void SimpleValue::dump() const
1934 {
1935 print(errs()); errs() << '\n';
1936 }
1937 #endif
print(raw_ostream & OS) const1938 void SimpleValue::print(raw_ostream &OS) const
1939 {
1940 OS << V->getName();
1941 if (Index || isa<StructType>(V->getType()))
1942 OS << "#" << Index;
1943 }
printName(raw_ostream & OS) const1944 void SimpleValue::printName(raw_ostream &OS) const
1945 {
1946 OS << V->getName();
1947 if (Index || isa<StructType>(V->getType()))
1948 OS << "#" << Index;
1949 }
1950
1951 /***********************************************************************
1952 * CallGraph::build : build the call graph for the FunctionGroup
1953 *
1954 * The call graph is acyclic because no recursive edges added here
1955 * CM supports recursion though
1956 */
build(GenXLiveness * Liveness)1957 void genx::CallGraph::build(GenXLiveness *Liveness) {
1958 Nodes.clear();
1959 // Create a node for each Function.
1960 for (auto fgi = FG->begin(), fge = FG->end(); fgi != fge; ++fgi) {
1961 Function *F = *fgi;
1962 (void)Nodes[F];
1963 }
1964 // For each Function, find its call sites and add edges for them.
1965 for (auto fgi = FG->begin() + 1, fge = FG->end(); fgi != fge; ++fgi) {
1966 Function *F = *fgi;
1967 for (Value::use_iterator ui = F->use_begin(), ue = F->use_end(); ui != ue;
1968 ++ui) {
1969 // TODO: deduce possible callsites thru cast chains
1970 if (isa<CallInst>(ui->getUser())) {
1971 auto Call = cast<CallInst>(ui->getUser());
1972 auto Caller = Call->getParent()->getParent();
1973 // do not add edges for indirect, recursive and intrinsic calls
1974 if (Call->getCalledFunction() &&
1975 !GenXIntrinsic::isAnyNonTrivialIntrinsic(Call->getCalledFunction()) &&
1976 Caller != F)
1977 Nodes[Caller].insert(
1978 Edge(Liveness->getNumbering()->getNumber(Call), Call));
1979 }
1980 }
1981 }
1982 }
1983
1984 INITIALIZE_PASS_BEGIN(GenXLivenessWrapper, "GenXLivenessWrapper",
1985 "GenXLivenessWrapper", false, false)
1986 INITIALIZE_PASS_END(GenXLivenessWrapper, "GenXLivenessWrapper",
1987 "GenXLivenessWrapper", false, false)
1988
createGenXLivenessWrapperPass()1989 ModulePass *llvm::createGenXLivenessWrapperPass() {
1990 initializeGenXLivenessWrapperPass(*PassRegistry::getPassRegistry());
1991 return new GenXLivenessWrapper();
1992 }
1993