1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 //
10 /// GenXCategory
11 /// ------------
12 ///
13 /// This pass performs five functions:
14 ///
15 /// 1. It splits any struct phi into a phi for each element of the struct. This
16 /// is done in GenXLowering, but a subsequent pass can re-insert a struct phi so
17 /// this pass mops those up.
18 ///
19 /// 2. It resolves each overlapping circular phi value.
20 ///
21 /// LLVM IR does not attach
22 /// any importance to the order of phi nodes in any particular basic block.
23 /// At the head of a loop, a phi incoming can also be a phi definition in the
24 /// same block, and they could be in either order.
25 ///
26 /// However, once we start constructing live ranges in the GenX backend, we
27 /// attach importance to the order of the phi nodes, so we need to resolve
28 /// any such overlapping circular phi value. Currently we do this by
29 /// inserting a copy (actually a bitcast) just after the phi nodes in that
30 /// basic block. A future enhancement would be to try and re-order the phi
31 /// nodes, and only fall back to copy insertion if there is circularity and
32 /// it is impossible to find a correct order, for example when the loop body
33 /// swaps two variables over.
34 ///
35 /// 3. It inserts a load for any operand that is constant but not allowed to be.
36 /// It also catches any case where constant propagation in EarlyCSE has
37 /// caused a non-simple constant to be propagated into the instruction.
38 /// See the GenXConstants section above.
39 // (in GenXConstants.cpp)
40 ///
41 /// 4. It determines the register category and increased alignment requirement
42 /// (e.g. use as a raw operand) of each value, and stores it by creating a
43 /// LiveRange for the value and storing it there. At this stage the LiveRange
44 /// does not contain any other information; GenXLiveRanges populates it further
45 /// (or erases it if the value turns out to be baled in).
46 ///
47 /// 5. It inserts instructions as required to convert from one register
48 /// category to another, where a value has its def and uses not all requiring
49 /// the same category.
50 ///
51 /// All this pass inserts is a llvm.genx.convert intrinsic. It does not record
52 /// what the categories are. This information is recalculated in GenXLiveness.
53 ///
54 /// The reason for inserting the convert intrinsic calls here, before the final
55 /// run of GenXBaling before GenXLiveRanges, is that we want GenXBaling to spot
56 /// when a convert intrinsic can be baled with rdregion or wrregion.
57 ///
58 /// For one value (function argument or instruction), the pass looks at the
59 /// categories required for the definition and each use. If there is no address
60 /// conversion involved, then it inserts a single conversion if possible (all
61 /// uses are the same category), otherwise it inserts a conversion for each use
62 /// that requires one.
63 ///
64 /// **IR restriction**: After this pass, a value must have its def and all uses
65 /// requiring the same register category.
66 ///
67 /// Address conversion
68 /// ^^^^^^^^^^^^^^^^^^
69 ///
70 /// An address conversion is treated slightly differently.
71 ///
72 /// A rdregion/wrregion representing an indirect region has a variable index.
73 /// This index is actually an index, whereas the vISA we need to generate for
74 /// it uses an address register that has been set up with an ``add_addr``
75 /// instruction from the index and the base register.
76 ///
77 /// This pass inserts an ``llvm.genx.convert.addr`` intrinsic, with zero offset,
78 /// to represent the conversion from index to address register. However, the
79 /// intrinsic has no way of representing the base register. Instead, the base
80 /// register is implicitly the "old value" input of the rdregion/wrregion where
81 /// the address is used.
82 ///
83 /// The same index may well be used in multiple rdregions and wrregions,
84 /// especially after LLVM's CSE. But at this stage we have no idea whether
85 /// these multiple rdregions/wrregions will have the same base register, so
86 /// we must assume not and insert a separate ``llvm.genx.convert.addr``
87 /// for each rdregion/wrregion use of the index.
88 ///
89 /// These multiple address conversions of the same index are commoned up
90 /// where possible later on in GenXAddressCommoning. That pass runs after
91 /// GenXCoalescing, so it can tell whether two address conversions of the
92 /// same index also have the same base register because the "old value"
93 /// inputs of the regions have been coalesced together.
94 ///
95 /// Where an index used in an indirect region is a constant add, this pass
96 /// inserts the ``llvm.genx.convert.addr`` before that, and turns the constant
97 /// add into ``llvm.genx.add.addr``. The latter can be baled into rdregion
98 /// or wrregion, representing a constant offset in the indirect region.
99 /// Only one ``llvm.genx.add.addr`` is allowed between the
100 /// ``llvm.genx.convert.addr`` and the use in a rdregion/wrregion.
101 ///
102 /// However this pass does not check whether the offset is in range (although
103 /// GenXBaling does check that before deciding to bale it in). The
104 /// GenXAddressCommoning pass sorts that out.
105 ///
106 /// **IR restriction**: After this pass, a variable index in a rdregion/wrregion
107 /// must be the result of ``llvm.genx.convert.addr`` or ``llvm.genx.add.addr``.
108 /// Operand 0 of ``llvm.genx.add.addr`` must be the result of
109 /// ``llvm.genx.convert.addr``.
110 ///
111 /// **IR restriction**: After this pass, up to GenXAddressCommoning, the result
112 /// of ``llvm.genx.convert.addr`` must have a single use in either a
113 /// ``llvm.genx.add.addr`` or as the index in rdregion/wrregion. The result
114 /// of ``llvm.genx.add.addr`` must have a single use as the index in
115 /// rdregion/wrregion.
116 ///
117 //===----------------------------------------------------------------------===//
118 #define DEBUG_TYPE "GENX_CATEGORY"
119
120 #include "FunctionGroup.h"
121 #include "GenX.h"
122 #include "GenXConstants.h"
123 #include "GenXIntrinsics.h"
124 #include "GenXLiveness.h"
125 #include "GenXModule.h"
126 #include "GenXTargetMachine.h"
127 #include "GenXUtil.h"
128
129 #include "vc/GenXOpts/Utils/KernelInfo.h"
130 #include "vc/GenXOpts/Utils/RegCategory.h"
131
132 #include "Probe/Assertion.h"
133 #include "llvmWrapper/IR/DerivedTypes.h"
134 #include "llvmWrapper/IR/InstrTypes.h"
135 #include "llvmWrapper/IR/Instructions.h"
136
137 #include <llvm/ADT/PostOrderIterator.h>
138 #include <llvm/Analysis/CFG.h>
139 #include <llvm/Analysis/ValueTracking.h>
140 #include <llvm/CodeGen/TargetPassConfig.h>
141 #include <llvm/GenXIntrinsics/GenXIntrinsics.h>
142 #include <llvm/IR/BasicBlock.h>
143 #include <llvm/IR/Constants.h>
144 #include <llvm/IR/Dominators.h>
145 #include <llvm/IR/Function.h>
146 #include <llvm/IR/InlineAsm.h>
147 #include <llvm/IR/Instructions.h>
148 #include <llvm/IR/Intrinsics.h>
149 #include <llvm/IR/Metadata.h>
150 #include <llvm/Support/Debug.h>
151
152 using namespace llvm;
153 using namespace genx;
154
155 namespace {
156
157 // CategoryAndAlignment : values returned from getCategoryAndAlignment*
158 // functions
// CategoryAndAlignment : values returned from getCategoryAndAlignment*
// functions
struct CategoryAndAlignment {
  unsigned Cat;   // register category (a RegCategory:: value)
  unsigned Align; // increased alignment requirement of the value, 0 if none
  // Implicit conversion from a lone category is intentional: callers of the
  // getCategoryAndAlignment* functions simply "return RegCategory::XXX;".
  CategoryAndAlignment(unsigned Cat, unsigned Align = 0)
      : Cat(Cat), Align(Align) {}
};
164
165 class UsesCatInfo;
166
167 // GenX category pass
// GenX category pass : determines the register category of each value,
// and inserts llvm.genx.convert / llvm.genx.convert.addr calls where a
// value's def and uses disagree on category.
class GenXCategory : public FGPassImplInterface,
                     public IDMixin<GenXCategory> {
  Function *Func = nullptr;         // function currently being processed
  KernelMetadata KM;                // metadata of the function group's head
  GenXLiveness *Liveness = nullptr; // liveness analysis (stores categories)
  DominatorTreeGroupWrapperPass *DTs = nullptr; // dom trees per function
  const GenXSubtarget *Subtarget = nullptr;
  const DataLayout *DL = nullptr;
  // Instructions that became unused during a basic-block scan; erased
  // (together with their live ranges) once the block scan finishes.
  SmallVector<Instruction *, 8> ToErase;
  bool Modified = false; // whether the current function's IR was changed
  // Vector of arguments and phi nodes that did not get a category.
  SmallVector<Value *, 8> NoCategory;
  bool InFGHead = false; // true only while processing the group head function
  // Sometimes the pass may get stuck on strongly connected components.
  // This field identifies such a case and notifies that there's no need
  // to wait till some other value's category is defined.
  bool EnforceCategoryPromotion = false;

public:
  explicit GenXCategory() {}
  static StringRef getPassName() { return "GenX category conversion"; }
  static void getAnalysisUsage(AnalysisUsage &AU);
  bool runOnFunctionGroup(FunctionGroup &FG) override;
  // Category queries used while deciding the category of a value.
  unsigned getCategoryForPhiIncomings(PHINode *Phi) const;
  unsigned getCategoryForCallArg(Function *Callee, unsigned ArgNo) const;
  unsigned getCategoryForInlasmConstraintedOp(CallInst *CI, unsigned ArgNo,
                                              bool IsOutput) const;
  CategoryAndAlignment getCategoryAndAlignmentForDef(Value *V) const;
  CategoryAndAlignment
  getCategoryAndAlignmentForUse(Value::use_iterator U) const;

private:
  // One slot per register category, holding the conversion instruction
  // created for that category (nullptr if none) -- see buildConversions.
  using ConvListT = std::array<llvm::Instruction *, RegCategory::NUMCATEGORIES>;

  bool processFunction(Function *F);
  bool fixCircularPhis(Function *F);
  bool processValue(Value *V);
  bool handleLeftover();
  Instruction *createConversion(Value *V, unsigned Cat);
  ConvListT buildConversions(Value *Def, CategoryAndAlignment DefInfo,
                             const UsesCatInfo &UsesInfo);
};
208
209 // AUse : an address use of a value in processValue()
// AUse : one use of a value recorded in processValue(), holding the using
// instruction, which of its operands is the use, and the register category
// that use requires.
struct AUse {
  Instruction *user;   // the instruction that uses the value
  unsigned OperandNum; // operand index of the use within the user
  unsigned Cat;        // register category required at this use
  AUse(Value::use_iterator U, unsigned Cat)
      : user(cast<Instruction>(U->getUser())), OperandNum(U->getOperandNo()),
        Cat(Cat) {}
};
218
219 // almost real input iterator, minimum for range for was implemented
220 class Iterator final {
221 unsigned ShiftedMask_;
222 unsigned CurCat_;
223
224 public:
Iterator(unsigned Mask,unsigned Cat)225 Iterator(unsigned Mask, unsigned Cat) : ShiftedMask_(Mask), CurCat_(Cat) {
226 validate();
227 }
228
operator *() const229 unsigned operator*() const {
230 validate();
231 return CurCat_;
232 }
233
operator ++()234 Iterator &operator++() {
235 validate();
236 ShiftedMask_ /= 2;
237 ++CurCat_;
238 if (ShiftedMask_ == 0) {
239 CurCat_ = RegCategory::NUMCATEGORIES;
240 validate();
241 return *this;
242 }
243 for (; ShiftedMask_ % 2 == 0; ShiftedMask_ /= 2, ++CurCat_)
244 ;
245 validate();
246 return *this;
247 }
248
operator ==(const Iterator & lhs,const Iterator & rhs)249 friend bool operator==(const Iterator &lhs, const Iterator &rhs) {
250 return (lhs.ShiftedMask_ == rhs.ShiftedMask_ &&
251 lhs.CurCat_ == rhs.CurCat_);
252 }
253
operator !=(const Iterator & lhs,const Iterator & rhs)254 friend bool operator!=(const Iterator &lhs, const Iterator &rhs) {
255 return !(lhs == rhs);
256 }
257
258 private:
validate() const259 void validate() const {
260 IGC_ASSERT_MESSAGE((ShiftedMask_ % 2 == 1 || CurCat_ == RegCategory::NUMCATEGORIES),
261 "invalid state");
262 }
263 };
264
265 // Implements only begin() and end()
266 // to iterate over categories of uses.
267 class Categories final {
268 unsigned Mask_;
269
270 public:
Categories(unsigned Mask)271 explicit Categories(unsigned Mask) : Mask_(Mask) {}
272
begin() const273 Iterator begin() const {
274 // we have no category
275 if (!Mask_)
276 return end();
277 // we have NONE category
278 if (Mask_ % 2 == 1)
279 return Iterator(Mask_, 0);
280 // we adding NONE category
281 Iterator FalseBegin(Mask_ + 1, 0);
282 // and now we get the real first category
283 return ++FalseBegin;
284 }
285
end() const286 Iterator end() const { return Iterator(0, RegCategory::NUMCATEGORIES); }
287 };
288
289 // Encapsulates Category'n'Alignment analysis of value uses.
290 class UsesCatInfo final {
291 using UsesT = llvm::SmallVector<AUse, 8>;
292 UsesT Uses_;
293 unsigned Mask_;
294 unsigned MaxAlign_;
295 unsigned MostUsedCat_;
296
297 public:
UsesCatInfo()298 UsesCatInfo() : Uses_(), Mask_(0), MaxAlign_(0) {}
299
UsesCatInfo(const GenXCategory & PassInfo,Value * V)300 UsesCatInfo(const GenXCategory &PassInfo, Value *V) : UsesCatInfo() {
301 std::array<int, RegCategory::NUMCATEGORIES> Stat = {0};
302 for (auto ui = V->use_begin(), ue = V->use_end(); ui != ue; ++ui) {
303 auto CatAlign = PassInfo.getCategoryAndAlignmentForUse(ui);
304 MaxAlign_ = std::max(MaxAlign_, CatAlign.Align);
305 Uses_.push_back(AUse(ui, CatAlign.Cat));
306 Mask_ |= 1 << CatAlign.Cat;
307 if (CatAlign.Cat != RegCategory::NONE)
308 ++Stat[CatAlign.Cat];
309 }
310 auto MaxInStatIt = std::max_element(Stat.begin(), Stat.end());
311 MostUsedCat_ = MaxInStatIt - Stat.begin();
312 }
313
empty() const314 bool empty() const { return !Mask_; }
315
allHaveCat(unsigned cat) const316 bool allHaveCat(unsigned cat) const { return !(Mask_ & ~(1 << cat)); }
317
getUses() const318 const UsesT &getUses() const { return Uses_; }
319
getMaxAlign() const320 unsigned getMaxAlign() const { return MaxAlign_; }
321
322 // When there's no real category uses (real is anything but NONE)
323 // behavior is undefined.
getMostUsedCat() const324 unsigned getMostUsedCat() const {
325 IGC_ASSERT_MESSAGE(!empty(),
326 "works only for cases when there are uses with real categories");
327 IGC_ASSERT_MESSAGE(!allHaveCat(RegCategory::NONE),
328 "works only for cases when there are uses with real categories");
329 return MostUsedCat_;
330 }
331
332 // meant to be used in range for
getCategories() const333 Categories getCategories() const { return Categories(Mask_); }
334 };
335
placeConvAfterDef(Function * Func,Instruction * Conv,Value * Def)336 void placeConvAfterDef(Function *Func, Instruction *Conv, Value *Def) {
337 if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
338 // Original value is an instruction. Insert just after it.
339 Conv->insertAfter(Inst);
340 Conv->setDebugLoc(Inst->getDebugLoc());
341 } else {
342 IGC_ASSERT_MESSAGE(isa<Argument>(Def), "must be an argument if not an instruction");
343 // Original value is a function argument. Insert at the start of the
344 // function.
345 Conv->insertBefore(&*Func->begin()->begin());
346 }
347 }
348
placeConvBeforeUse(Instruction * Conv,Instruction * Use,unsigned UseOperand)349 void placeConvBeforeUse(Instruction *Conv, Instruction *Use,
350 unsigned UseOperand) {
351 if (auto PhiUse = dyn_cast<PHINode>(Use)) {
352 // Use is in a phi node. Insert before terminator in corresponding
353 // incoming block.
354 Conv->insertBefore(PhiUse->getIncomingBlock(UseOperand)->getTerminator());
355 } else {
356 // Insert just before use.
357 Conv->insertBefore(Use);
358 Conv->setDebugLoc(Use->getDebugLoc());
359 }
360 }
361
362 } // end anonymous namespace
363
namespace llvm {
// Forward-declare the registration hook emitted by the INITIALIZE_PASS
// macros below.
void initializeGenXCategoryWrapperPass(PassRegistry &);
using GenXCategoryWrapper = FunctionGroupWrapperPass<GenXCategory>;
} // namespace llvm
INITIALIZE_PASS_BEGIN(GenXCategoryWrapper, "GenXCategoryWrapper",
                      "GenXCategoryWrapper", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeGroupWrapperPassWrapper)
INITIALIZE_PASS_DEPENDENCY(GenXLivenessWrapper)
INITIALIZE_PASS_END(GenXCategoryWrapper, "GenXCategoryWrapper",
                    "GenXCategoryWrapper", false, false)

// Create the legacy-PM wrapper for the GenXCategory function-group pass,
// ensuring the pass is registered with the global registry first.
ModulePass *llvm::createGenXCategoryWrapperPass() {
  initializeGenXCategoryWrapperPass(*PassRegistry::getPassRegistry());
  return new GenXCategoryWrapper();
}
379
void GenXCategory::getAnalysisUsage(AnalysisUsage &AU) {
  // Analyses this pass consumes.
  AU.addRequired<DominatorTreeGroupWrapperPass>();
  AU.addRequired<GenXLiveness>();
  AU.addRequired<TargetPassConfig>();
  // Analyses left intact by this pass.
  AU.addPreserved<GenXModule>();
  AU.addPreserved<GenXLiveness>();
  AU.addPreserved<FunctionGroupAnalysis>();
  AU.addPreserved<DominatorTreeGroupWrapperPass>();
  AU.setPreservesCFG();
}
390
/***********************************************************************
 * runOnFunctionGroup : run the category conversion pass for
 *      this FunctionGroup
 */
bool GenXCategory::runOnFunctionGroup(FunctionGroup &FG)
{
  KM = KernelMetadata(FG.getHead());
  DTs = &getAnalysis<DominatorTreeGroupWrapperPass>();
  Liveness = &getAnalysis<GenXLiveness>();
  Subtarget = &getAnalysis<TargetPassConfig>()
                   .getTM<GenXTargetMachine>()
                   .getGenXSubtarget();
  DL = &FG.getModule()->getDataLayout();
  EnforceCategoryPromotion = false;
  bool Modified = false;
  if (KM.isKernel()) {
    // Get the offset of each kernel arg.
    for (auto ai = FG.getHead()->arg_begin(), ae = FG.getHead()->arg_end();
         ai != ae; ++ai) {
      Argument *Arg = &*ai;
      Liveness->getOrCreateLiveRange(Arg)->Offset =
          KM.getArgOffset(Arg->getArgNo());
    }
  }
  // Mop up any struct phis, splitting into elements.
  for (auto i = FG.begin(), e = FG.end(); i != e; ++i)
    Modified |= splitStructPhis(*i);
  // Do category conversion on each function in the group. InFGHead is true
  // only for the first function processed, i.e. the group head.
  InFGHead = true;
  for (auto i = FG.begin(), e = FG.end(); i != e; ++i) {
    Modified |= processFunction(*i);
    InFGHead = false;
  }
  // Iteratively retry the values (args/phis) that did not get a category.
  Modified |= handleLeftover();
  return Modified;
}
426
427 // Now iteratively process values that did not get a category. A valid
428 // category will eventually propagate through a web of phi nodes
429 // and/or subroutine args.
handleLeftover()430 bool GenXCategory::handleLeftover() {
431 if (NoCategory.empty())
432 return false;
433 while (!NoCategory.empty()) {
434 auto NewEnd = std::remove_if(NoCategory.begin(), NoCategory.end(),
435 [this](Value *V) { return processValue(V); });
436 if (NewEnd == NoCategory.end()) {
437 IGC_ASSERT_MESSAGE(!EnforceCategoryPromotion,
438 "category promotion was enforced, still no progress");
439 EnforceCategoryPromotion = true;
440 continue;
441 }
442 // No need to enforce category promotion when there is progress. Even if
443 // it was enforced before.
444 EnforceCategoryPromotion = false;
445 NoCategory.erase(NewEnd, NoCategory.end());
446 }
447 return true;
448 }
449
450 // Common up constpred calls within a block.
commonUpPredicate(BasicBlock * BB)451 static bool commonUpPredicate(BasicBlock *BB) {
452 bool Changed = false;
453 // Map from flatten predicate value to its constpred calls.
454 using key_type = std::pair<char, uint64_t>;
455 SmallDenseMap<key_type, SmallVector<Instruction *, 8>> ValMap;
456
457 for (auto &Inst : BB->getInstList()) {
458 if (GenXIntrinsic::getGenXIntrinsicID(&Inst) == GenXIntrinsic::genx_constantpred) {
459 Constant *V = cast<Constant>(Inst.getOperand(0));
460 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(V->getType())) {
461 unsigned NElts = VT->getNumElements();
462 if (NElts > 64)
463 continue;
464 uint64_t Bits = 0;
465 for (unsigned i = 0; i != NElts; ++i)
466 if (!V->getAggregateElement(i)->isNullValue())
467 Bits |= ((uint64_t)1 << i);
468 key_type Key{NElts, Bits};
469 auto Iter = ValMap.find(Key);
470 if (Iter == ValMap.end())
471 ValMap[Key].push_back(&Inst);
472 else if (Inst.hasOneUse() && Inst.user_back()->getParent() == BB)
473 // Just in case constpred is not from constant predicate loading. This
474 // ensures the first instruction dominates others in the same vector.
475 (Iter->second).push_back(&Inst);
476 }
477 }
478 }
479
480 // Common up when there are more than 2 uses, in which case it will not be
481 // worse than flag spills.
482 for (auto I = ValMap.begin(), E = ValMap.end(); I != E; ++I) {
483 auto &V = I->second;
484 int n = (int)V.size();
485 if (n > 2) {
486 Instruction *DomInst = V.front();
487 for (int i = 1; i < n; ++i) {
488 V[i]->replaceAllUsesWith(DomInst);
489 V[i]->eraseFromParent();
490 }
491 Changed = true;
492 }
493 }
494
495 return Changed;
496 }
497
/***********************************************************************
 * processFunction : run the category conversion pass for this Function
 *
 * This does a postordered depth first traversal of the CFG,
 * processing instructions within a basic block in reverse, to
 * ensure that we see a def after its uses (ignoring phi node uses).
 * This is specifically useful for an address conversion, where we want to
 * see the constant add used in an indirect region (and convert it into a
 * llvm.genx.add.addr) before we see the instruction it uses.
 */
bool GenXCategory::processFunction(Function *F)
{
  Func = F;
  // Before doing the category conversion, fix circular phis. Note this
  // assignment (re)initializes the member Modified flag for this function.
  Modified = fixCircularPhis(F);
  // Load constants in phi nodes.
  loadPhiConstants(*F, DTs->getDomTree(F), *Subtarget, *DL, false);
  // Process all instructions.
  for (po_iterator<BasicBlock *> i = po_begin(&Func->getEntryBlock()),
       e = po_end(&Func->getEntryBlock()); i != e; ++i) {
    // This loop scans the basic block backwards. If any code is inserted
    // before the current point, that code is scanned too.
    BasicBlock *BB = *i;
    for (Instruction *Inst = &BB->back(); Inst;
         Inst = (Inst == &BB->front() ? nullptr : Inst->getPrevNode())) {
      Modified |= loadNonSimpleConstants(Inst, *Subtarget, *DL, nullptr);
      Modified |= loadConstants(Inst, *Subtarget, *DL);
      // Values whose category cannot be decided yet are queued in
      // NoCategory and retried later in handleLeftover().
      if (!processValue(Inst))
        NoCategory.push_back(Inst);
    }

    // This commons up constpred calls just loaded.
    Modified |= commonUpPredicate(BB);

    // Erase instructions (and their live ranges) as requested by processValue.
    for (unsigned i = 0, e = ToErase.size(); i != e; ++i) {
      Liveness->eraseLiveRange(ToErase[i]);
      ToErase[i]->eraseFromParent();
    }
    ToErase.clear();
  }
  // Process all args.
  for (auto fi = Func->arg_begin(), fe = Func->arg_end(); fi != fe; ++fi) {
    Value *V = &*fi;
    if (!processValue(V))
      NoCategory.push_back(V);
  }
  return Modified;
}
547
548 /***********************************************************************
549 * fixCircularPhis : fix up overlapping circular phi nodes
550 *
551 * A phi node at the head of a loop can have a use in the phi nodes in the same
552 * basic block. If the use is after the def, it still refers to the value in
553 * the previous loop iteration, but the GenX backend cannot cope with the
554 * live range going round the loop and overlapping with its own start.
555 *
556 * This function spots any such phi node and works around it by inserting an
557 * extra copy (bitcast) just after the phi nodes in the basic block.
558 *
559 * A better solution for the future would be to re-order the phi nodes if
560 * possible, and only fall back to inserting a copy if there is circularity
561 * (e.g. a loop that swaps two variables in its body).
562 */
fixCircularPhis(Function * F)563 bool GenXCategory::fixCircularPhis(Function *F)
564 {
565 bool Modified = false;
566 for (auto fi = Func->begin(), fe = Func->end(); fi != fe; ++fi) {
567 BasicBlock *BB = &*fi;
568 // Process phi nodes in one basic block.
569 for (auto bi = BB->begin(); ; ++bi) {
570 auto Phi = dyn_cast<PHINode>(&*bi);
571 if (!Phi)
572 break; // end of phi nodes
573 if (!GenXLiveness::wrapsAround(Phi, Phi))
574 continue;
575 // Overlapping circular phi node. Insert a copy.
576 // Note that the copy has to be split in the same way as a copy
577 // inserted in GenXCoalescing when coalescing fails, but we have
578 // our own code here because at this point we do not have any real
579 // and possibly coalesced live ranges like GenXCoalescing does.
580 Modified = true;
581 SmallVector<Use *, 8> Uses;
582 for (auto ui = Phi->use_begin(), ue = Phi->use_end(); ui != ue; ++ui)
583 Uses.push_back(&*ui);
584 // A phi node is never a struct -- GenXLowering removed struct phis.
585 IGC_ASSERT(!isa<StructType>(Phi->getType()));
586 // Insert a copy, split as required to be legal.
587 auto NewCopy =
588 Liveness->insertCopy(Phi, nullptr, BB->getFirstNonPHI(),
589 Phi->getName() + ".unoverlapper", 0, Subtarget);
590 // Change the uses that existed before we added the copy to use the
591 // copy instead.
592 for (auto ui = Uses.begin(), ue = Uses.end(); ui != ue; ++ui)
593 **ui = NewCopy;
594 }
595 }
596 return Modified;
597 }
598
/***********************************************************************
 * processValue : category conversion for one value
 *
 * Return:  whether category successfully chosen
 *
 * This returns false only for a function argument or a phi node where all
 * uses are in phi nodes which themselves do not have a category yet.
 */
bool GenXCategory::processValue(Value *V)
{
  // Check for special cases.
  // Ignore void.
  if (V->getType()->isVoidTy())
    return true;
  // Ignore i1 or vector of i1. Predicates do not use category
  // conversion.
  if (V->getType()->getScalarType()->isIntegerTy(1))
    return true;
  // Elements of a struct always have default (general or predicate) category.
  if (isa<StructType>(V->getType()))
    return true;

  auto DefInfo = getCategoryAndAlignmentForDef(V);
  UsesCatInfo UsesInfo(*this, V);

  // more corner cases
  if (UsesInfo.empty()) {
    // Value not used: set its category and then ignore it. If the definition
    // did not give us a category (probably an unused function arg), then
    // arbitrarily make it general.
    if (DefInfo.Cat == RegCategory::NONE)
      Liveness->getOrCreateLiveRange(V, RegCategory::GENERAL, DefInfo.Align);
    else
      Liveness->getOrCreateLiveRange(V, DefInfo.Cat, DefInfo.Align);
    return true;
  }
  else if (UsesInfo.allHaveCat(RegCategory::NONE))
  {
    if (DefInfo.Cat == RegCategory::NONE) {
      // The "no categories at all" case can only happen for a value that is
      // defined by a function argument or a phi node and used only in phi
      // nodes or subroutine call args.
      IGC_ASSERT_MESSAGE((isa<Argument>(V) || isa<PHINode>(V)),
                         "no register category");
      // Caller queues this value in NoCategory for a later retry.
      return false;
    }
    // Value defined with a category but only used in phi nodes.
    Liveness->getOrCreateLiveRange(V, DefInfo.Cat, DefInfo.Align);
    return true;
  }

  // main case
  if (DefInfo.Cat == RegCategory::NONE) {
    // NONE means that we're free to choose the category
    if (isa<PHINode>(V))
      // currently we'd like to propagate general through phi
      DefInfo.Cat = RegCategory::GENERAL;
    else
      DefInfo.Cat = UsesInfo.getMostUsedCat();
  }

  // Record the chosen category and the largest alignment demanded by the
  // def or any use.
  Liveness->getOrCreateLiveRange(
      V, DefInfo.Cat, std::max(DefInfo.Align, UsesInfo.getMaxAlign()));
  // Create one shared conversion per non-address category needed...
  auto Convs = buildConversions(V, DefInfo, UsesInfo);
  for (auto UseInfo : UsesInfo.getUses()) {
    if (UseInfo.Cat != DefInfo.Cat && UseInfo.Cat != RegCategory::NONE) {
      Instruction *Conv;
      if (UseInfo.Cat == RegCategory::ADDRESS) {
        // ...but the address category requires a separate conversion for
        // each use, at least until we get to GenXAddressCommoning where we
        // decide whether we can common some of them up.
        Conv = createConversion(V, UseInfo.Cat);
        placeConvBeforeUse(Conv, UseInfo.user, UseInfo.OperandNum);
        Liveness->getOrCreateLiveRange(Conv)->setCategory(UseInfo.Cat);
      }
      else
        Conv = Convs[UseInfo.Cat];
      IGC_ASSERT_MESSAGE(Conv, "must have such conversion");
      UseInfo.user->setOperand(UseInfo.OperandNum, Conv);
    }
  }
  // If V is now unused (which happens if it is a constant add and all its
  // uses were addresses), then remove it.
  if (V->use_empty())
    ToErase.push_back(cast<Instruction>(V));
  return true;
}
683
/***********************************************************************
 * createConversion : create call to llvm.genx.convert intrinsic to represent
 *                    register category conversion
 *
 * The new instruction is not inserted anywhere yet.
 *
 * In the case that we are asked to convert a use of an add or constant sub
 * to an address, we instead create an llvm.genx.add.addr of the input
 * to the add/sub.
 */
Instruction *GenXCategory::createConversion(Value *V, unsigned Cat)
{
  IGC_ASSERT_MESSAGE(V->getType()->getScalarType()->isIntegerTy(),
                     "createConversion expects int type");
  if (Cat == RegCategory::ADDRESS) {
    Value *Input = V;
    int Offset = 0;
    for (;;) {
      // Check for use of add/sub that can be baled in to a region as a
      // constant offset. This also handles a chain of two or more adds.
      int ThisOffset;
      if (!GenXBaling::getIndexAdd(Input, &ThisOffset) &&
          !GenXBaling::getIndexOr(Input, ThisOffset))
        break;
      // Stop folding if the offset goes below the minimum address
      // immediate the hardware supports.
      if (ThisOffset < G4_MIN_ADDR_IMM)
        break;
      Offset += ThisOffset;
      Input = cast<Instruction>(Input)->getOperand(0);
    }
    if (Input != V) {
      // Turn the add/sub into llvm.genx.add.addr. This could be out of range as
      // a constant offset in an indirect operand at this stage;
      // GenXAddressCommoning sorts that out by adjusting the constant offset in
      // the llvm.genx.convert.addr.
      return createAddAddr(Input, ConstantInt::get(V->getType(), Offset),
                           V->getName() + ".addradd", nullptr, Func->getParent());
    }
  }
  // Normal conversion. If the source is an integer creation intrinsic
  // and this isn't an address conversion, use the operand for that
  // intrinsic call directly rather than using the result of the intrinsic.
  // This helps the jitter to generate better code when surface constants
  // are used in send instructions.
  if (Cat != RegCategory::ADDRESS) {
    if (GenXIntrinsic::getGenXIntrinsicID(V) == GenXIntrinsic::genx_constanti)
      V = cast<CallInst>(V)->getArgOperand(0);
    return createConvert(V, V->getName() + ".categoryconv", nullptr,
                         Func->getParent());
  }
  // Address conversion with no foldable constant add/sub on the index.
  return createConvertAddr(V, 0, V->getName() + ".categoryconv", nullptr,
                           Func->getParent());
}
736
737 /***********************************************************************
738 * Creates conversion instructions, places them in the function (next to the
739 * def)
740 *
741 * Returns an array of created conversion (cons[Category] holds
742 * instruction if we need conversion to such Category and nullptr otherwise).
743 * Doesn't produce address category conversion.
744 */
745 GenXCategory::ConvListT
buildConversions(Value * Def,CategoryAndAlignment DefInfo,const UsesCatInfo & UsesInfo)746 GenXCategory::buildConversions(Value *Def, CategoryAndAlignment DefInfo,
747 const UsesCatInfo &UsesInfo) {
748 ConvListT Convs = {nullptr};
749 for (auto Cat : UsesInfo.getCategories()) {
750 // NONE doesn't require conversion, ADDRESS requirs conversion before
751 // every use (not after def, so we won't create it here)
752 if (Cat != DefInfo.Cat && Cat != RegCategory::NONE &&
753 Cat != RegCategory::ADDRESS) {
754 auto Conv = createConversion(Def, Cat);
755 placeConvAfterDef(Func, Conv, Def);
756 Liveness->getOrCreateLiveRange(Conv)->setCategory(Cat);
757 Convs[Cat] = Conv;
758 }
759 }
760 return Convs;
761 }
762
763 /***********************************************************************
764 * intrinsicCategoryToRegCategory : convert intrinsic arg category to
765 * register category
766 *
767 * This converts a GenXIntrinsicInfo::* category, as returned by
768 * GenXIntrinsicInfo::ArgInfo::getCategory(), into a register category
769 * as stored in a live range.
770 */
intrinsicCategoryToRegCategory(unsigned ICat)771 static unsigned intrinsicCategoryToRegCategory(unsigned ICat)
772 {
773 switch (ICat) {
774 case GenXIntrinsicInfo::ADDRESS:
775 return RegCategory::ADDRESS;
776 case GenXIntrinsicInfo::PREDICATION:
777 case GenXIntrinsicInfo::PREDICATE:
778 return RegCategory::PREDICATE;
779 case GenXIntrinsicInfo::SAMPLER:
780 return RegCategory::SAMPLER;
781 case GenXIntrinsicInfo::SURFACE:
782 return RegCategory::SURFACE;
783 default:
784 return RegCategory::GENERAL;
785 }
786 }
787
788 /***********************************************************************
789 * getCategoryAndAlignmentForDef : get register category and alignment for a def
790 *
791 * This returns RegCategory:: value, or RegCategory::NONE if no category
792 * is discernable.
793 */
getCategoryAndAlignmentForDef(Value * V) const794 CategoryAndAlignment GenXCategory::getCategoryAndAlignmentForDef(Value *V) const
795 {
796 if (V->getType()->getScalarType()->getPrimitiveSizeInBits() == 1)
797 return RegCategory::PREDICATE;
798 if (Argument *Arg = dyn_cast<Argument>(V)) {
799 auto *F = Arg->getParent();
800 // This is a function Argument.
801 if (!InFGHead) {
802 // It is an arg in a subroutine. Get the category from the corresponding
803 // arg at some call site. (We should not have disagreement among the
804 // call sites and the function arg, since whichever one gets a category
805 // first forces the category of all the others.)
806 return getCategoryForCallArg(F, Arg->getArgNo());
807 }
808 unsigned ArgNo = Arg->getArgNo();
809 if (KM.getNumArgs() > ArgNo) {
810 // The function is a kernel, and has argument kind metadata for
811 // this argument. Determine the category from the kind.
812 return KM.getArgCategory(ArgNo);
813 }
814 // The function is not a kernel, or does not have the appropriate
815 // metadata. Set to no particular category, so the arg's uses will
816 // determine the category. This is the fallback for compatibility with
817 // hand coded LLVM IR from before this metadata was added. (If we only
818 // had to cope with non-kernel functions, we could just return GENERAL.)
819 // FIXME: temporary fix for stack calls. We need to figure out how to
820 // determine arguments category if it cannot be deduced from the arg uses.
821 // * calls from another function groups might help (but we do not have
822 // liveness -> category for them). What about standalone stack calls?
823 IGC_ASSERT(genx::requiresStackCall(F));
824 return getCategoryForCallArg(F, Arg->getArgNo());
825 }
826 // The def is a phi-instruction.
827 if (PHINode *Phi = dyn_cast<PHINode>(V)) {
828 // This is a phi node. Get the category from one of the incomings. (We
829 // should not have disagreement among the incomings, since whichever
830 // one gets a category first forces the category of all the others.)
831 return getCategoryForPhiIncomings(Phi);
832 }
833 // Multiple outputs of inline assembly instruction
834 // result in a structure and those elements are extracted
835 // with extractelement
836 if (ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(V)) {
837 auto CI = dyn_cast<CallInst>(Extract->getAggregateOperand());
838 if (CI && CI->isInlineAsm())
839 return getCategoryForInlasmConstraintedOp(CI, Extract->getIndices()[0],
840 true /*IsOutput*/);
841 }
842 // The def is a call-inst
843 if (CallInst *CI = dyn_cast<CallInst>(V)) {
844 if (Function *Callee = CI->getCalledFunction()) {
845 unsigned IntrinsicID = GenXIntrinsic::getAnyIntrinsicID(Callee);
846 // We should not see genx_convert, as it is inserted into a value after
847 // using this function to determine its category.
848 IGC_ASSERT(IntrinsicID != GenXIntrinsic::genx_convert);
849 if (IntrinsicID == GenXIntrinsic::genx_convert_addr)
850 return RegCategory::ADDRESS;
851 if (GenXIntrinsic::isAnyNonTrivialIntrinsic(IntrinsicID) && !GenXIntrinsic::isRdRegion(IntrinsicID)
852 && !GenXIntrinsic::isWrRegion(IntrinsicID) && !GenXIntrinsic::isAbs(IntrinsicID)) {
853 // For any normal intrinsic, look up the argument class.
854 GenXIntrinsicInfo II(IntrinsicID);
855 auto AI = II.getRetInfo();
856 return CategoryAndAlignment(
857 intrinsicCategoryToRegCategory(AI.getCategory()),
858 getLogAlignment(AI.getAlignment(), Subtarget
859 ? Subtarget->getGRFByteSize()
860 : defaultGRFByteSize));
861 } else if (GenXIntrinsic::isRdRegion(IntrinsicID)) {
862 // Add this to avoid conversion in case of read-region on SurfaceIndex
863 // or SamplerIndex type
864 auto RC = getCategoryAndAlignmentForDef(
865 CI->getOperand(GenXIntrinsic::GenXRegion::OldValueOperandNum));
866 if (RC.Cat == RegCategory::SURFACE ||
867 RC.Cat == RegCategory::SAMPLER)
868 return RC.Cat;
869 }
870 } else if (CI->isInlineAsm()) {
871 return getCategoryForInlasmConstraintedOp(CI, 0, true /*IsOutput*/);
872 }
873 }
874 return RegCategory::GENERAL;
875 }
876
877 /***********************************************************************
878 * getCategoryForInlasmConstraintedOp : get register category for a
879 * operand of inline assembly (both for
880 * output and for input). Category of
881 * operand depends on its constraint.
882 *
883 */
getCategoryForInlasmConstraintedOp(CallInst * CI,unsigned ArgNo,bool IsOutput) const884 unsigned GenXCategory::getCategoryForInlasmConstraintedOp(CallInst *CI,
885 unsigned ArgNo,
886 bool IsOutput) const {
887 IGC_ASSERT_MESSAGE(CI->isInlineAsm(), "Inline asm expected");
888 InlineAsm *IA = dyn_cast<InlineAsm>(IGCLLVM::getCalledValue(CI));
889 IGC_ASSERT_MESSAGE(!IA->getConstraintString().empty(), "Here should be constraints");
890
891 auto ConstraintsInfo = genx::getGenXInlineAsmInfo(CI);
892
893 if (!IsOutput)
894 ArgNo += genx::getInlineAsmNumOutputs(CI);
895 auto Info = ConstraintsInfo[ArgNo];
896
897 switch (Info.getConstraintType()) {
898 default:
899 IGC_ASSERT_EXIT_MESSAGE(0, "unreachable while setting category in constraints");
900 case ConstraintType::Constraint_a:
901 case ConstraintType::Constraint_rw:
902 case ConstraintType::Constraint_r:
903 return RegCategory::GENERAL;
904 case ConstraintType::Constraint_n:
905 case ConstraintType::Constraint_i:
906 case ConstraintType::Constraint_F:
907 return RegCategory::NONE;
908 case ConstraintType::Constraint_cr:
909 return RegCategory::PREDICATE;
910 }
911 }
912
913 /***********************************************************************
914 * getCategoryAndAlignmentForUse : get register category for a use
915 *
916 * This returns RegCategory:: value, or RegCategory::NONE if no category
917 * is discernable.
918 */
getCategoryAndAlignmentForUse(Value::use_iterator U) const919 CategoryAndAlignment GenXCategory::getCategoryAndAlignmentForUse(
920 Value::use_iterator U) const
921 {
922 Value *V = U->get();
923 if (V->getType()->getScalarType()->isIntegerTy(1))
924 return RegCategory::PREDICATE;
925 auto user = cast<Instruction>(U->getUser());
926 if (PHINode *Phi = dyn_cast<PHINode>(user)) {
927 // This is a phi node. Get the category (if any) from the result, or from
928 // one of the incomings. (We should not have disagreement among the
929 // incomings, since whichever one gets a category first forces the category
930 // of all the others.)
931 if (auto LR = Liveness->getLiveRangeOrNull(Phi)) {
932 auto Cat = LR->getCategory();
933 if (Cat != RegCategory::NONE)
934 return Cat;
935 }
936 return getCategoryForPhiIncomings(Phi);
937 }
938 unsigned Category = RegCategory::GENERAL;
939 if (CallInst *CI = dyn_cast<CallInst>(user)) {
940 if (CI->isInlineAsm())
941 Category = getCategoryForInlasmConstraintedOp(CI, U->getOperandNo(),
942 false /*IsOutput*/);
943 else if (IGCLLVM::isIndirectCall(*CI))
944 Category = RegCategory::GENERAL;
945 else {
946 Function *Callee = CI->getCalledFunction();
947 unsigned IntrinID = GenXIntrinsic::not_any_intrinsic;
948 if (Callee)
949 IntrinID = GenXIntrinsic::getAnyIntrinsicID(Callee);
950 // We should not see genx_convert, as it is inserted into a value after
951 // using this function to determine its category.
952 IGC_ASSERT(IntrinID != GenXIntrinsic::genx_convert);
953 // For a read or write region or element intrisic, where the use we have
954 // is the address, mark as needing an address register.
955 switch (IntrinID) {
956 case GenXIntrinsic::not_any_intrinsic:
957 // Arg in subroutine call. Get the category from the function arg,
958 // or the arg at another call site. (We should not have disagreement
959 // among the call sites and the function arg, since whichever one
960 // gets a category first forces the category of all the others.)
961 Category = getCategoryForCallArg(Callee, U->getOperandNo());
962 break;
963 case GenXIntrinsic::genx_convert_addr:
964 Category = RegCategory::GENERAL;
965 break;
966 case GenXIntrinsic::genx_rdregioni:
967 case GenXIntrinsic::genx_rdregionf:
968 if (U->getOperandNo() == 4) // is addr-operand
969 Category = RegCategory::ADDRESS;
970 else if (GenXIntrinsic::GenXRegion::OldValueOperandNum == U->getOperandNo())
971 Category = RegCategory::NONE; // do not assign use-category
972 break;
973 case GenXIntrinsic::genx_wrregioni:
974 case GenXIntrinsic::genx_wrregionf:
975 if (U->getOperandNo() == 5) // is addr-operand
976 Category = RegCategory::ADDRESS;
977 break;
978 case GenXIntrinsic::genx_absf:
979 case GenXIntrinsic::genx_absi:
980 case GenXIntrinsic::genx_output:
981 case GenXIntrinsic::genx_output_1:
982 break;
983 default: {
984 // For any other intrinsic, look up the argument class.
985 GenXIntrinsicInfo II(IntrinID);
986 auto AI = II.getArgInfo(U->getOperandNo());
987 return CategoryAndAlignment(
988 intrinsicCategoryToRegCategory(AI.getCategory()),
989 getLogAlignment(AI.getAlignment(),
990 Subtarget ? Subtarget->getGRFByteSize()
991 : defaultGRFByteSize));
992 }
993 break;
994 }
995 }
996 }
997 return Category;
998 }
999
1000 /***********************************************************************
1001 * getCategoryForPhiIncomings : get register category from phi incomings
1002 *
1003 * Return: register category from a non-const incoming with a known category
1004 * else NONE if at least one incoming is non-constant
1005 * else GENERAL
1006 *
1007 * We will not have disagreement among the incomings, since whichever one gets
1008 * a category first forces the category of all the others.
1009 */
getCategoryForPhiIncomings(PHINode * Phi) const1010 unsigned GenXCategory::getCategoryForPhiIncomings(PHINode *Phi) const {
1011 IGC_ASSERT_MESSAGE(!Phi->getType()->isIntOrIntVectorTy(1),
1012 "pregicate phis should've been already considered");
1013 if (llvm::all_of(Phi->incoming_values(),
1014 [](const Use &Op) { return isa<Constant>(Op.get()); }))
1015 // All incomings are constant. Arbitrarily make the phi node value
1016 // general category.
1017 return RegCategory::GENERAL;
1018
1019 auto IncomingWithCategory =
1020 llvm::find_if(Phi->incoming_values(), [this](const Use &Op) {
1021 auto *LR = Liveness->getLiveRangeOrNull(Op.get());
1022 return LR && LR->getCategory() != RegCategory::NONE;
1023 });
1024 if (IncomingWithCategory != Phi->incoming_values().end()) {
1025 auto PhiCategory =
1026 Liveness->getLiveRange(IncomingWithCategory->get())->getCategory();
1027 IGC_ASSERT_MESSAGE(
1028 llvm::all_of(Phi->incoming_values(),
1029 [this, PhiCategory](const Use &Op) {
1030 auto *LR = Liveness->getLiveRangeOrNull(Op.get());
1031 return !LR || LR->getCategory() == RegCategory::NONE ||
1032 LR->getCategory() == PhiCategory;
1033 }),
1034 "Phi incoming values categories don't correspond");
1035 return PhiCategory;
1036 }
1037
1038 // If promotion is enforced, only one constant is enough to claim the phi
1039 // to have general category.
1040 if (EnforceCategoryPromotion &&
1041 llvm::any_of(Phi->incoming_values(),
1042 [](const Use &Op) { return isa<Constant>(Op.get()); }))
1043 return RegCategory::GENERAL;
1044
1045 return RegCategory::NONE;
1046 }
1047
1048 /***********************************************************************
1049 * getCategoryForCallArg : get register category from subroutine arg or
1050 * the corresponding arg at some call site
1051 *
1052 * Enter: Callee = function being called
1053 * ArgNo = argument number
1054 *
1055 * Return: register category from subroutine arg or a call arg with a
1056 * known category, else NONE if no category found
1057 *
1058 * We will not have disagreement among the subroutine arg and its corresponding
1059 * call args, since whichever one gets a category first forces the category of
1060 * all the others.
1061 */
getCategoryForCallArg(Function * Callee,unsigned ArgNo) const1062 unsigned GenXCategory::getCategoryForCallArg(Function *Callee, unsigned ArgNo) const
1063 {
1064 IGC_ASSERT(Callee);
1065 // First try the subroutine arg.
1066 auto ai = Callee->arg_begin();
1067 for (unsigned i = 0; i != ArgNo; ++i, ++ai)
1068 ;
1069 if (auto LR = Liveness->getLiveRangeOrNull(&*ai)) {
1070 unsigned Cat = LR->getCategory();
1071 if (Cat != RegCategory::NONE)
1072 return Cat;
1073 }
1074 // Then try the arg at each call site.
1075 for (auto *U: Callee->users()) {
1076 if (auto *CI = checkFunctionCall(U, Callee)) {
1077 auto ArgV = CI->getArgOperand(ArgNo);
1078 if (auto LR = Liveness->getLiveRangeOrNull(ArgV)) {
1079 unsigned Cat = LR->getCategory();
1080 if (Cat != RegCategory::NONE)
1081 return Cat;
1082 }
1083 }
1084 }
1085 // special case handling to break deadlock when all uses are undef or stack
1086 // call arg category cannot be deduced from the uses in the function, force
1087 // the argument to be GENERAL
1088 return EnforceCategoryPromotion ? RegCategory::GENERAL : RegCategory::NONE;
1089 }
1090
1091